Move newline chars back into elements.

This commit is contained in:
Emilia Simmons 2021-07-21 09:38:14 -04:00
parent a824866dfa
commit c2ff817f82
6 changed files with 686 additions and 991 deletions

View file

@ -856,11 +856,9 @@ Markup.5c - Junk
===========
Markup.5d - Junk
===========
b *a
* b* a
----------
(document
@ -1176,6 +1174,7 @@ List.9 - With markup
Directive.1 - Document
==============
#+a: b
----------
(document (directive (name) (value)))
@ -1255,6 +1254,16 @@ Directive.6b - Sublist
(document (body (list (directive (name) (value)) (listitem (list (directive (name) (value)) (listitem))))))
==============
Directive.7 - Directive unrelated to section
==============
#+a: b
* c
----------
(document (body (directive (name) (value))) (section (headline (stars) (item))))
=============
LatexEnv.1 -
=============

View file

@ -1,14 +1,9 @@
// Dynamic precedence constants ========================== {{{1
DYN = {
multiline: -10,
tablefm: 1, // over directive
paragraphnl: -1,
paragraphtext: 1,
nonparagraph: 10, // not sure why this needs to be so high
multiline: -1,
hltags: 1,
listtag: 1,
conflicts: -1,
footnote: 1, // paragraph\nfn -> continued paragraph (footnote) instead of fndef
}
org_grammar = {
@ -25,6 +20,7 @@ org_grammar = {
$.stars,
$._sectionend,
$._markup,
$._eof, // Basically just '\0', but allows multiple to be matched
],
inline: $ => [
@ -38,12 +34,11 @@ org_grammar = {
precedences: _ => [
['fn_definition', 'footnote'],
['eol', 'nl'],
],
conflicts: $ => [
[$._itemtag, $._textelement], // textelement in $._itemtext
[$.item], // :tags: in headlines
[$.item], // :tags: in headlines
// Markup
[$._conflicts, $.bold],
@ -53,27 +48,12 @@ org_grammar = {
[$._conflicts, $.code],
[$._conflicts, $.verbatim],
// For deciding where the newlines go
[$._nl, $._eol],
// Multiline -- continue the item or start a new one?
[$.body],
[$._paragraph_body],
[$._text_body],
[$.paragraph],
[$.comment],
[$.section],
[$.table],
// _text_body conflicts
[$.drawer],
[$.block],
[$.dynamic_block],
[$.latex_env],
[$.fndef],
// directives
// [$.document, $.paragraph, $._element, $.fndef, $.drawer, $.block, $.dynamic_block, $.list, $.table, $.latex_env],
// [$.paragraph, $._element, $.fndef, $.drawer, $.block, $.dynamic_block, $.list, $.table, $.latex_env],
// Subscript and underlines
[$._textelement, $.subscript, $._conflicts],
@ -82,49 +62,50 @@ org_grammar = {
rules: {
// Document, sections, body, & paragraph =============== {{{1
// prec over body -> element for directive list
document: $ => prec(1, seq(
optional(choice(
seq($._directive_list, $._eol),
seq($._directive_list, $._nl, repeat1($._nl), $.body),
$._directive_list, // required in combination with:
seq($._directive_list, $._eof), // equal precedence with _element
seq(repeat($._nl), $.body),
seq($._directive_list, repeat1($._nl), $.body),
)),
repeat($._nl),
optional(sep1($.section, $._nl))
repeat($.section),
)),
_nl: _ => choice('\n', '\r'),
_eol: _ => choice('\n', '\r', '\0'),
_eol: $ => choice('\n', '\r', $._eof),
section: $ => seq(
$.headline,
optional(seq($._nl, field('plan', $.plan))),
optional(seq($._nl, field('property_drawer', $.property_drawer))),
optional(seq(repeat1($._nl), $.body)),
optional(choice(
repeat1($._nl),
repeat1(seq(repeat1($._nl), $.section)),
)),
$.headline, $._eol,
optional(seq($.plan)),
optional(seq($.property_drawer)),
repeat($._nl),
optional(seq($.body, repeat($._nl))),
repeat($.section),
$._sectionend,
),
body: $ => sep1($._element, repeat1($._nl)),
_paragraph_body: $ => sep1(repeat1($._textelement), $._nl),
// _paragraph_body: $ => sep1(prec.dynamic(1, repeat1($._textelement)), prec.dynamic(-1, $._nl)),
_text_body: $ => sep1(repeat1($._text), $._nl),
body: $ => choice(
seq(sep1($._element, repeat($._nl))),
seq(sep1($._element, repeat($._nl)), $._directive_list),
$._directive_list,
), // the directive list + choice Solves Directive.7
// Element and textelement ============================= {{{1
_element: $ => choice(
seq($._directive_list, $._eol),
seq($._directive_list, $._eof),
$.comment,
// Have attached directive
$.paragraph,
$.fndef,
$.drawer,
$.list,
$.block,
$.dynamic_block,
$.paragraph,
$.table,
$.latex_env,
),
@ -153,11 +134,8 @@ org_grammar = {
// Prec prefers one paragraph over multiple for multi-line
paragraph: $ => prec.dynamic(DYN.multiline, seq(
optional(seq($._directive_list, $._nl)),
sep1(
prec.dynamic(DYN.paragraphtext, repeat1($._textelement)),
prec.dynamic(DYN.paragraphnl, $._nl)
),
optional($._directive_list),
repeat1(seq(repeat1($._textelement), $._eol)),
)),
// Headlines =========================================== {{{1
@ -190,7 +168,7 @@ org_grammar = {
':PROPERTIES:',
sep1(repeat1($._nl), $.property),
':END:',
optional(':'), // FIXME: report bug
$._eol,
),
property: $ => seq(
@ -206,13 +184,16 @@ org_grammar = {
_deadline: _ => 'DEADLINE:',
_closed: _ => 'CLOSED:',
plan: $ => repeat1(prec(1, // precedence over paragraph→timestamp
choice(
$.timestamp,
$.scheduled,
$.deadline,
$.closed,
))),
plan: $ => seq(
repeat1(prec(1, // precedence over paragraph→timestamp
choice(
$.timestamp,
$.scheduled,
$.deadline,
$.closed,
))),
$._eol,
),
scheduled: $ => seq($._scheduled, $.timestamp),
deadline: $ => seq($._deadline, $.timestamp),
@ -365,56 +346,62 @@ org_grammar = {
fndef: $ => prec('fn_definition',
seq(
optional(seq($._directive_list, $._nl)),
optional($._directive_list),
$._fn,
$._fn_label,
']',
$._paragraph_body,
repeat1(seq(repeat1($._textelement), $._eol)),
)),
footnote: $ => prec('footnote', seq(
$._fn,
choice(
$._fn_label,
seq(
optional($._fn_label),
token.immediate(':'),
sep1(repeat1($._textelement), $._eol)
),
),
']',
)),
footnote: $ => prec('footnote',
prec.dynamic(DYN.footnote, seq(
$._fn,
choice(
$._fn_label,
seq(optional($._fn_label), token.immediate(':'), $._paragraph_body),
),
']',
))),
// Directive & Comment ================================= {{{1
// Directive & Comments ================================ {{{1
_directive_list: $ => sep1($.directive, $._nl),
_directive_list: $ => repeat1($.directive),
directive: $ => seq(
'#+',
field('name', alias(token.immediate(/[^\p{Z}\n\r:]+/), $.name)),
token.immediate(':'),
field('value', alias(repeat($._text), $.value)),
$._eol
),
comment: $ => sep1(seq(prec.dynamic(DYN.nonparagraph, /#[^+\n\r]/), repeat($._text)), $._nl),
comment: $ => prec.right(repeat1(seq(/#[^+\n\r]/, repeat($._text), $._eol))),
// Drawer ============================================== {{{1
// precedence over :
drawer: $ => seq(
optional(seq($._directive_list, $._nl)),
optional($._directive_list),
$._drawer_begin,
sep1(repeat1($._nl), $._paragraph_body),
repeat($._nl),
repeat(seq(repeat1($._textelement), repeat1($._nl))),
$._drawer_end,
),
_drawer_begin: $ => seq( ':', $._drawername, token.immediate(':')),
// FIXME: report bug about optional(':')
_drawer_end: $ => seq( ':END:', optional(':')),
_drawer_begin: $ => seq(':', $._drawername, token.immediate(':'), $._nl),
_drawer_end: $ => seq(':END:', $._eol),
_drawername: _ => token.immediate(/[\p{L}\p{N}\p{Pd}\p{Pc}]+/),
// Block =============================================== {{{1
block: $ => seq(
optional(seq($._directive_list, $._nl)),
optional($._directive_list),
$._block_begin,
sep1(repeat1($._nl), $._text_body),
repeat($._nl),
repeat(seq(repeat1($._text), repeat1($._nl))),
$._block_end,
),
@ -422,11 +409,12 @@ org_grammar = {
'#+BEGIN_',
alias($._name, $.name),
optional(alias(repeat1($._text), $.parameters)),
$._eol,
),
_block_end: $ => seq(
'#+END_', $._name,
optional('_'), // FIXME: report bug
$._eol,
),
_name: _ => token.immediate(/[^\p{Z}\n\r]+/),
@ -434,9 +422,10 @@ org_grammar = {
// Dynamic block ======================================= {{{1
dynamic_block: $ => seq(
optional(seq($._directive_list, $._nl)),
optional($._directive_list),
$._dynamic_begin,
sep1(repeat1($._nl), $._text_body),
repeat($._nl),
repeat(seq(repeat1($._text), repeat1($._nl))),
$._dynamic_end,
),
@ -444,18 +433,19 @@ org_grammar = {
'#+BEGIN:',
alias(/[^\p{Z}\n\r]+/, $.name),
optional(alias(repeat1($._text), $.parameters)),
$._eol,
),
_dynamic_end: $ => seq(
'#+END:',
optional(':'), // FIXME: report bug
$._eol,
),
// Lists =============================================== {{{1
list: $ => seq(
optional(seq($._directive_list, $._nl)),
$._liststart, // captures indent length and bullet
optional($._directive_list),
$._liststart, // captures indent length and bullet type
repeat(seq($.listitem, $._listitemend, repeat1($._nl))),
seq($.listitem, $._listend)
),
@ -487,28 +477,28 @@ org_grammar = {
// prec so a new row is higher precedence than a new table
table: $ => prec.dynamic(DYN.multiline, seq(
optional(seq($._directive_list, $._nl)),
sep1(choice($.row, $._hrule), $._nl),
repeat(seq($._nl, $.formula)),
optional($._directive_list),
repeat1(choice($.row, $._hrule)),
repeat($.formula),
)),
row: $ => seq(repeat1($.cell), '|'),
row: $ => seq(repeat1($.cell), '|', $._eol),
cell: $ => seq('|', field('contents', repeat($._text))),
_hrule: _ => seq(
_hrule: $ => seq(
'|',
repeat1(seq(/[-+]+/, '|')),
optional('-'), // FIXME
$._eol,
),
// prec over directive. Not sure why it needs to be 2 over 1.
formula: $ => prec.dynamic(DYN.tablefm, seq('#+TBLFM:', field('formula', repeat($._text)))),
formula: $ => seq('#+TBLFM:', field('formula', repeat($._text)), $._eol),
// Latex environment =================================== {{{1
latex_env: $ => seq(
optional(seq($._directive_list, $._nl)),
optional($._directive_list),
$._env_begin,
sep1(repeat1($._nl), $._text_body),
repeat($._nl),
repeat(seq(repeat1($._text), repeat1($._nl))),
$._env_end,
),
@ -516,13 +506,14 @@ org_grammar = {
'\\begin{',
field('name', /[\p{L}\p{N}]+/),
token.immediate('}'),
$._eol
),
_env_end: $ => seq(
'\\end{',
/[\p{L}\p{N}]+/,
token.immediate('}'),
optional('}'), // FIXME: report bug
$._eol
),
// Text ================================================ {{{1

File diff suppressed because it is too large Load diff

View file

@ -818,28 +818,7 @@
{
"type": "section",
"named": true,
"fields": {
"plan": {
"multiple": false,
"required": false,
"types": [
{
"type": "plan",
"named": true
}
]
},
"property_drawer": {
"multiple": false,
"required": false,
"types": [
{
"type": "property_drawer",
"named": true
}
]
}
},
"fields": {},
"children": {
"multiple": true,
"required": true,
@ -852,6 +831,14 @@
"type": "headline",
"named": true
},
{
"type": "plan",
"named": true
},
{
"type": "property_drawer",
"named": true
},
{
"type": "section",
"named": true
@ -1066,10 +1053,6 @@
"named": true,
"fields": {}
},
{
"type": "\u0000",
"named": false
},
{
"type": "\n",
"named": false

Binary file not shown.

View file

@ -18,6 +18,7 @@ enum TokenType { // {{{1
HLSTARS,
SECTIONEND,
MARKUP,
ENDOFFILE,
};
enum Bullet { // {{{1
@ -151,152 +152,136 @@ struct Scanner { // {{{1
return NOTABULLET;
}
bool scan(TSLexer *lexer, const bool *valid_symbols) { // {{{1
bool scan(TSLexer *lexer, const bool *valid_symbols) { // {{{1
// - Section ends {{{1
// I think section_stack.back() > 0 is implied by SECTIONEND being valid
// TEST: listend/listitemend should be mutually exclusive with sectionend
if (valid_symbols[SECTIONEND] && lexer->lookahead == '\n') {
lexer->mark_end(lexer);
skip(lexer);
int stars = 0;
while (lexer->lookahead == '*') {
stars++;
// - Section ends {{{2
int16_t indent_length = 0;
lexer->mark_end(lexer);
for (;;) {
if (lexer->lookahead == ' ') {
indent_length++;
} else if (lexer->lookahead == '\t') {
indent_length += 8;
} else if (lexer->lookahead == '\0') {
// if (valid_symbols[LISTITEMEND]) { lexer->result_symbol = LISTITEMEND; }
// else
if (valid_symbols[LISTEND]) { lexer->result_symbol = LISTEND; }
else if (valid_symbols[SECTIONEND]) { lexer->result_symbol = SECTIONEND; }
else if (valid_symbols[ENDOFFILE]) { lexer->result_symbol = ENDOFFILE; }
else return false;
return true;
} else {
break;
}
skip(lexer);
}
// - Listitem ends {{{2
// Listend -> end of a line, looking for:
// 1. dedent
// 2. same indent, not a bullet
// 3. three eols
if (lexer->lookahead == '\n') {
if (valid_symbols[LISTEND] || valid_symbols[LISTITEMEND]) {
int16_t newlines = 0;
for (;;) {
if (lexer->lookahead == ' ') {
indent_length++;
} else if (lexer->lookahead == '\t') {
indent_length += 8;
} else if (lexer->lookahead == '\0') {
return dedent(lexer);
} else if (lexer->lookahead == '\n') {
if (++newlines > 2) return dedent(lexer);
indent_length = 0;
} else {
break;
}
skip(lexer);
}
if (stars > 0 && stars <= section_stack.back()) {
section_stack.pop_back();
lexer->result_symbol = SECTIONEND;
return true;
}
return false;
}
// - Count whitespace {{{1
int16_t indent_length = 0;
if (indent_length < indent_length_stack.back()) {
return dedent(lexer);
} else if (indent_length == indent_length_stack.back()) {
if (getbullet(lexer) == bullet_stack.back()) {
lexer->result_symbol = LISTITEMEND;
return true;
}
return dedent(lexer);
}
}
return false;
}
// - Col=0 star {{{2
if (indent_length == 0 && lexer->lookahead == '*') {
lexer->mark_end(lexer);
for (;;) {
if (lexer->lookahead == ' ') {
indent_length++;
} else if (lexer->lookahead == '\t') {
indent_length += 8;
} else {
break;
}
int16_t stars = 1;
skip(lexer);
while (lexer->lookahead == '*') {
stars++;
skip(lexer);
}
if (valid_symbols[SECTIONEND] && lexer->lookahead == '\0' && section_stack.back() > 0) {
lexer->result_symbol = SECTIONEND;
lexer->mark_end(lexer);
if (valid_symbols[SECTIONEND] && iswspace(lexer->lookahead) && stars > 0 && stars <= section_stack.back()) {
section_stack.pop_back();
lexer->result_symbol = SECTIONEND;
return true;
} else if (valid_symbols[HLSTARS] && iswspace(lexer->lookahead)) {
section_stack.push_back(stars);
lexer->mark_end(lexer);
lexer->result_symbol = HLSTARS;
return true;
} else if (valid_symbols[MARKUP] && stars == 1 && (!iswspace(lexer->lookahead) && lexer->lookahead != '\0')) {
lexer->result_symbol = MARKUP;
return true;
}
return false;
}
if (valid_symbols[LISTEND] && lexer->lookahead == '\0') {
lexer->result_symbol = LISTEND;
// - Liststart and bullets {{{2
if (valid_symbols[LISTSTART] || valid_symbols[BULLET]) {
// + and * need processing here, getbullet skips characters.
bool markup = lexer->lookahead == '+' || lexer->lookahead == '*';
Bullet bullet = getbullet(lexer);
if (valid_symbols[BULLET] && bullet == bullet_stack.back() && indent_length == indent_length_stack.back()) {
lexer->mark_end(lexer);
lexer->result_symbol = BULLET;
return true;
}
// - Listitem ends {{{1
// Listend -> end of a line, looking for:
// 1. dedent
// 2. same indent, not a bullet
// 3. three eols
if (lexer->lookahead == '\n') {
if (valid_symbols[LISTEND] || valid_symbols[LISTITEMEND]) {
int16_t newlines = 0;
for (;;) {
if (lexer->lookahead == ' ') {
indent_length++;
} else if (lexer->lookahead == '\t') {
indent_length += 8;
} else if (lexer->lookahead == '\0') {
return dedent(lexer);
} else if (lexer->lookahead == '\n') {
if (++newlines > 2) return dedent(lexer);
indent_length = 0;
} else {
break;
}
skip(lexer);
}
if (indent_length < indent_length_stack.back()) {
return dedent(lexer);
} else if (indent_length == indent_length_stack.back()) {
if (getbullet(lexer) == bullet_stack.back()) {
lexer->result_symbol = LISTITEMEND;
return true;
}
return dedent(lexer);
}
}
return false;
}
// - Col=0 star {{{1
if (indent_length == 0 && lexer->lookahead == '*') {
lexer->mark_end(lexer);
int16_t stars = 1;
skip(lexer);
while (lexer->lookahead == '*') {
stars++;
skip(lexer);
}
if (valid_symbols[HLSTARS] && lexer->lookahead == ' ' || lexer->lookahead == '\t') {
section_stack.push_back(stars);
lexer->mark_end(lexer);
lexer->result_symbol = HLSTARS;
return true;
} else if (valid_symbols[MARKUP] && stars == 1 && (!iswspace(lexer->lookahead) && lexer->lookahead != '\0')) {
lexer->result_symbol = MARKUP;
return true;
}
return false;
}
// - Liststart and bullets {{{1
if (valid_symbols[LISTSTART] || valid_symbols[BULLET]) {
// + and * need processing here, getbullet skips characters.
bool markup = lexer->lookahead == '+' || lexer->lookahead == '*';
Bullet bullet = getbullet(lexer);
if (valid_symbols[BULLET] && bullet == bullet_stack.back() && indent_length == indent_length_stack.back()) {
lexer->mark_end(lexer);
lexer->result_symbol = BULLET;
return true;
} else if (valid_symbols[LISTSTART] && bullet != NOTABULLET && indent_length > indent_length_stack.back()) {
indent_length_stack.push_back(indent_length);
bullet_stack.push_back(bullet);
lexer->result_symbol = LISTSTART;
return true;
} else if (valid_symbols[MARKUP] && bullet == NOTABULLET && markup) {
lexer->result_symbol = MARKUP;
return (!iswspace(lexer->lookahead) && lexer->lookahead != '\0');
}
}
// - Markup {{{1
if (valid_symbols[MARKUP] && (indent_length > 0 || lexer->get_column(lexer) == 0)
&& (lexer->lookahead == '*'
|| lexer->lookahead == '/'
|| lexer->lookahead == '_'
|| lexer->lookahead == '+'
|| lexer->lookahead == '~'
|| lexer->lookahead == '=')) {
lexer->mark_end(lexer);
skip(lexer);
} else if (valid_symbols[LISTSTART] && bullet != NOTABULLET && indent_length > indent_length_stack.back()) {
indent_length_stack.push_back(indent_length);
bullet_stack.push_back(bullet);
lexer->result_symbol = LISTSTART;
return true;
} else if (valid_symbols[MARKUP] && bullet == NOTABULLET && markup) {
lexer->result_symbol = MARKUP;
return (!iswspace(lexer->lookahead) && lexer->lookahead != '\0');
}
// - Default {{{1
return false;
}
// - Markup {{{2
if (valid_symbols[MARKUP] && (indent_length > 0 || lexer->get_column(lexer) == 0)
&& (lexer->lookahead == '*'
|| lexer->lookahead == '/'
|| lexer->lookahead == '_'
|| lexer->lookahead == '+'
|| lexer->lookahead == '~'
|| lexer->lookahead == '=')) {
lexer->mark_end(lexer);
skip(lexer);
lexer->result_symbol = MARKUP;
return (!iswspace(lexer->lookahead) && lexer->lookahead != '\0');
}
// - Default {{{2
return false;
}
};
}