Initial commit
This commit is contained in:
commit
8d6ef83961
4 changed files with 1771 additions and 0 deletions
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
Cargo.lock
|
||||
package-lock.json
|
||||
node_modules
|
||||
build
|
||||
*.log
|
||||
/examples/*/
|
||||
/target/
|
||||
1005
corpus/basic.tst
Normal file
1005
corpus/basic.tst
Normal file
File diff suppressed because it is too large
Load diff
434
grammar.js
Normal file
434
grammar.js
Normal file
|
|
@ -0,0 +1,434 @@
|
|||
// Tree-sitter grammar description for Org documents. The object is handed to
// `grammar()` at the bottom of this file. It is declared with `const` so that
// it does not become an implicit global (which throws under strict mode).
const org_grammar = {
  // EXTERNALS, INLINE =================================== {{{1
  name: 'org',
  extras: _ => [' '],  // Treat newlines explicitly

  // Tokens produced by the external scanner (src/scanner.cc). The order here
  // has to stay in step with the `TokenType` enum declared in that file.
  externals: $ => [
    $._liststart,
    $._listend,
    $._listitemend,
    $._bullet,
    $.stars,
    $._sectionend,
    $._markup,
  ],

  // inline: $ => [$._word, $._numbers, $._junk],
  // inline: $ => [ $._activeStart, $._activeEnd, $._inactiveStart, $._inactiveEnd,
  //                $._tsSeparator, $._ymd, $._dayname,],


  // PRECEDENCES, CONFLICT =============================== {{{1
  // Named precedence levels; within each list earlier names bind tighter.
  precedences: _ => [
    ['section', 'element', 'paragraph', 'textelement'],
    ['plan', 'textelement'],
    ['fn_definition', 'footnote'],
  ],

  // Ambiguities that are declared up front instead of being rejected by the
  // generator: a markup delimiter may turn out to be plain text.
  conflicts: $ => [
    [$._text, $.bold],
    [$._text, $.italic],
    [$._text, $.underline],
    [$._text, $.strikethrough],
    [$._text, $.code],
    [$._text, $.verbatim],
    [$.item],
    [$._lastitem],
  ],

  rules: {
    // DOCUMENT, SECTIONS, BODY, & PARAGRAPH =============== {{{1

    // A document is an optional leading body followed by any number of sections.
    document: $ => seq(
      optional($.body),
      repeat($.section),
    ),

    // SECTIONS, BODY, PARAGRAPH =========================== {{{1

    // Headline line, optional planning / property drawer / body / nested
    // sections, closed by the scanner-provided `_sectionend` token.
    section: $ => prec.dynamic(1, prec('section',
      seq(
        $.headline, $._eol,
        optional(seq(
          optional(seq($.plan, $._eol)),
          optional(seq($.property_drawer, $._eol)),
          optional($.body),
          repeat($.section),
        )),
        $._sectionend,
      ))),

    // `_eol` additionally accepts NUL, `_nl` only real line breaks.
    _eol: _ => choice('\0', '\n', '\r'),
    _nl: _ => choice('\n', '\r'),

    // Either only blank lines, or elements/paragraphs separated by blank lines.
    body: $ => choice(
      repeat1($._eol),
      seq(
        repeat($._eol),
        repeat1(seq(
          choice(
            $._element,
            $.paragraph
          ),
          repeat($._eol),
        )),
      )),

    // One or more consecutive lines of text elements.
    paragraph: $ => prec.right('paragraph',
      repeat1(seq(
        repeat1($._textelement),
        $._eol)
      )),

    // ELEMENT AND TEXTELEMENT ============================= {{{1

    _element: $ => choice(
      $.drawer,
      $.comment,
      $.fndef,
      $.directive,
      $.list,
      $.block,
      $.dynamic_block,
      // $.table,
    ),

    _textelement: $ => prec('textelement',
      choice(
        $._text,
        $.timestamp,
        $.footnote,
        $.link,
        $.bold,
        $.code,
        $.italic,
        $.verbatim,
        $.underline,
        $.strikethrough,
        // $.subscript
        // $.superscript
        // $.latexfragment
      )),

    // HEADLINES =========================================== {{{1

    headline: $ => seq(
      $.stars,
      $.item,
      optional($._taglist),
    ),

    item: $ => repeat1(choice($._text, ':')),

    _taglist: $ => prec.dynamic(1,  // otherwise just item
      seq(':',
        repeat1(seq(
          $.tag,
          token.immediate(':')
        )))),

    tag: _ => token.immediate(/[\p{L}\p{N}_@#%]+/),

    // NOTE(review): as written this only matches two colons with optional
    // \p{Z} separator characters between them, so a name such as `:ID:` is
    // not matched. Confirm whether a non-separator class was intended.
    _propertyName: _ => /:\p{Z}*:/,

    property_drawer: $ => seq(
      ':PROPERTIES:', $._eol,
      repeat(prec.right(seq(optional($.property), repeat1($._eol)))),
      ':END:',
    ),

    property: $ => seq(
      $._propertyName,
      repeat($._text),
    ),

    // PLANNING ============================================ {{{1

    _scheduled: _ => 'SCHEDULED:',
    _deadline: _ => 'DEADLINE:',
    _closed: _ => 'CLOSED:',

    plan: $ => repeat1(prec('plan',
      choice(
        $.timestamp,
        $.scheduled,
        $.deadline,
        $.closed,
      ))),

    scheduled: $ => seq($._scheduled, $.timestamp),
    deadline: $ => seq($._deadline, $.timestamp),
    // CLOSED: only takes inactive (square-bracket) timestamps.
    closed: $ => seq(
      $._closed,
      alias(choice(
        $._inactiveTimestamp,
        $._inactiveTimestampRange,
      ), $.timestamp),
    ),

    // TIMESTAMP =========================================== {{{1

    _activeStart: _ => '<',
    _activeEnd: _ => '>',
    _inactiveStart: _ => '[',
    _inactiveEnd: _ => ']',
    _tsSeparator: _ => '--',
    _ymd: _ => /\p{N}{1,4}-\p{N}{1,2}-\p{N}{1,4}/,
    time: _ => /\p{N}?\p{N}:\p{N}\p{N}/,
    repeater: _ => /[.+]?\+\p{N}+\p{L}/,
    delay: _ => /--?\p{N}+\p{L}/,

    // Numeric date optionally followed by a run of letters (e.g. a day name).
    date: $ => seq($._ymd, optional(/\p{L}+/)),

    timestamp: $ => choice(
      $._activeTimestamp,
      $._activeTimestampRange,
      $._inactiveTimestamp,
      $._inactiveTimestampRange,
    ),

    _activeTimestamp: $ => seq(
      $._activeStart,
      $.date,
      optional($.time),
      optional($.repeater),
      optional($.delay),
      $._activeEnd,
    ),

    _inactiveTimestamp: $ => seq(
      $._inactiveStart,
      $.date,
      optional($.time),
      optional($.repeater),
      optional($.delay),
      $._inactiveEnd,
    ),

    // Either two timestamps joined by `--`, or one timestamp with `time-time`.
    _activeTimestampRange: $ => choice(
      seq(
        alias($._activeTimestamp, $.timestamp),
        $._tsSeparator,
        alias($._activeTimestamp, $.timestamp)),
      seq(
        $._activeStart,
        $.date,
        $.time, '-', $.time,
        optional($.repeater),
        optional($.delay),
        $._activeEnd,
      )
    ),

    _inactiveTimestampRange: $ => choice(
      seq($._inactiveTimestamp, $._tsSeparator, $._inactiveTimestamp),
      seq(
        $._inactiveStart,
        $.date,
        $.time, '-', $.time,
        optional($.repeater),
        optional($.delay),
        $._inactiveEnd,
      )
    ),

    // MARKUP ============================================== {{{1

    // Each rule is generated by make_markup() (defined below the grammar).
    bold: make_markup('*'),
    italic: make_markup('/'),
    underline: make_markup('_'),
    strikethrough: make_markup('+'),
    code: make_markup('~', true),
    verbatim: make_markup('=', true),

    // LINK ================================================ {{{1

    _linkstart: _ => '[[',
    _linksep: _ => '][',
    _linkend: _ => ']]',

    // `[[uri][description]]` or `[[description]]`.
    link: $ => seq(
      $._linkstart,
      optional(seq(field('uri', $.linktext), $._linksep)),
      field('description', $.linktext),
      $._linkend,
    ),
    linktext: _ => /[^\]]*/,

    // FOOTNOTE ============================================ {{{1

    _fn_label: _ => /[^\p{Z}\[\]]+/,
    _fn: _ => '[fn:',

    // Footnote definition: `[fn:label]` followed by a paragraph.
    fndef: $ => prec('fn_definition',
      seq(
        $._fn,
        $._fn_label,
        ']',
        $.paragraph,
      )),

    // Footnote reference, optionally with an inline definition after `:`.
    footnote: $ => prec('footnote',
      seq(
        $._fn,
        choice(
          $._fn_label,
          seq(optional($._fn_label), ':', repeat1($._fn_label)),
        ),
        ']',
      )),

    // DIRECTIVE =========================================== {{{1

    directive: $ => seq(
      '#+',
      token.immediate(/[^\p{Z}:]+/),  // name
      token.immediate(':'),
      repeat($._text),
      $._eol,
    ),

    // COMMENTS ============================================ {{{1

    // Consecutive `# ` lines are grouped into a single comment node.
    comment: $ => prec.right(repeat1(seq(
      '# ', repeat($._text), $._eol
    ))),

    // DRAWER ============================================== {{{1

    drawer: $ => seq(
      ':',
      token.immediate(/[\p{L}\p{N}\p{Pd}\p{Pc}]+/),
      token.immediate(':'),
      $._eol,
      optional($.body),
      ':END:',
      $._eol,
    ),

    // BLOCK =============================================== {{{1

    block: $ => seq(
      '#+BEGIN_',
      alias($._name, $.name),
      optional($.parameters),
      $._nl,
      alias(
        repeat(seq(
          repeat($._textonly),
          $._nl,
        )),
        $.contents),
      '#+END_', $._name,  // \P{Z} does not match newlines
      repeat($._junk),  // FIXME
      $._eol,
    ),

    _name: _ => token.immediate(/[^\p{Z}\n\r]+/),

    // DYNAMIC BLOCK ======================================= {{{1

    dynamic_block: $ => prec(1, seq(  // FIXME why is this precedence required?
      '#+BEGIN:',
      optional(alias($._text, $.name)),
      optional($.parameters),
      // optional(alias(repeat1(/\S+/), $.parameters)),
      $._eol,
      alias(repeat(seq(
        repeat($._textonly),
        $._nl,
      )), $.contents),
      '#+END:',
      repeat($._junk),  // FIXME
      $._eol,
    )),

    parameters: $ => repeat1($._text),

    // LISTS =============================================== {{{1

    // `_liststart`, `_bullet`, `_listitemend` and `_listend` all come from the
    // external scanner; the final item is closed by `_listend`.
    list: $ => seq(
      $._liststart,
      repeat(seq($.listitem, optional($._eol))),
      alias($._lastitem, $.listitem),
    ),

    listitem: $ => seq(
      $._bullet,
      optional($._checkbox),
      optional($._itemtag),
      optional($._itemtext),
      $._listitemend,
      $._eol,
    ),

    _lastitem: $ => seq(
      $._bullet,
      optional($._checkbox),
      optional($._itemtag),
      optional($._itemtext),
      $._listend,
      optional($._eol),
    ),

    _checkbox: _ => /\[[ xX-]\]/,
    _itemtag: $ => seq(repeat($._textelement), '::'),

    // Item text may continue over following lines and may contain nested lists.
    _itemtext: $ => seq(
      repeat1($._textelement),
      repeat(seq(
        $._eol,
        optional($._eol),
        choice(repeat1($._textelement), $.list)
      )),
    ),


    // TEXT ================================================ {{{1

    // TODO: inline word/numbers/junk. Causes precedence issues
    // A repeat would also be nice.
    _textonly: $ => choice($._word,
      $._numbers,
      $._junk,
    ),

    _text: $ => choice(
      $._word,
      $._numbers,
      $._junk,

      $._activeStart,  // Causes conflicts, so they get marked as text.
      $._inactiveStart,

      seq($._markup, '*'),
      seq($._markup, '/'),
      seq($._markup, '_'),
      seq($._markup, '+'),
      seq($._markup, '~'),
      seq($._markup, '='),

      '#',  // comment collision
    ),


    _word: _ => /\p{L}+/,
    _numbers: _ => /\p{N}+/,
    _junk: _ => /[^\p{Z}\p{L}\p{N}]/,

  }
};
|
||||
|
||||
function make_markup(delim, textonly = false) {  // {{{1
  // Builds the rule callback for one inline-markup kind.
  //   delim    - the delimiter string that opens and closes the span.
  //   textonly - when true the span holds `$._text` only; otherwise any
  //              `$._textelement` (so other markup may nest inside).
  return $ => {
    const content = textonly ? $._text : $._textelement;
    // Further lines of the same span: a line break followed by more content.
    const continuation = seq($._eol, repeat1(content));

    return prec.left(seq(
      $._markup,
      delim,
      repeat1(content),
      repeat(continuation),
      token.immediate(delim),
    ));
  };
}
|
||||
|
||||
// }}}
|
||||
|
||||
module.exports = grammar(org_grammar);
|
||||
325
src/scanner.cc
Normal file
325
src/scanner.cc
Normal file
|
|
@ -0,0 +1,325 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
#include <vector>
|
||||
#include <cwctype>
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
|
||||
using std::vector;
|
||||
using std::iswspace;
|
||||
|
||||
// External token kinds handed back to the parser through
// `lexer->result_symbol`. The numeric values index `valid_symbols`, so the
// order must stay identical to the `externals` list in grammar.js.
enum TokenType {  // {{{1
  LISTSTART = 0,    // $._liststart
  LISTEND = 1,      // $._listend
  LISTITEMEND = 2,  // $._listitemend
  BULLET = 3,       // $._bullet
  HLSTARS = 4,      // $.stars
  SECTIONEND = 5,   // $._sectionend
  MARKUP = 6,       // $._markup
};
|
||||
|
||||
// Kind of list bullet recognised by Scanner::getbullet(). The values are
// stored in `bullet_stack` and written out by serialize(), so they must stay
// stable.
enum Bullet {  // {{{1
  NOTABULLET = 0,  // no bullet found (also the stack sentinel)
  DASH = 1,        // "-"
  PLUS = 2,        // "+"
  STAR = 3,        // "*"
  LOWERDOT = 4,    // "a."
  UPPERDOT = 5,    // "A."
  LOWERPAREN = 6,  // "a)"
  UPPERPAREN = 7,  // "A)"
  NUMDOT = 8,      // "1."
  NUMPAREN = 9,    // "1)"
};
|
||||
|
||||
struct Scanner { // {{{1
|
||||
vector<int16_t> indent_length_stack;
|
||||
vector<int16_t> bullet_stack;
|
||||
vector<int16_t> section_stack;
|
||||
|
||||
// A new scanner starts in the same state as deserializing an empty buffer:
// each stack holds only its sentinel entry.
Scanner() {
  const char *no_saved_state = nullptr;
  deserialize(no_saved_state, 0);
}
|
||||
|
||||
unsigned serialize(char *buffer) { // {{{1
|
||||
size_t i = 0;
|
||||
|
||||
size_t indent_count = indent_length_stack.size() - 1;
|
||||
if (indent_count > UINT8_MAX) indent_count = UINT8_MAX;
|
||||
buffer[i++] = indent_count;
|
||||
|
||||
vector<int16_t>::iterator
|
||||
iter = indent_length_stack.begin() + 1,
|
||||
end = indent_length_stack.end();
|
||||
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
iter = bullet_stack.begin() + 1;
|
||||
end = bullet_stack.end();
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
iter = section_stack.begin() + 1;
|
||||
end = section_stack.end();
|
||||
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
// Restores the scanner state from `buffer` (`length` bytes, as produced by
// serialize()). Every stack is first reset to its sentinel entry; an empty or
// null buffer therefore yields the initial state.
//
// The leading byte is the number of indent/bullet pairs. It is clamped against
// `length` so a short or inconsistent buffer can never be read past its end
// and the indent and bullet stacks always end up the same size.
void deserialize(const char *buffer, unsigned length) {  // {{{1
  section_stack.clear();
  section_stack.push_back(0);
  indent_length_stack.clear();
  indent_length_stack.push_back(-1);
  bullet_stack.clear();
  bullet_stack.push_back(NOTABULLET);

  if (buffer == nullptr || length == 0) return;

  size_t i = 0;
  size_t indent_count = static_cast<uint8_t>(buffer[i++]);

  // Both halves need `indent_count` bytes after the header byte.
  const size_t max_pairs = (length - 1) / 2;
  if (indent_count > max_pairs) indent_count = max_pairs;

  for (size_t k = 0; k < indent_count; ++k) indent_length_stack.push_back(buffer[i++]);
  for (size_t k = 0; k < indent_count; ++k) bullet_stack.push_back(buffer[i++]);

  // Everything that remains is the section-level stack.
  while (i < length) section_stack.push_back(buffer[i++]);
}
|
||||
|
||||
// Moves the lexer forward one character, passing `false` as the lexer's
// skip flag.
void advance(TSLexer *lexer) {  // {{{1
  const bool skip_flag = false;
  lexer->advance(lexer, skip_flag);
}
|
||||
|
||||
// Moves the lexer forward one character, passing `true` as the lexer's
// skip flag.
void skip(TSLexer *lexer) {  // {{{1
  const bool skip_flag = true;
  lexer->advance(lexer, skip_flag);
}
|
||||
|
||||
// Closes the innermost open list: pops its indent and bullet entries and
// reports a LISTEND token.
//
// Returns true when a list was closed. Returns false, leaving the stacks
// untouched, when only the sentinel entries remain; popping those would leave
// the vectors empty and make the later `.back()` calls in scan() undefined.
// NOTE(review): that situation looks reachable when the parser marks every
// external token as valid during error recovery - confirm.
bool dedent(TSLexer *lexer) {  // {{{1
  if (indent_length_stack.size() <= 1 || bullet_stack.size() <= 1) {
    return false;
  }
  indent_length_stack.pop_back();
  bullet_stack.pop_back();
  lexer->result_symbol = LISTEND;
  return true;
}
|
||||
|
||||
// Tries to read a list bullet at the current position and returns its kind,
// or NOTABULLET. Characters are consumed with skip() while looking, also when
// the attempt fails. A bullet only counts when it is followed by whitespace.
//
// Recognised forms: "-", "+", "*", a single ASCII letter or a run of ASCII
// digits followed by "." or ")".
Bullet getbullet(TSLexer *lexer) {  // {{{1
  const int32_t first = lexer->lookahead;

  // Unordered bullets: one marker character, then whitespace.
  if (first == '-' || first == '+' || first == '*') {
    skip(lexer);
    if (!iswspace(lexer->lookahead)) return NOTABULLET;
    switch (first) {
      case '-': return DASH;
      case '+': return PLUS;
      default:  return STAR;
    }
  }

  // Ordered bullets: consume the counter and remember which pair of kinds
  // the terminator will select between.
  Bullet with_dot = NOTABULLET;
  Bullet with_paren = NOTABULLET;
  if ('a' <= first && first <= 'z') {
    with_dot = LOWERDOT;
    with_paren = LOWERPAREN;
    skip(lexer);
  } else if ('A' <= first && first <= 'Z') {
    with_dot = UPPERDOT;
    with_paren = UPPERPAREN;
    skip(lexer);
  } else if ('0' <= first && first <= '9') {
    with_dot = NUMDOT;
    with_paren = NUMPAREN;
    do {
      skip(lexer);
    } while ('0' <= lexer->lookahead && lexer->lookahead <= '9');
  } else {
    return NOTABULLET;
  }

  // Terminator ("." or ")") must itself be followed by whitespace.
  if (lexer->lookahead == '.') {
    skip(lexer);
    if (iswspace(lexer->lookahead)) return with_dot;
  } else if (lexer->lookahead == ')') {
    skip(lexer);
    if (iswspace(lexer->lookahead)) return with_paren;
  }
  return NOTABULLET;
}
|
||||
|
||||
// Entry point of the external scanner. Looks at the upcoming characters and,
// if one of the tokens the parser currently accepts (`valid_symbols`, indexed
// by TokenType) applies, stores it in `lexer->result_symbol` and returns true.
// Returns false when no external token is recognised.
//
// The checks run in this order:
//   1. SECTIONEND at end of input while a section is open.
//   2. LISTEND / LISTITEMEND at the end of a list item line.
//   3. Leading whitespace is counted as the indent of the current line.
//   4. A `*` in column 0: SECTIONEND, HLSTARS or bold MARKUP.
//   5. LISTSTART / BULLET (with `+` falling back to MARKUP).
//   6. MARKUP for one of the delimiters * / _ + ~ =.
bool scan(TSLexer *lexer, const bool *valid_symbols) {  // {{{1

  // End of input closes the innermost open section (sentinel level is 0).
  if (valid_symbols[SECTIONEND] && lexer->lookahead == '\0' && section_stack.back() > 0) {
    lexer->result_symbol = SECTIONEND;
    section_stack.pop_back();
    return true;
  }

  int16_t indent_length = 0;

  // - Listitem ends {{{1
  // Listend -> end of a line, looking for:
  //   1. dedent
  //   2. same indent, not a bullet
  //   3. three eols
  if (valid_symbols[LISTEND] || valid_symbols[LISTITEMEND]) {
    int16_t newlines = 0;
    // Mark the token end before looking ahead; mark_end is not called again
    // in this branch.
    lexer->mark_end(lexer);
    for (;;) {
      if (lexer->lookahead == ' ' && newlines > 0) {
        indent_length++;
      } else if (lexer->lookahead == '\t' && newlines > 0) {
        indent_length += 8;  // a tab counts as eight columns
      } else if (lexer->lookahead == '\0') {
        return dedent(lexer);
      } else if (lexer->lookahead == '\n') {
        if (++newlines > 2) return dedent(lexer);
        indent_length = 0;  // indentation is measured on the last line seen
      } else {
        break;
      }
      skip(lexer);
    }
    // Still on the item's own line: nothing ends here.
    if (newlines == 0) return false;

    if (indent_length < indent_length_stack.back()) {
      return dedent(lexer);
    } else if (indent_length == indent_length_stack.back()) {
      // Same column: another item of this list only if the bullet kind matches.
      if (getbullet(lexer) == bullet_stack.back()) {
        lexer->result_symbol = LISTITEMEND;
        return true;
      }
      return dedent(lexer);
    }
    // Deeper indentation: the item continues.
    return false;
  }

  // - Count whitespace {{{1
  lexer->mark_end(lexer);
  for (;;) {
    if (lexer->lookahead == ' ') {
      indent_length++;
    } else if (lexer->lookahead == '\t') {
      indent_length += 8;
    } else if (lexer->lookahead == '\n') {
      return false;  // blank line: no external token
    } else {
      break;
    }
    skip(lexer);
  }

  // - Col=0 star {{{1
  if (indent_length == 0 && lexer->lookahead == '*') {
    lexer->mark_end(lexer);
    int16_t stars = 1;
    skip(lexer);
    while (lexer->lookahead == '*') {
      stars++;
      skip(lexer);
    }

    if (valid_symbols[SECTIONEND] && stars <= section_stack.back()) {
      // A headline at the same or a shallower level closes the open section.
      section_stack.pop_back();
      lexer->result_symbol = SECTIONEND;
      return true;
    } else if (valid_symbols[HLSTARS] && (lexer->lookahead == ' ' || lexer->lookahead == '\t')) {
      // Both whitespace alternatives are parenthesised so HLSTARS is only
      // produced (and a section level only pushed) when the parser accepts it.
      section_stack.push_back(stars);
      lexer->mark_end(lexer);  // the token covers the stars themselves
      lexer->result_symbol = HLSTARS;
      return true;
    } else if (valid_symbols[MARKUP] && stars == 1 && (!iswspace(lexer->lookahead) && lexer->lookahead != '\0')) {
      // A single star directly followed by text starts bold markup.
      lexer->result_symbol = MARKUP;
      return true;
    }
    return false;
  }

  // - Liststart and bullets {{{1
  if (valid_symbols[LISTSTART] || valid_symbols[BULLET]) {

    bool plus = lexer->lookahead == '+';  // requires special treatment, like *
    Bullet bullet = getbullet(lexer);

    if (valid_symbols[BULLET] && bullet == bullet_stack.back() && indent_length == indent_length_stack.back()) {
      lexer->mark_end(lexer);
      lexer->result_symbol = BULLET;
      return true;
    } else if (valid_symbols[LISTSTART] && bullet != NOTABULLET && indent_length > indent_length_stack.back()) {
      // A bullet indented deeper than the current list opens a new list.
      indent_length_stack.push_back(indent_length);
      bullet_stack.push_back(bullet);
      lexer->result_symbol = LISTSTART;
      return true;
    } else if (valid_symbols[MARKUP] && bullet == NOTABULLET && plus) {
      // `+` that is not a bullet: getbullet() already moved past it.
      lexer->result_symbol = MARKUP;
      return (!iswspace(lexer->lookahead) && lexer->lookahead != '\0');
    }
  }

  // - Markup {{{1
  if (valid_symbols[MARKUP] && (indent_length > 0 || lexer->get_column(lexer) == 0)
      && (lexer->lookahead == '*'
       || lexer->lookahead == '/'
       || lexer->lookahead == '_'
       || lexer->lookahead == '+'
       || lexer->lookahead == '~'
       || lexer->lookahead == '=')) {
    lexer->mark_end(lexer);
    skip(lexer);
    lexer->result_symbol = MARKUP;
    // Only markup when the delimiter is directly followed by content.
    return (!iswspace(lexer->lookahead) && lexer->lookahead != '\0');
  }

  // - Default {{{1
  return false;
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C" { // {{{1
|
||||
|
||||
void *tree_sitter_org_external_scanner_create() {
|
||||
return new Scanner();
|
||||
}
|
||||
|
||||
bool tree_sitter_org_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_org_external_scanner_serialize(void *payload, char *buffer) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->serialize(buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_org_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
scanner->deserialize(buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_org_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
delete scanner;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue