Compare commits
10 commits
4d2b327744
...
64cfbc213f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
64cfbc213f | ||
|
|
081179c52b | ||
|
|
eb1e080361 | ||
|
|
698bb1a343 | ||
|
|
76a70a8b35 | ||
|
|
a2c1946d78 | ||
|
|
bc8a040492 | ||
|
|
031031fe6c | ||
|
|
428fd9aeb9 | ||
|
|
53a61b6b62 |
14 changed files with 78094 additions and 66515 deletions
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-org"
|
||||
description = "org grammar for the tree-sitter parsing library"
|
||||
version = "1.0.1"
|
||||
version = "1.3.3"
|
||||
keywords = ["incremental", "parsing", "org"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/milisims/tree-sitter-org"
|
||||
|
|
|
|||
28
README.md
28
README.md
|
|
@ -23,7 +23,7 @@ Like in many regex systems, `*/+` is read as "0/1 or more", and `?` is 0 or 1.
|
|||
|
||||
## Example
|
||||
|
||||
``` org
|
||||
```org
|
||||
#+TITLE: Example
|
||||
|
||||
Some *marked up* words
|
||||
|
|
@ -32,9 +32,9 @@ Some *marked up* words
|
|||
<2020-06-07 Sun>
|
||||
|
||||
- list a
|
||||
- [ ] list a
|
||||
- [ ] list b
|
||||
- [-] list a
|
||||
- [ ] list b
|
||||
- [x] list b
|
||||
- list a
|
||||
|
||||
** Subsection :tag:
|
||||
|
|
@ -43,6 +43,7 @@ Text
|
|||
```
|
||||
|
||||
Parses as:
|
||||
|
||||
```
|
||||
(document [0, 0] - [16, 0]
|
||||
body: (body [0, 0] - [4, 0]
|
||||
|
|
@ -75,24 +76,23 @@ Parses as:
|
|||
(expr [7, 9] - [7, 10])))
|
||||
(listitem [8, 2] - [11, 0]
|
||||
bullet: (bullet [8, 2] - [8, 3])
|
||||
contents: (paragraph [8, 4] - [9, 0]
|
||||
(expr [8, 4] - [8, 5])
|
||||
(expr [8, 6] - [8, 7])
|
||||
checkbox: (checkbox [8, 4] - [8, 7]
|
||||
status: (expr [8, 5] - [8, 6]))
|
||||
contents: (paragraph [8, 8] - [9, 0]
|
||||
(expr [8, 8] - [8, 12])
|
||||
(expr [8, 13] - [8, 14]))
|
||||
contents: (list [9, 0] - [11, 0]
|
||||
(listitem [9, 4] - [10, 0]
|
||||
bullet: (bullet [9, 4] - [9, 5])
|
||||
contents: (paragraph [9, 6] - [10, 0]
|
||||
(expr [9, 6] - [9, 7])
|
||||
(expr [9, 8] - [9, 9])
|
||||
checkbox: (checkbox [9, 6] - [9, 9])
|
||||
contents: (paragraph [9, 10] - [10, 0]
|
||||
(expr [9, 10] - [9, 14])
|
||||
(expr [9, 15] - [9, 16])))
|
||||
(listitem [10, 4] - [11, 0]
|
||||
bullet: (bullet [10, 4] - [10, 5])
|
||||
contents: (paragraph [10, 6] - [11, 0]
|
||||
(expr [10, 6] - [10, 7])
|
||||
(expr [10, 8] - [10, 9])
|
||||
checkbox: (checkbox [10, 6] - [10, 9]
|
||||
status: (expr [10, 7] - [10, 8]))
|
||||
contents: (paragraph [10, 10] - [11, 0]
|
||||
(expr [10, 10] - [10, 14])
|
||||
(expr [10, 15] - [10, 16])))))
|
||||
(listitem [11, 2] - [12, 0]
|
||||
|
|
@ -118,13 +118,13 @@ For manual install, use `make`.
|
|||
|
||||
For neovim, using `nvim-treesitter/nvim-treesitter`, add to your configuration:
|
||||
|
||||
``` lua
|
||||
```lua
|
||||
local parser_config = require "nvim-treesitter.parsers".get_parser_configs()
|
||||
parser_config.org = {
|
||||
install_info = {
|
||||
url = 'https://github.com/milisims/tree-sitter-org',
|
||||
revision = 'main',
|
||||
files = { 'src/parser.c', 'src/scanner.cc' },
|
||||
files = { 'src/parser.c', 'src/scanner.c' },
|
||||
},
|
||||
filetype = 'org',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
"sources": [
|
||||
"src/parser.c",
|
||||
"bindings/node/binding.cc",
|
||||
"src/scanner.cc"
|
||||
"src/scanner.c"
|
||||
],
|
||||
"cflags_c": [
|
||||
"-std=c99",
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ fn main() {
|
|||
let src_dir = std::path::Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.include(&src_dir);
|
||||
c_config.include(src_dir);
|
||||
c_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable")
|
||||
|
|
@ -10,29 +10,10 @@ fn main() {
|
|||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
|
||||
// If your language uses an external scanner written in C,
|
||||
// then include this block of code:
|
||||
|
||||
/*
|
||||
let scanner_path = src_dir.join("scanner.c");
|
||||
c_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
*/
|
||||
|
||||
c_config.compile("parser");
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
|
||||
// If your language uses an external scanner written in C++,
|
||||
// then include this block of code:
|
||||
|
||||
let mut cpp_config = cc::Build::new();
|
||||
cpp_config.cpp(true);
|
||||
cpp_config.include(&src_dir);
|
||||
cpp_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable");
|
||||
let scanner_path = src_dir.join("scanner.cc");
|
||||
cpp_config.file(&scanner_path);
|
||||
cpp_config.compile("scanner");
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
}
|
||||
|
|
|
|||
1353
corpus/basic.tst
1353
corpus/basic.tst
File diff suppressed because it is too large
Load diff
32
grammar.js
32
grammar.js
|
|
@ -172,12 +172,12 @@ org_grammar = {
|
|||
date: $ => /\p{N}{1,4}-\p{N}{1,4}-\p{N}{1,4}/,
|
||||
|
||||
_ts_element: $ => choice(
|
||||
field('day', alias(/\p{L}[^\]>\p{Z}\n\r]*/, $.day)),
|
||||
field('day', alias(/\p{L}[^\]>\p{Z}\t\n\r]*/, $.day)),
|
||||
field('time', alias(/\p{N}?\p{N}[:.]\p{N}\p{N}( ?\p{L}{1,2})?/, $.time)),
|
||||
field('duration', alias(/\p{N}?\p{N}[:.]\p{N}\p{N}( ?\p{L}{1,2})?-\p{N}?\p{N}[:.]\p{N}\p{N}( ?\p{L}{1,2})?/, $.duration)),
|
||||
field('repeat', alias(/[.+]?\+\p{N}+\p{L}/, $.repeat)),
|
||||
field('delay', alias(/--?\p{N}+\p{L}/, $.delay)),
|
||||
alias(prec(-1, /[^\[<\]>\p{Z}\n\r]+/), $.expr),
|
||||
alias(prec(-1, /[^\[<\]>\p{Z}\t\n\r]+/), $.expr),
|
||||
),
|
||||
|
||||
paragraph: $ => seq(optional($._directive_list), $._multiline_text),
|
||||
|
|
@ -186,7 +186,7 @@ org_grammar = {
|
|||
optional($._directive_list),
|
||||
seq(
|
||||
caseInsensitive('[fn:'),
|
||||
field('label', alias(/[^\p{Z}\n\r\]]+/, $.expr)),
|
||||
field('label', alias(/[^\p{Z}\t\n\r\]]+/, $.expr)),
|
||||
']',
|
||||
),
|
||||
field('description', alias($._multiline_text, $.description))
|
||||
|
|
@ -222,7 +222,7 @@ org_grammar = {
|
|||
$._nl,
|
||||
optional(field('contents', $.contents)),
|
||||
caseInsensitive('#+end_'),
|
||||
$._immediate_expr,
|
||||
field('end_name',alias($._immediate_expr, $.expr)),
|
||||
$._eol,
|
||||
),
|
||||
|
||||
|
|
@ -234,6 +234,7 @@ org_grammar = {
|
|||
$._nl,
|
||||
optional(field('contents', $.contents)),
|
||||
caseInsensitive('#+end:'),
|
||||
optional(field('end_name', $.expr)),
|
||||
$._eol,
|
||||
),
|
||||
|
||||
|
|
@ -246,12 +247,22 @@ org_grammar = {
|
|||
|
||||
listitem: $ => seq(
|
||||
field('bullet', $.bullet),
|
||||
optional(field('checkbox', $.checkbox)),
|
||||
choice(
|
||||
$._eof,
|
||||
field('contents', $._body_contents),
|
||||
),
|
||||
),
|
||||
|
||||
checkbox: $ => choice(
|
||||
'[ ]',
|
||||
seq(
|
||||
token(prec('non-immediate', '[')),
|
||||
field('status', alias($._checkbox_status_expr, $.expr)),
|
||||
token.immediate(prec('special', ']')),
|
||||
),
|
||||
),
|
||||
|
||||
table: $ => prec.right(seq(
|
||||
optional($._directive_list),
|
||||
repeat1(choice($.row, $.hr)),
|
||||
|
|
@ -294,14 +305,12 @@ org_grammar = {
|
|||
token.immediate('}'),
|
||||
),
|
||||
seq(
|
||||
caseInsensitive('\\['),
|
||||
$._nl,
|
||||
token(seq(caseInsensitive('\\['), choice('\n', '\r'))),
|
||||
optional(field('contents', $.contents)),
|
||||
caseInsensitive('\\]'),
|
||||
),
|
||||
seq(
|
||||
caseInsensitive('\\('),
|
||||
$._nl,
|
||||
token(seq(caseInsensitive('\\('), choice('\n', '\r'))),
|
||||
optional(field('contents', $.contents)),
|
||||
caseInsensitive('\\)'),
|
||||
),
|
||||
|
|
@ -324,6 +333,8 @@ org_grammar = {
|
|||
_immediate_expr: $ => repeat1(expr('immediate', token.immediate)),
|
||||
_noc_expr: $ => repeat1(expr('immediate', token.immediate, ':')),
|
||||
|
||||
_checkbox_status_expr: $ => expr('immediate', token.immediate, ']'),
|
||||
|
||||
_ts_expr: $ => seq(
|
||||
expr('non-immediate', token, '>]'),
|
||||
repeat(expr('immediate', token.immediate, '>]'))
|
||||
|
|
@ -343,7 +354,10 @@ function expr(pr, tfunc, skip = '') {
|
|||
...asciiSymbols.filter(c => !skip.includes(c)).map(c => tfunc(prec(pr, c))),
|
||||
alias(tfunc(prec(pr, /\p{L}+/)), 'str'),
|
||||
alias(tfunc(prec(pr, /\p{N}+/)), 'num'),
|
||||
alias(tfunc(prec(pr, /[^\p{Z}\p{L}\p{N}\n\r]/)), 'sym'),
|
||||
alias(tfunc(prec(pr, /[^\p{Z}\p{L}\p{N}\t\n\r]/)), 'sym'),
|
||||
// for checkboxes: ugly, but makes them work..
|
||||
// alias(tfunc(prec(pr, 'x')), 'str'),
|
||||
// alias(tfunc(prec(pr, 'X')), 'str'),
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "tree-sitter-org",
|
||||
"version": "1.0.1",
|
||||
"version": "1.3.3",
|
||||
"description": "Org grammar for tree-sitter",
|
||||
"main": "bindings/node",
|
||||
"keywords": [
|
||||
|
|
|
|||
|
|
@ -61,10 +61,10 @@
|
|||
(bullet) @OrgListBullet
|
||||
|
||||
; Get different colors for different statuses as follows
|
||||
(listitem . (bullet) . (paragraph . (expr "[" "str" @OrgCheckDone "]") @OrgCheckbox (#match? @OrgCheckbox "^\[[xX]\]$")))
|
||||
(listitem . (bullet) . (paragraph . (expr "[" "-" @OrgCheckInProgress "]") @OrgCheckbox (#eq? @OrgCheckbox "[-]")))
|
||||
(listitem . (bullet) . (paragraph . (expr "[") @OrgCheckbox.left (#eq? @OrgCheckbox.left "[") . (expr "]") @OrgCheckbox.right (#eq? @OrgCheckbox.right "]")))
|
||||
; (listitem . (bullet) . (paragraph (expr ":" ":") @OrgListDescriptionSeparator (#eq? @OrgListDescriptionSeparator "::"))) -- matches multiple, requires a special search.
|
||||
(checkbox) @OrgCheckbox
|
||||
(checkbox status: (expr "-") @OrgCheckInProgress)
|
||||
(checkbox status: (expr "str") @OrgCheckDone (#any-of? @OrgCheckDone "x" "X"))
|
||||
(checkbox status: (expr) @Error (#not-any-of? @Error "x" "X" "-"))
|
||||
|
||||
; If you want the ruler one color and the separators a different color,
|
||||
; something like this would do it:
|
||||
|
|
|
|||
580
src/grammar.json
580
src/grammar.json
|
|
@ -833,7 +833,7 @@
|
|||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\p{L}[^\\]>\\p{Z}\\n\\r]*"
|
||||
"value": "\\p{L}[^\\]>\\p{Z}\\t\\n\\r]*"
|
||||
},
|
||||
"named": true,
|
||||
"value": "day"
|
||||
|
|
@ -898,7 +898,7 @@
|
|||
"value": -1,
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\[<\\]>\\p{Z}\\n\\r]+"
|
||||
"value": "[^\\[<\\]>\\p{Z}\\t\\n\\r]+"
|
||||
}
|
||||
},
|
||||
"named": true,
|
||||
|
|
@ -961,7 +961,7 @@
|
|||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\n\\r\\]]+"
|
||||
"value": "[^\\p{Z}\\t\\n\\r\\]]+"
|
||||
},
|
||||
"named": true,
|
||||
"value": "expr"
|
||||
|
|
@ -1250,8 +1250,17 @@
|
|||
"value": "#+end_"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_immediate_expr"
|
||||
"type": "FIELD",
|
||||
"name": "end_name",
|
||||
"content": {
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_immediate_expr"
|
||||
},
|
||||
"named": true,
|
||||
"value": "expr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
|
|
@ -1331,6 +1340,22 @@
|
|||
"named": false,
|
||||
"value": "#+end:"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "end_name",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "expr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_eol"
|
||||
|
|
@ -1405,6 +1430,22 @@
|
|||
"name": "bullet"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "checkbox",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "checkbox"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
|
|
@ -1424,6 +1465,55 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"checkbox": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "[ ]"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "non-immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "["
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "status",
|
||||
"content": {
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_checkbox_status_expr"
|
||||
},
|
||||
"named": true,
|
||||
"value": "expr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "special",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "]"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"table": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 0,
|
||||
|
|
@ -1730,17 +1820,34 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\\\\\["
|
||||
},
|
||||
"named": false,
|
||||
"value": "\\["
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_nl"
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\\\\\["
|
||||
},
|
||||
"named": false,
|
||||
"value": "\\["
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\n"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\r"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
|
|
@ -1773,17 +1880,34 @@
|
|||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\\\\\("
|
||||
},
|
||||
"named": false,
|
||||
"value": "\\("
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_nl"
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\\\\\("
|
||||
},
|
||||
"named": false,
|
||||
"value": "\\("
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\n"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\r"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
|
|
@ -2317,7 +2441,7 @@
|
|||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\n\\r]"
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -2714,7 +2838,7 @@
|
|||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\n\\r]"
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -2724,6 +2848,400 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"_checkbox_status_expr": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "!"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "\""
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "#"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "$"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "%"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "&"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "'"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "("
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": ")"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "*"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "+"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": ","
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "-"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "/"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": ":"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": ";"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "="
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "?"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "@"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "["
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "\\"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "^"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "_"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "`"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "{"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "|"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "}"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "~"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\p{L}+"
|
||||
}
|
||||
}
|
||||
},
|
||||
"named": false,
|
||||
"value": "str"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\p{N}+"
|
||||
}
|
||||
}
|
||||
},
|
||||
"named": false,
|
||||
"value": "num"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "IMMEDIATE_TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
"named": false,
|
||||
"value": "sym"
|
||||
}
|
||||
]
|
||||
},
|
||||
"_ts_expr": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
|
|
@ -3101,7 +3619,7 @@
|
|||
"value": "non-immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\n\\r]"
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -3486,7 +4004,7 @@
|
|||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\n\\r]"
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -3897,7 +4415,7 @@
|
|||
"value": "non-immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\n\\r]"
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -4304,7 +4822,7 @@
|
|||
"value": "immediate",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\n\\r]"
|
||||
"value": "[^\\p{Z}\\p{L}\\p{N}\\t\\n\\r]"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
|||
|
|
@ -23,6 +23,16 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"end_name": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "expr",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
|
|
@ -119,6 +129,22 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "checkbox",
|
||||
"named": true,
|
||||
"fields": {
|
||||
"status": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "expr",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "comment",
|
||||
"named": true,
|
||||
|
|
@ -276,6 +302,16 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"end_name": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "expr",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
|
|
@ -523,6 +559,16 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
"checkbox": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "checkbox",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"contents": {
|
||||
"multiple": true,
|
||||
"required": false,
|
||||
|
|
@ -1052,6 +1098,10 @@
|
|||
"type": "[",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "[ ]",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "[%%",
|
||||
"named": false
|
||||
|
|
@ -1064,18 +1114,10 @@
|
|||
"type": "\\",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "\\(",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "\\)",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "\\[",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "\\]",
|
||||
"named": false
|
||||
|
|
|
|||
141901
src/parser.c
141901
src/parser.c
File diff suppressed because it is too large
Load diff
342
src/scanner.c
Normal file
342
src/scanner.c
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <tree_sitter/parser.h>
#include <wctype.h>
|
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define VEC_RESIZE(vec, _cap) \
|
||||
{ \
|
||||
(vec)->data = realloc((vec)->data, (_cap) * sizeof((vec)->data[0])); \
|
||||
assert((vec)->data != NULL); \
|
||||
(vec)->cap = (_cap); \
|
||||
}
|
||||
|
||||
#define VEC_PUSH(vec, el) \
|
||||
{ \
|
||||
if ((vec)->cap == (vec)->len) { \
|
||||
VEC_RESIZE((vec), MAX(16, (vec)->len * 2)); \
|
||||
} \
|
||||
(vec)->data[(vec)->len++] = (el); \
|
||||
}
|
||||
|
||||
#define VEC_POP(vec) (vec)->len--;
|
||||
|
||||
#define VEC_BACK(vec) ((vec)->data[(vec)->len - 1])
|
||||
|
||||
#define VEC_FREE(vec) \
|
||||
{ \
|
||||
if ((vec)->data != NULL) \
|
||||
free((vec)->data); \
|
||||
}
|
||||
|
||||
#define VEC_CLEAR(vec) \
|
||||
{ (vec)->len = 0; }
|
||||
|
||||
/*
 * External token kinds produced by this scanner.
 *
 * The numeric value of each enumerator is used directly as an index into the
 * `valid_symbols` array and is what gets stored in `lexer->result_symbol`.
 * NOTE(review): the order presumably has to mirror the `externals` list in
 * grammar.js - confirm before reordering or inserting entries.
 */
enum TokenType {
    LISTSTART = 0,   /* start of a list */
    LISTEND = 1,     /* end of the current list (emitted by dedent) */
    LISTITEMEND = 2, /* end of the current list item */
    BULLET = 3,      /* list item bullet */
    HLSTARS = 4,     /* run of '*' at column 0 followed by whitespace */
    SECTIONEND = 5,  /* end of the current section */
    ENDOFFILE = 6,   /* end of input */
};
|
||||
|
||||
/*
 * Kind of list bullet recognised by getbullet(). The values are stored in the
 * scanner's bullet stack and written into the serialization buffer, so they
 * are spelled out explicitly.
 */
typedef enum {
    NOTABULLET = 0, /* no bullet recognised */
    DASH = 1,       /* "-"  */
    PLUS = 2,       /* "+"  */
    STAR = 3,       /* "*"  */
    LOWERDOT = 4,   /* "a." (one ASCII lowercase letter) */
    UPPERDOT = 5,   /* "A." (one ASCII uppercase letter) */
    LOWERPAREN = 6, /* "a)" */
    UPPERPAREN = 7, /* "A)" */
    NUMDOT = 8,     /* "12." (one or more ASCII digits) */
    NUMPAREN = 9,   /* "12)" */
} Bullet;
|
||||
|
||||
/*
 * Growable array of 16-bit integers, manipulated through the VEC_* macros.
 */
typedef struct {
    /* Number of elements currently stored. */
    uint32_t len;
    /* Number of elements `data` has room for. */
    uint32_t cap;
    /* Element storage, (re)allocated by VEC_RESIZE. */
    int16_t *data;
} stack;
|
||||
|
||||
typedef struct {
|
||||
stack *indent_length_stack;
|
||||
stack *bullet_stack;
|
||||
stack *section_stack;
|
||||
} Scanner;
|
||||
|
||||
/* Move to the next character, passing `false` for the lexer's skip flag. */
static inline void advance(TSLexer *lexer) {
    const bool skip_flag = false;
    lexer->advance(lexer, skip_flag);
}
|
||||
|
||||
/* Move to the next character, passing `true` for the lexer's skip flag. */
static inline void skip(TSLexer *lexer) {
    const bool skip_flag = true;
    lexer->advance(lexer, skip_flag);
}
|
||||
|
||||
/*
 * Write the scanner state into `buffer` and return the number of bytes used.
 *
 * Layout (one byte per entry, sentinels at index 0 of each stack are omitted):
 *
 *   [0]            N = number of open lists
 *   [1 .. N]       indent length of each open list
 *   [N+1 .. 2N]    bullet kind of each open list
 *   [2N+1 .. end)  star count of each open section
 *
 * deserialize() locates the three regions purely from N, so exactly N indent
 * entries and N bullet entries must be written. N is therefore limited to what
 * fits in the count byte, to the shorter of the two list stacks, and to what
 * fits in the buffer; deeper nesting is truncated rather than producing a
 * buffer that would be split at the wrong places on restore.
 *
 * Entries are narrowed to one byte, so values above 255 do not round-trip.
 */
unsigned serialize(Scanner *scanner, char *buffer) {
    const stack *indents = scanner->indent_length_stack;
    const stack *bullets = scanner->bullet_stack;
    const stack *sections = scanner->section_stack;
    const size_t capacity = TREE_SITTER_SERIALIZATION_BUFFER_SIZE;

    /* Guard the `len - 1` against an (unexpected) empty stack. */
    size_t list_count = indents->len > 0 ? indents->len - 1 : 0;
    const size_t bullet_count = bullets->len > 0 ? bullets->len - 1 : 0;
    if (bullet_count < list_count) {
        list_count = bullet_count;
    }
    if (list_count > UINT8_MAX) {
        list_count = UINT8_MAX;
    }
    /* One count byte plus two bytes per list must fit in the buffer. */
    const size_t max_lists = capacity > 0 ? (capacity - 1) / 2 : 0;
    if (list_count > max_lists) {
        list_count = max_lists;
    }

    size_t i = 0;
    buffer[i++] = (char)list_count;

    for (size_t k = 1; k <= list_count; ++k) {
        buffer[i++] = (char)indents->data[k];
    }

    for (size_t k = 1; k <= list_count; ++k) {
        buffer[i++] = (char)bullets->data[k];
    }

    /* Sections take whatever room is left. */
    for (size_t k = 1; k < sections->len && i < capacity; ++k) {
        buffer[i++] = (char)sections->data[k];
    }

    return (unsigned)i;
}
|
||||
|
||||
/*
 * Restore the scanner state from a buffer produced by serialize().
 *
 * All three stacks are first reset to just their sentinel (section 0,
 * indent -1, bullet NOTABULLET); with `length == 0` that is the whole result.
 * Otherwise byte 0 holds the number of open lists N, followed by N indent
 * lengths, N bullet kinds, and then one star count per open section.
 *
 * Every read is bounded by `length`, so a buffer shorter than its count byte
 * announces is restored as far as it goes instead of being over-read.
 *
 * Bytes are interpreted as unsigned: serialize() only stores non-negative
 * values (star counts, Bullet values and - NOTE(review): presumably - indent
 * lengths accumulated from 0), and reading them through plain `char` would
 * turn values >= 128 negative on platforms where `char` is signed.
 */
void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
    VEC_CLEAR(scanner->section_stack);
    VEC_PUSH(scanner->section_stack, 0);
    VEC_CLEAR(scanner->indent_length_stack);
    VEC_PUSH(scanner->indent_length_stack, -1);
    VEC_CLEAR(scanner->bullet_stack);
    VEC_PUSH(scanner->bullet_stack, NOTABULLET);

    if (length == 0) {
        return;
    }

    size_t i = 0;
    const size_t list_count = (uint8_t)buffer[i++];

    /* Bytes 1..N: indent lengths. */
    for (; i <= list_count && i < length; i++) {
        VEC_PUSH(scanner->indent_length_stack, (uint8_t)buffer[i]);
    }

    /* Bytes N+1..2N: bullet kinds. */
    for (; i <= 2 * list_count && i < length; i++) {
        VEC_PUSH(scanner->bullet_stack, (uint8_t)buffer[i]);
    }

    /* Remaining bytes: section star counts. */
    for (; i < length; i++) {
        VEC_PUSH(scanner->section_stack, (uint8_t)buffer[i]);
    }
}
|
||||
|
||||
/*
 * Close the innermost list: pop its indent and bullet entries and report a
 * LISTEND token. Always returns true.
 *
 * Index 0 of each stack is a sentinel that VEC_BACK relies on, so it is never
 * popped; popping it would wrap `len` around and make later VEC_BACK reads go
 * out of bounds.
 */
static bool dedent(Scanner *scanner, TSLexer *lexer) {
    if (scanner->indent_length_stack->len > 1) {
        VEC_POP(scanner->indent_length_stack);
    }
    if (scanner->bullet_stack->len > 1) {
        VEC_POP(scanner->bullet_stack);
    }
    lexer->result_symbol = LISTEND;
    return true;
}
|
||||
|
||||
static bool in_error_recovery(const bool *valid_symbols) {
|
||||
return (valid_symbols[LISTSTART] && valid_symbols[LISTEND] &&
|
||||
valid_symbols[LISTITEMEND] && valid_symbols[BULLET] &&
|
||||
valid_symbols[HLSTARS] && valid_symbols[SECTIONEND] &&
|
||||
valid_symbols[ENDOFFILE]);
|
||||
}
|
||||
|
||||
/*
 * Try to read a list bullet at the current lexer position.
 *
 * Recognised forms, each of which must be followed by a whitespace character:
 *   "-", "+", "*"              -> DASH, PLUS, STAR
 *   one ASCII letter + "."     -> LOWERDOT / UPPERDOT
 *   one ASCII letter + ")"     -> LOWERPAREN / UPPERPAREN
 *   ASCII digits + "." or ")"  -> NUMDOT / NUMPAREN
 *
 * Returns NOTABULLET for anything else. The lexer is advanced over the bullet
 * characters that were examined even when NOTABULLET is returned; the
 * whitespace after the bullet is only inspected, never consumed.
 */
Bullet getbullet(TSLexer *lexer) {
    const int32_t first = lexer->lookahead;

    /* Single-character bullets. */
    if (first == '-' || first == '+' || first == '*') {
        advance(lexer);
        if (!iswspace(lexer->lookahead)) {
            return NOTABULLET;
        }
        if (first == '-') {
            return DASH;
        }
        return first == '+' ? PLUS : STAR;
    }

    /* Counter bullets: pick the result pair, then consume the counter. */
    Bullet with_dot;
    Bullet with_paren;
    if ('a' <= first && first <= 'z') {
        with_dot = LOWERDOT;
        with_paren = LOWERPAREN;
        advance(lexer);
    } else if ('A' <= first && first <= 'Z') {
        with_dot = UPPERDOT;
        with_paren = UPPERPAREN;
        advance(lexer);
    } else if ('0' <= first && first <= '9') {
        with_dot = NUMDOT;
        with_paren = NUMPAREN;
        do {
            advance(lexer);
        } while ('0' <= lexer->lookahead && lexer->lookahead <= '9');
    } else {
        return NOTABULLET;
    }

    /* The counter must be closed by '.' or ')'. */
    Bullet candidate;
    if (lexer->lookahead == '.') {
        candidate = with_dot;
    } else if (lexer->lookahead == ')') {
        candidate = with_paren;
    } else {
        return NOTABULLET;
    }
    advance(lexer);

    return iswspace(lexer->lookahead) ? candidate : NOTABULLET;
}
|
||||
|
||||
/**
 * Core of the external scanner: decides which (zero-width or star-run)
 * external token, if any, starts at the current lexer position.
 *
 * The checks run in a fixed order:
 *   1. leading blanks are skipped while the indent is measured; reaching end
 *      of input here emits LISTEND, SECTIONEND or ENDOFFILE (first valid one);
 *   2. when a list (item) may end, the following line's indent and bullet are
 *      compared with the innermost open list;
 *   3. a run of '*' at column 0 followed by whitespace closes or opens a
 *      section;
 *   4. otherwise a bullet may continue the current list or open a nested one.
 *
 * @param scanner        scanner state (indent, bullet and section stacks)
 * @param lexer          tree-sitter lexer positioned at the candidate token
 * @param valid_symbols  per-token flags, indexed by the token enum
 * @return true when lexer->result_symbol has been set, false otherwise
 */
bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
    if (in_error_recovery(valid_symbols))
        return false;

    // - Section ends
    int16_t indent_length = 0;
    lexer->mark_end(lexer);
    for (;;) {
        if (lexer->lookahead == ' ') {
            indent_length++;
        } else if (lexer->lookahead == '\t') {
            indent_length += 8;  // a tab counts as eight columns
        } else if (lexer->eof(lexer)) {
            // Ask the lexer for end of input instead of comparing lookahead
            // with '\0': a NUL byte inside the document is not end of file.
            if (valid_symbols[LISTEND]) {
                lexer->result_symbol = LISTEND;
            } else if (valid_symbols[SECTIONEND]) {
                lexer->result_symbol = SECTIONEND;
            } else if (valid_symbols[ENDOFFILE]) {
                lexer->result_symbol = ENDOFFILE;
            } else
                return false;

            return true;
        } else {
            break;
        }
        skip(lexer);
    }

    // - Listitem ends
    // Listend -> end of a line, looking for:
    // 1. dedent
    // 2. same indent, not a bullet
    // 3. two eols
    int16_t newlines = 0;
    if (valid_symbols[LISTEND] || valid_symbols[LISTITEMEND]) {
        for (;;) {
            if (lexer->lookahead == ' ') {
                indent_length++;
            } else if (lexer->lookahead == '\t') {
                indent_length += 8;
            } else if (lexer->eof(lexer)) {
                // Genuine end of input closes the innermost list.
                return dedent(scanner, lexer);
            } else if (lexer->lookahead == '\n') {
                // A second newline (blank line) ends the list.
                if (++newlines > 1)
                    return dedent(scanner, lexer);
                indent_length = 0;  // indent is measured on the new line
            } else {
                break;
            }
            skip(lexer);
        }

        if (indent_length < VEC_BACK(scanner->indent_length_stack)) {
            return dedent(scanner, lexer);
        } else if (indent_length == VEC_BACK(scanner->indent_length_stack)) {
            // Same column: only a bullet of the same kind continues the list.
            if (getbullet(lexer) == VEC_BACK(scanner->bullet_stack)) {
                lexer->result_symbol = LISTITEMEND;
                return true;
            }
            return dedent(scanner, lexer);
        }
    }

    // - Col=0 star
    if (indent_length == 0 && lexer->lookahead == '*') {
        lexer->mark_end(lexer);
        int16_t stars = 1;
        skip(lexer);
        while (lexer->lookahead == '*') {
            stars++;
            skip(lexer);
        }

        if (valid_symbols[SECTIONEND] && iswspace(lexer->lookahead) &&
            stars > 0 && stars <= VEC_BACK(scanner->section_stack)) {
            // A headline at the same or a shallower level closes the section.
            VEC_POP(scanner->section_stack);
            lexer->result_symbol = SECTIONEND;
            return true;
        } else if (valid_symbols[HLSTARS] && iswspace(lexer->lookahead)) {
            VEC_PUSH(scanner->section_stack, stars);
            lexer->result_symbol = HLSTARS;
            return true;
        }
        return false;
    }

    // - Liststart and bullets
    if ((valid_symbols[LISTSTART] || valid_symbols[BULLET]) && newlines == 0) {
        Bullet bullet = getbullet(lexer);

        if (valid_symbols[BULLET] &&
            bullet == VEC_BACK(scanner->bullet_stack) &&
            indent_length == VEC_BACK(scanner->indent_length_stack)) {
            // Bullet of the current list: the token covers the bullet text.
            lexer->mark_end(lexer);
            lexer->result_symbol = BULLET;
            return true;
        } else if (valid_symbols[LISTSTART] && bullet != NOTABULLET &&
                   indent_length > VEC_BACK(scanner->indent_length_stack)) {
            // Deeper bullet: open a nested list and remember its shape.
            VEC_PUSH(scanner->indent_length_stack, indent_length);
            VEC_PUSH(scanner->bullet_stack, bullet);
            lexer->result_symbol = LISTSTART;
            return true;
        }
    }

    return false; // default
}
|
||||
|
||||
void *tree_sitter_org_external_scanner_create() {
|
||||
Scanner *scanner = (Scanner *)calloc(1, sizeof(Scanner));
|
||||
scanner->indent_length_stack = (stack *)calloc(1, sizeof(stack));
|
||||
scanner->bullet_stack = (stack *)calloc(1, sizeof(stack));
|
||||
scanner->section_stack = (stack *)calloc(1, sizeof(stack));
|
||||
deserialize(scanner, NULL, 0);
|
||||
return scanner;
|
||||
}
|
||||
|
||||
/**
 * Tree-sitter hook: forwards to scan() with the payload viewed as a Scanner.
 *
 * @return whatever scan() returns
 */
bool tree_sitter_org_external_scanner_scan(void *payload, TSLexer *lexer,
                                           const bool *valid_symbols) {
    return scan((Scanner *)payload, lexer, valid_symbols);
}
|
||||
|
||||
unsigned tree_sitter_org_external_scanner_serialize(void *payload,
|
||||
char *buffer) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
return serialize(scanner, buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_org_external_scanner_deserialize(void *payload,
|
||||
const char *buffer,
|
||||
unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
deserialize(scanner, buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_org_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
VEC_FREE(scanner->indent_length_stack);
|
||||
VEC_FREE(scanner->bullet_stack);
|
||||
VEC_FREE(scanner->section_stack);
|
||||
free(scanner->indent_length_stack);
|
||||
free(scanner->bullet_stack);
|
||||
free(scanner->section_stack);
|
||||
free(scanner);
|
||||
}
|
||||
279
src/scanner.cc
279
src/scanner.cc
|
|
@ -1,279 +0,0 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
#include <vector>
|
||||
#include <cwctype>
|
||||
|
||||
namespace {
|
||||
|
||||
using std::vector;
|
||||
using std::iswspace;
|
||||
|
||||
// External tokens this scanner can produce. The values index valid_symbols,
// so the order matters (presumably it mirrors the grammar's externals list;
// confirm against grammar.js before reordering).
enum TokenType {
  LISTSTART = 0,  // a bullet indented deeper than the innermost open list
  LISTEND,        // the innermost list closes (dedent, blank line, end of input)
  LISTITEMEND,    // next line has the same indent and the same bullet kind
  BULLET,         // bullet matching the innermost list's kind and indent
  HLSTARS,        // run of '*' at column 0 followed by whitespace
  SECTIONEND,     // headline at the same or a shallower level, or end of input
  ENDOFFILE,      // end of input when neither LISTEND nor SECTIONEND is valid
};
|
||||
|
||||
// Kinds of list bullet recognised by getbullet(). Every kind must be
// followed by whitespace to count as a bullet.
enum Bullet {
  NOTABULLET = 0,  // anything that is not a bullet
  DASH,            // "-"
  PLUS,            // "+"
  STAR,            // "*"
  LOWERDOT,        // one of a-z, then "."
  UPPERDOT,        // one of A-Z, then "."
  LOWERPAREN,      // one of a-z, then ")"
  UPPERPAREN,      // one of A-Z, then ")"
  NUMDOT,          // one or more digits, then "."
  NUMPAREN,        // one or more digits, then ")"
};
|
||||
|
||||
struct Scanner {
|
||||
vector<int16_t> indent_length_stack;
|
||||
vector<int16_t> bullet_stack;
|
||||
vector<int16_t> section_stack;
|
||||
|
||||
Scanner() {
|
||||
deserialize(NULL, 0);
|
||||
}
|
||||
|
||||
unsigned serialize(char *buffer) {
|
||||
size_t i = 0;
|
||||
|
||||
size_t indent_count = indent_length_stack.size() - 1;
|
||||
if (indent_count > UINT8_MAX) indent_count = UINT8_MAX;
|
||||
buffer[i++] = indent_count;
|
||||
|
||||
vector<int16_t>::iterator
|
||||
iter = indent_length_stack.begin() + 1,
|
||||
end = indent_length_stack.end();
|
||||
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
iter = bullet_stack.begin() + 1;
|
||||
end = bullet_stack.end();
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
iter = section_stack.begin() + 1;
|
||||
end = section_stack.end();
|
||||
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void deserialize(const char *buffer, unsigned length) {
|
||||
section_stack.clear();
|
||||
section_stack.push_back(0);
|
||||
indent_length_stack.clear();
|
||||
indent_length_stack.push_back(-1);
|
||||
bullet_stack.clear();
|
||||
bullet_stack.push_back(NOTABULLET);
|
||||
|
||||
if (length == 0) return;
|
||||
|
||||
size_t i = 0;
|
||||
|
||||
size_t indent_count = (uint8_t)buffer[i++];
|
||||
|
||||
for (; i <= indent_count ; i++) indent_length_stack.push_back(buffer[i]);
|
||||
for (; i <= 2 * indent_count; i++) bullet_stack.push_back(buffer[i]);
|
||||
for (; i < length ; i++) section_stack.push_back(buffer[i]);
|
||||
|
||||
}
|
||||
|
||||
void advance(TSLexer *lexer) {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
void skip(TSLexer *lexer) {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
bool dedent(TSLexer *lexer) {
|
||||
indent_length_stack.pop_back();
|
||||
bullet_stack.pop_back();
|
||||
lexer->result_symbol = LISTEND;
|
||||
return true;
|
||||
}
|
||||
|
||||
Bullet getbullet(TSLexer *lexer) {
|
||||
if (lexer->lookahead == '-') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return DASH;
|
||||
} else if (lexer->lookahead == '+') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return PLUS;
|
||||
} else if (lexer->lookahead == '*') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return STAR;
|
||||
} else if ('a' <= lexer->lookahead && lexer->lookahead <= 'z') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '.') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return LOWERDOT;
|
||||
} else if (lexer->lookahead == ')') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return LOWERPAREN;
|
||||
}
|
||||
} else if ('A' <= lexer->lookahead && lexer->lookahead <= 'Z') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '.') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return UPPERDOT;
|
||||
} else if (lexer->lookahead == ')') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return UPPERPAREN;
|
||||
}
|
||||
} else if ('0' <= lexer->lookahead && lexer->lookahead <= '9') {
|
||||
do {
|
||||
advance(lexer);
|
||||
} while ('0' <= lexer->lookahead && lexer->lookahead <= '9');
|
||||
if (lexer->lookahead == '.') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return NUMDOT;
|
||||
} else if (lexer->lookahead == ')') {
|
||||
advance(lexer);
|
||||
if (iswspace(lexer->lookahead)) return NUMPAREN;
|
||||
}
|
||||
}
|
||||
return NOTABULLET;
|
||||
}
|
||||
|
||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
|
||||
// - Section ends
|
||||
int16_t indent_length = 0;
|
||||
lexer->mark_end(lexer);
|
||||
for (;;) {
|
||||
if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
} else if (lexer->lookahead == '\t') {
|
||||
indent_length += 8;
|
||||
} else if (lexer->lookahead == '\0') {
|
||||
|
||||
if (valid_symbols[LISTEND]) { lexer->result_symbol = LISTEND; }
|
||||
else if (valid_symbols[SECTIONEND]) { lexer->result_symbol = SECTIONEND; }
|
||||
else if (valid_symbols[ENDOFFILE]) { lexer->result_symbol = ENDOFFILE; }
|
||||
else return false;
|
||||
|
||||
return true;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
// - Listiem ends
|
||||
// Listend -> end of a line, looking for:
|
||||
// 1. dedent
|
||||
// 2. same indent, not a bullet
|
||||
// 3. two eols
|
||||
int16_t newlines = 0;
|
||||
if (valid_symbols[LISTEND] || valid_symbols[LISTITEMEND]) {
|
||||
for (;;) {
|
||||
if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
} else if (lexer->lookahead == '\t') {
|
||||
indent_length += 8;
|
||||
} else if (lexer->lookahead == '\0') {
|
||||
return dedent(lexer);
|
||||
} else if (lexer->lookahead == '\n') {
|
||||
if (++newlines > 1) return dedent(lexer);
|
||||
indent_length = 0;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
if (indent_length < indent_length_stack.back()) {
|
||||
return dedent(lexer);
|
||||
} else if (indent_length == indent_length_stack.back()) {
|
||||
if (getbullet(lexer) == bullet_stack.back()) {
|
||||
lexer->result_symbol = LISTITEMEND;
|
||||
return true;
|
||||
}
|
||||
return dedent(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
// - Col=0 star
|
||||
if (indent_length == 0 && lexer->lookahead == '*') {
|
||||
lexer->mark_end(lexer);
|
||||
int16_t stars = 1;
|
||||
skip(lexer);
|
||||
while (lexer->lookahead == '*') {
|
||||
stars++;
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
if (valid_symbols[SECTIONEND] && iswspace(lexer->lookahead) && stars > 0 && stars <= section_stack.back()) {
|
||||
section_stack.pop_back();
|
||||
lexer->result_symbol = SECTIONEND;
|
||||
return true;
|
||||
} else if (valid_symbols[HLSTARS] && iswspace(lexer->lookahead)) {
|
||||
section_stack.push_back(stars);
|
||||
lexer->result_symbol = HLSTARS;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// - Liststart and bullets
|
||||
if ((valid_symbols[LISTSTART] || valid_symbols[BULLET]) && newlines == 0) {
|
||||
Bullet bullet = getbullet(lexer);
|
||||
|
||||
if (valid_symbols[BULLET] && bullet == bullet_stack.back() && indent_length == indent_length_stack.back()) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = BULLET;
|
||||
return true;
|
||||
} else if (valid_symbols[LISTSTART] && bullet != NOTABULLET && indent_length > indent_length_stack.back()) {
|
||||
indent_length_stack.push_back(indent_length);
|
||||
bullet_stack.push_back(bullet);
|
||||
lexer->result_symbol = LISTSTART;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false; // default
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *tree_sitter_org_external_scanner_create() {
|
||||
return new Scanner();
|
||||
}
|
||||
|
||||
bool tree_sitter_org_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_org_external_scanner_serialize(void *payload, char *buffer) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->serialize(buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_org_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
scanner->deserialize(buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_org_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
delete scanner;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -123,6 +123,7 @@ struct TSLanguage {
|
|||
unsigned (*serialize)(void *, char *);
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
const TSStateId *primary_state_ids;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue