Add a tree-sitter based parser
This commit is contained in:
parent
be4902eccc
commit
2b2cf4effd
3 changed files with 189 additions and 0 deletions
115
src/ts-parser/ts-parser.cc
Normal file
115
src/ts-parser/ts-parser.cc
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
#include "ts-parser.hh"

#include <cassert>
#include <codecvt>
#include <cstdint>
#include <cstring>
#include <locale>
#include <string>

#include "tree_sitter/api.h"
|
||||
|
||||
extern "C" {
|
||||
const TSLanguage *tree_sitter_org(void);
|
||||
}
|
||||
|
||||
namespace sextant
|
||||
{
|
||||
namespace parser
|
||||
{
|
||||
// Shared tree-sitter parser instance; assigned in init_parser_lib() and used
// by parse_document().
static TSParser *parser = nullptr;
|
||||
|
||||
uint32_t decode_ecl_character(const uint8_t *str, uint32_t length,
|
||||
int32_t *code_point) {
|
||||
assert(length >= sizeof(ecl_character));
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>
|
||||
converter;
|
||||
|
||||
char32_t c32 = *reinterpret_cast<const char32_t*>(str);
|
||||
std::string str8 = converter.to_bytes(c32);
|
||||
*code_point = 0;
|
||||
std::memcpy(code_point, str8.c_str(), str8.size());
|
||||
return sizeof(ecl_character);
|
||||
}
|
||||
|
||||
const char *read_from_gap_buffer(void *payload, uint32_t byte_offset,
|
||||
TSPoint position, uint32_t *bytes_read) {
|
||||
cl_object l_gap_buffer = reinterpret_cast<cl_object>(payload);
|
||||
cl_object l_str = ECL_CONS_CAR(l_gap_buffer);
|
||||
cl_object l_gap_start = ECL_CONS_CAR(ECL_CONS_CDR(l_gap_buffer));
|
||||
assert(ECL_FIXNUMP(l_gap_start));
|
||||
|
||||
if (ECL_BASE_STRING_P(l_str)) {
|
||||
uint32_t gap_start = ecl_fixnum(l_gap_start);
|
||||
|
||||
if (byte_offset >= ecl_fixnum(l_gap_start)) {
|
||||
cl_object l_gap_end =
|
||||
ECL_CONS_CAR(ECL_CONS_CDR(ECL_CONS_CDR(l_gap_buffer)));
|
||||
assert(ECL_FIXNUMP(l_gap_end));
|
||||
uint32_t gap_end = ecl_fixnum(l_gap_end);
|
||||
|
||||
byte_offset += gap_end - gap_start;
|
||||
}
|
||||
|
||||
if(byte_offset < l_str->base_string.dim) {
|
||||
*bytes_read = sizeof(ecl_base_char);
|
||||
return reinterpret_cast<char *>(l_str->base_string.self +
|
||||
byte_offset);
|
||||
}
|
||||
} else {
|
||||
uint32_t gap_start =
|
||||
ecl_fixnum(l_gap_start) * sizeof(ecl_character);
|
||||
|
||||
if (byte_offset >= ecl_fixnum(l_gap_start)) {
|
||||
cl_object l_gap_end =
|
||||
ECL_CONS_CAR(ECL_CONS_CDR(ECL_CONS_CDR(l_gap_buffer)));
|
||||
assert(ECL_FIXNUMP(l_gap_end));
|
||||
uint32_t gap_end =
|
||||
ecl_fixnum(l_gap_end) * sizeof(ecl_character);
|
||||
|
||||
byte_offset += gap_end - gap_start;
|
||||
}
|
||||
|
||||
if(byte_offset < l_str->string.dim * sizeof(ecl_character)) {
|
||||
*bytes_read = sizeof(ecl_character);
|
||||
return reinterpret_cast<char *>(l_str->string.self + byte_offset / (sizeof(ecl_character)));
|
||||
}
|
||||
}
|
||||
|
||||
*bytes_read = 0;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * Parses the text held in a gap buffer with the shared tree-sitter parser.
 *
 * Base strings are handed to tree-sitter as UTF-8; any other string is read
 * through the custom `decode_ecl_character` decoder.
 *
 * @param l_gap_buffer  Lisp list whose first element is the string to parse;
 *                      it is passed as the payload to `read_from_gap_buffer`.
 * @return The `TSTree *` wrapped with `ecl_make_pointer`, or `ECL_NIL` when
 *         tree-sitter produced no tree. Nothing in this function releases
 *         the tree.
 */
cl_object parse_document(cl_object l_gap_buffer) {
    assert(parser != nullptr);
    assert(ECL_LISTP(l_gap_buffer));

    const bool is_base_string = ECL_BASE_STRING_P(ECL_CONS_CAR(l_gap_buffer));

    TSInput input = {
        l_gap_buffer,
        &read_from_gap_buffer,
        is_base_string ? TSInputEncodingUTF8 : TSInputEncodingCustom,
        is_base_string ? nullptr : &decode_ecl_character,
    };

    TSTree *tree = ts_parser_parse(parser, nullptr, input);

    // ts_parser_parse can return NULL; do not wrap or dereference it.
    if (tree == nullptr)
        return ECL_NIL;

    return ecl_make_pointer(tree);
}
|
||||
|
||||
void init_parser_lib()
|
||||
{
|
||||
cl_object ts = ecl_make_constant_base_string("TS", 6);
|
||||
if (cl_find_package(ts) == ECL_NIL)
|
||||
ecl_make_package(ts, ECL_NIL, ECL_NIL, ECL_NIL);
|
||||
si_select_package(ts);
|
||||
ecl_def_c_function(ecl_read_from_cstring("parse-document"),
|
||||
(cl_objectfn_fixed)parse_document, 1);
|
||||
|
||||
parser = ts_parser_new();
|
||||
ts_parser_set_language(parser, tree_sitter_org());
|
||||
}
|
||||
} // namespace parser
|
||||
} // namespace sextant
|
||||
15
src/ts-parser/ts-parser.hh
Normal file
15
src/ts-parser/ts-parser.hh
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
#ifndef TS_PARSER_H
|
||||
#define TS_PARSER_H
|
||||
|
||||
#include <ecl/ecl.h>
|
||||
|
||||
namespace sextant
|
||||
{
|
||||
namespace parser
|
||||
{
|
||||
cl_object parse_document(cl_object l_gap_buffer);
|
||||
void init_parer_lib();
|
||||
}
|
||||
}
|
||||
|
||||
#endif // TS_PARSER_H
|
||||
59
ts-parser.pro
Normal file
59
ts-parser.pro
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# Static library bundling the tree-sitter runtime, the Org grammar and the
# ECL glue code in src/ts-parser.
TEMPLATE = lib
CONFIG += staticlib debug
TARGET = sextant-parser
DESTDIR = $$PWD
OBJECTS_DIR = $$PWD/tmp/parser/
INCLUDEPATH += $$PWD/external/tree-sitter/lib/src $$PWD/external/tree-sitter/lib/include
INCLUDEPATH += $$PWD/external/tree-sitter-org/src
LIBS += -lecl
QMAKE_CXXFLAGS += -std=c++2a -Wno-parentheses -Wno-unused-local-typedefs -Wno-array-bounds -Wno-maybe-uninitialized -Wno-restrict

target.path = $$[QT_INSTALL_LIBS]

INSTALLS = target

# NOTE(review): two sources below are named parser.c; with a single
# OBJECTS_DIR they may be compiled to the same object file - confirm.
# NOTE(review): lib.c is listed together with the individual runtime sources;
# verify that it does not already include them.
SOURCES += \
    src/ts-parser/ts-parser.cc \
    external/tree-sitter/lib/src/lib.c \
    external/tree-sitter/lib/src/alloc.c \
    external/tree-sitter/lib/src/get_changed_ranges.c \
    external/tree-sitter/lib/src/language.c \
    external/tree-sitter/lib/src/lexer.c \
    external/tree-sitter/lib/src/node.c \
    external/tree-sitter/lib/src/parser.c \
    external/tree-sitter/lib/src/query.c \
    external/tree-sitter/lib/src/stack.c \
    external/tree-sitter/lib/src/subtree.c \
    external/tree-sitter/lib/src/tree.c \
    external/tree-sitter/lib/src/tree_cursor.c \
    external/tree-sitter/lib/src/wasm_store.c \
    external/tree-sitter-org/src/parser.c \
    external/tree-sitter-org/src/scanner.c

HEADERS += \
    src/ts-parser/ts-parser.hh \
    external/tree-sitter/lib/src/alloc.h \
    external/tree-sitter/lib/src/array.h \
    external/tree-sitter/lib/src/atomic.h \
    external/tree-sitter/lib/src/clock.h \
    external/tree-sitter/lib/src/error_costs.h \
    external/tree-sitter/lib/src/get_changed_ranges.h \
    external/tree-sitter/lib/src/host.h \
    external/tree-sitter/lib/src/language.h \
    external/tree-sitter/lib/src/length.h \
    external/tree-sitter/lib/src/lexer.h \
    external/tree-sitter/lib/src/parser.h \
    external/tree-sitter/lib/src/point.h \
    external/tree-sitter/lib/src/reduce_action.h \
    external/tree-sitter/lib/src/reusable_node.h \
    external/tree-sitter/lib/src/stack.h \
    external/tree-sitter/lib/src/subtree.h \
    external/tree-sitter/lib/src/tree_cursor.h \
    external/tree-sitter/lib/src/tree.h \
    external/tree-sitter/lib/src/ts_assert.h \
    external/tree-sitter/lib/src/unicode.h \
    external/tree-sitter/lib/src/wasm_store.h \
    external/tree-sitter/lib/include/tree_sitter/api.h \
    external/tree-sitter-org/src/tree_sitter/alloc.h \
    external/tree-sitter-org/src/tree_sitter/array.h \
    external/tree-sitter-org/src/tree_sitter/parser.h
|
||||
Loading…
Add table
Reference in a new issue