Add a tree-sitter based parser

This commit is contained in:
Renaud Casenave-Péré 2025-08-26 21:16:51 +02:00
parent be4902eccc
commit 0d8207a8fe
3 changed files with 189 additions and 0 deletions

115
src/ts-parser/ts-parser.cc Normal file
View file

@ -0,0 +1,115 @@
#include "ts-parser.hh"
#include "tree_sitter/api.h"
#include <cassert>
#include <codecvt>
#include <cstdint>
#include <cstring>
#include <locale>
#include <string>
// Entry point of the tree-sitter Org grammar. It is declared here by hand
// and needs C linkage so the name is not mangled.
extern "C" const TSLanguage *tree_sitter_org();
namespace sextant
{
namespace parser
{
// File-local tree-sitter parser instance. It is created and configured in
// init_parser_lib() and used by parse_document().
static TSParser *parser;
uint32_t decode_ecl_character(const uint8_t *str, uint32_t length,
int32_t *code_point) {
assert(length >= sizeof(ecl_character));
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>
converter;
char32_t c32 = *reinterpret_cast<const char32_t*>(str);
std::string str8 = converter.to_bytes(c32);
*code_point = 0;
std::memcpy(code_point, str8.c_str(), str8.size());
return sizeof(ecl_character);
}
/// tree-sitter read callback that serves text out of a gap buffer.
///
/// `payload` is a Lisp list whose first three elements are read here as
/// (string gap-start gap-end), with both gap bounds being fixnums expressed
/// in characters. `byte_offset` is the logical offset in the text, i.e. with
/// the gap removed; offsets at or after the gap start are shifted by the gap
/// size to obtain the physical position in the string.
///
/// Exactly one character is returned per call: one byte for base strings,
/// sizeof(ecl_character) bytes otherwise.
///
/// @param payload     The gap buffer list given as TSInput::payload.
/// @param byte_offset Logical byte offset requested by tree-sitter.
/// @param position    Row/column of the request; not used.
/// @param bytes_read  Receives the number of bytes available at the returned
///                    pointer, or 0 at the end of the text.
/// @return Pointer into the string storage, or nullptr at the end of the text.
const char *read_from_gap_buffer(void *payload, uint32_t byte_offset,
                                 TSPoint position, uint32_t *bytes_read) {
  cl_object l_gap_buffer = static_cast<cl_object>(payload);
  cl_object l_str = ECL_CONS_CAR(l_gap_buffer);
  cl_object l_gap_start = ECL_CONS_CAR(ECL_CONS_CDR(l_gap_buffer));
  assert(ECL_FIXNUMP(l_gap_start));

  if (ECL_BASE_STRING_P(l_str)) {
    // One byte per character: character indices are byte offsets.
    const uint32_t gap_start = ecl_fixnum(l_gap_start);
    if (byte_offset >= gap_start) {
      cl_object l_gap_end =
          ECL_CONS_CAR(ECL_CONS_CDR(ECL_CONS_CDR(l_gap_buffer)));
      assert(ECL_FIXNUMP(l_gap_end));
      const uint32_t gap_end = ecl_fixnum(l_gap_end);
      byte_offset += gap_end - gap_start;
    }
    if (byte_offset < l_str->base_string.dim) {
      *bytes_read = sizeof(ecl_base_char);
      return reinterpret_cast<char *>(l_str->base_string.self + byte_offset);
    }
  } else {
    // Wide characters: scale the gap bounds from characters to bytes so they
    // can be compared with, and added to, the byte offset.
    const uint32_t gap_start = ecl_fixnum(l_gap_start) * sizeof(ecl_character);
    // The comparison must use the byte-scaled gap start; comparing against
    // the character index would skip the gap for offsets still before it.
    if (byte_offset >= gap_start) {
      cl_object l_gap_end =
          ECL_CONS_CAR(ECL_CONS_CDR(ECL_CONS_CDR(l_gap_buffer)));
      assert(ECL_FIXNUMP(l_gap_end));
      const uint32_t gap_end = ecl_fixnum(l_gap_end) * sizeof(ecl_character);
      byte_offset += gap_end - gap_start;
    }
    if (byte_offset < l_str->string.dim * sizeof(ecl_character)) {
      *bytes_read = sizeof(ecl_character);
      // `self` points to ecl_character elements, so index by character.
      return reinterpret_cast<char *>(l_str->string.self +
                                      byte_offset / sizeof(ecl_character));
    }
  }

  // Past the end of the string: signal end of input.
  *bytes_read = 0;
  return nullptr;
}
/// Parses the text held in a gap buffer with the file-local tree-sitter
/// parser.
///
/// Base strings are fed to tree-sitter as UTF-8; any other string is fed
/// through the custom encoding, decoded by decode_ecl_character().
/// NOTE(review): treating a base string as UTF-8 is only byte-exact when it
/// holds ASCII characters — confirm against the callers.
///
/// @param l_gap_buffer Lisp list whose first element is the string to parse;
///                     it is also passed to read_from_gap_buffer() as payload.
/// @return A Lisp foreign pointer wrapping the resulting TSTree*. The tree is
///         not deleted here; ts_parser_parse() may also yield a null tree, in
///         which case the wrapped pointer is null.
cl_object parse_document(cl_object l_gap_buffer) {
  assert(ECL_LISTP(l_gap_buffer));

  const bool is_base_string = ECL_BASE_STRING_P(ECL_CONS_CAR(l_gap_buffer));
  const TSInput input =
      is_base_string
          ? TSInput{l_gap_buffer, &read_from_gap_buffer, TSInputEncodingUTF8,
                    nullptr}
          : TSInput{l_gap_buffer, &read_from_gap_buffer,
                    TSInputEncodingCustom, &decode_ecl_character};

  // No S-expression dump of the tree is produced here: ts_node_string()
  // returns a heap-allocated string that would have to be freed, and its
  // result was never used.
  TSTree *tree = ts_parser_parse(parser, nullptr, input);
  return ecl_make_pointer(tree);
}
void init_parser_lib()
{
cl_object ts = ecl_make_constant_base_string("TS", 6);
if (cl_find_package(ts) == ECL_NIL)
ecl_make_package(ts, ECL_NIL, ECL_NIL, ECL_NIL);
si_select_package(ts);
ecl_def_c_function(ecl_read_from_cstring("parse-document"),
(cl_objectfn_fixed)parse_document, 1);
parser = ts_parser_new();
ts_parser_set_language(parser, tree_sitter_org());
}
} // namespace parser
} // namespace sextant

View file

@ -0,0 +1,15 @@
#ifndef TS_PARSER_H
#define TS_PARSER_H
#include <ecl/ecl.h>
namespace sextant
{
namespace parser
{
/// Parses the text of the gap buffer `l_gap_buffer` (a Lisp list whose first
/// element is the string) and returns a Lisp foreign pointer wrapping the
/// resulting tree-sitter tree.
cl_object parse_document(cl_object l_gap_buffer);

/// Sets up the "TS" Lisp package, registers `parse-document` and creates the
/// tree-sitter parser. The name matches the definition in ts-parser.cc.
void init_parser_lib();
}
}
#endif // TS_PARSER_H

59
ts-parser.pro Normal file
View file

@ -0,0 +1,59 @@
# Builds the sextant-parser library: the ECL/tree-sitter glue code together
# with the vendored tree-sitter runtime and the Org grammar.
TEMPLATE = lib
CONFIG += debug
TARGET = sextant-parser
DESTDIR = $$PWD
OBJECTS_DIR = $$PWD/tmp/parser/
INCLUDEPATH += $$PWD/external/tree-sitter/lib/src $$PWD/external/tree-sitter/lib/include
INCLUDEPATH += $$PWD/external/tree-sitter-org/src
LIBS += -lecl
QMAKE_CXXFLAGS += -std=c++2a -Wno-parentheses -Wno-unused-local-typedefs -Wno-array-bounds -Wno-maybe-uninitialized -Wno-restrict
target.path = $$[QT_INSTALL_LIBS]
INSTALLS = target
# The glue code lives in src/ts-parser/ (the directory the source file is
# added under), not src/parser/.
SOURCES += \
src/ts-parser/ts-parser.cc \
external/tree-sitter/lib/src/lib.c \
external/tree-sitter/lib/src/alloc.c \
external/tree-sitter/lib/src/get_changed_ranges.c \
external/tree-sitter/lib/src/language.c \
external/tree-sitter/lib/src/lexer.c \
external/tree-sitter/lib/src/node.c \
external/tree-sitter/lib/src/parser.c \
external/tree-sitter/lib/src/query.c \
external/tree-sitter/lib/src/stack.c \
external/tree-sitter/lib/src/subtree.c \
external/tree-sitter/lib/src/tree.c \
external/tree-sitter/lib/src/tree_cursor.c \
external/tree-sitter/lib/src/wasm_store.c \
external/tree-sitter-org/src/parser.c \
external/tree-sitter-org/src/scanner.c
# NOTE(review): ts-parser.hh is assumed to sit next to ts-parser.cc, which
# includes it by bare name - confirm its location.
HEADERS += \
src/ts-parser/ts-parser.hh \
external/tree-sitter/lib/src/alloc.h \
external/tree-sitter/lib/src/array.h \
external/tree-sitter/lib/src/atomic.h \
external/tree-sitter/lib/src/clock.h \
external/tree-sitter/lib/src/error_costs.h \
external/tree-sitter/lib/src/get_changed_ranges.h \
external/tree-sitter/lib/src/host.h \
external/tree-sitter/lib/src/language.h \
external/tree-sitter/lib/src/length.h \
external/tree-sitter/lib/src/lexer.h \
external/tree-sitter/lib/src/parser.h \
external/tree-sitter/lib/src/point.h \
external/tree-sitter/lib/src/reduce_action.h \
external/tree-sitter/lib/src/reusable_node.h \
external/tree-sitter/lib/src/stack.h \
external/tree-sitter/lib/src/subtree.h \
external/tree-sitter/lib/src/tree_cursor.h \
external/tree-sitter/lib/src/tree.h \
external/tree-sitter/lib/src/ts_assert.h \
external/tree-sitter/lib/src/unicode.h \
external/tree-sitter/lib/src/wasm_store.h \
external/tree-sitter/lib/include/tree_sitter/api.h \
external/tree-sitter-org/src/tree_sitter/alloc.h \
external/tree-sitter-org/src/tree_sitter/array.h \
external/tree-sitter-org/src/tree_sitter/parser.h