609 lines
14 KiB
C++
609 lines
14 KiB
C++
/*
|
|
* Copyright (c) 2003-2012, John Wiegley. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* - Neither the name of New Artisans LLC nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <system.hh>
|
|
|
|
#include "token.h"
|
|
#include "parser.h"
|
|
|
|
namespace ledger {
|
|
|
|
int expr_t::token_t::parse_reserved_word(std::istream& in)
|
|
{
|
|
char c = static_cast<char>(in.peek());
|
|
|
|
if (c == 'a' || c == 'd' || c == 'e' || c == 'f' ||
|
|
c == 'i' || c == 'o' || c == 'n' || c == 't') {
|
|
length = 0;
|
|
|
|
char buf[6];
|
|
READ_INTO_(in, buf, 5, c, length, std::isalpha(c));
|
|
|
|
switch (buf[0]) {
|
|
case 'a':
|
|
if (std::strcmp(buf, "and") == 0) {
|
|
symbol[0] = '&';
|
|
symbol[1] = '\0';
|
|
kind = KW_AND;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 'd':
|
|
if (std::strcmp(buf, "div") == 0) {
|
|
symbol[0] = '/';
|
|
symbol[1] = '\0';
|
|
kind = KW_DIV;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 'e':
|
|
if (std::strcmp(buf, "else") == 0) {
|
|
std::strcpy(symbol, "else");
|
|
kind = KW_ELSE;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 'f':
|
|
if (std::strcmp(buf, "false") == 0) {
|
|
std::strcpy(symbol, "false");
|
|
kind = VALUE;
|
|
value = false;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 'i':
|
|
if (std::strcmp(buf, "if") == 0) {
|
|
symbol[0] = 'i';
|
|
symbol[1] = 'f';
|
|
symbol[2] = '\0';
|
|
kind = KW_IF;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 'o':
|
|
if (std::strcmp(buf, "or") == 0) {
|
|
symbol[0] = '|';
|
|
symbol[1] = '\0';
|
|
kind = KW_OR;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 'n':
|
|
if (std::strcmp(buf, "not") == 0) {
|
|
symbol[0] = '!';
|
|
symbol[1] = '\0';
|
|
kind = EXCLAM;
|
|
return 1;
|
|
}
|
|
break;
|
|
|
|
case 't':
|
|
if (std::strcmp(buf, "true") == 0) {
|
|
std::strcpy(symbol, "true");
|
|
kind = VALUE;
|
|
value = true;
|
|
return 1;
|
|
}
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
void expr_t::token_t::parse_ident(std::istream& in)
|
|
{
|
|
kind = IDENT;
|
|
length = 0;
|
|
|
|
char c, buf[256];
|
|
READ_INTO_(in, buf, 255, c, length, std::isalnum(c) || c == '_');
|
|
|
|
value.set_string(buf);
|
|
}
|
|
|
|
void expr_t::token_t::next(std::istream& in, const parse_flags_t& pflags)
|
|
{
|
|
if (in.eof()) {
|
|
kind = TOK_EOF;
|
|
return;
|
|
}
|
|
if (! in.good())
|
|
throw_(parse_error, _("Input stream no longer valid"));
|
|
|
|
char c = peek_next_nonws(in);
|
|
|
|
if (in.eof()) {
|
|
kind = TOK_EOF;
|
|
return;
|
|
}
|
|
if (! in.good())
|
|
throw_(parse_error, _("Input stream no longer valid"));
|
|
|
|
symbol[0] = c;
|
|
symbol[1] = '\0';
|
|
|
|
length = 1;
|
|
|
|
switch (c) {
|
|
case '&':
|
|
in.get(c);
|
|
c = static_cast<char>(in.peek());
|
|
if (c == '&') {
|
|
in.get(c);
|
|
kind = KW_AND;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = KW_AND;
|
|
break;
|
|
case '|':
|
|
in.get(c);
|
|
c = static_cast<char>(in.peek());
|
|
if (c == '|') {
|
|
in.get(c);
|
|
kind = KW_OR;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = KW_OR;
|
|
break;
|
|
|
|
case '(':
|
|
in.get(c);
|
|
kind = LPAREN;
|
|
break;
|
|
case ')':
|
|
in.get(c);
|
|
kind = RPAREN;
|
|
break;
|
|
|
|
case '[': {
|
|
in.get(c);
|
|
|
|
char buf[256];
|
|
READ_INTO_(in, buf, 255, c, length, c != ']');
|
|
if (c != ']')
|
|
expected(']', c);
|
|
|
|
in.get(c);
|
|
length++;
|
|
|
|
date_interval_t timespan(buf);
|
|
optional<date_t> begin = timespan.begin();
|
|
if (! begin)
|
|
throw_(parse_error,
|
|
_("Date specifier does not refer to a starting date"));
|
|
kind = VALUE;
|
|
value = *begin;
|
|
break;
|
|
}
|
|
|
|
case '\'':
|
|
case '"': {
|
|
char delim;
|
|
in.get(delim);
|
|
char buf[4096];
|
|
READ_INTO_(in, buf, 4095, c, length, c != delim);
|
|
if (c != delim)
|
|
expected(delim, c);
|
|
in.get(c);
|
|
length++;
|
|
kind = VALUE;
|
|
value.set_string(buf);
|
|
break;
|
|
}
|
|
|
|
case '{': {
|
|
in.get(c);
|
|
amount_t temp;
|
|
temp.parse(in, PARSE_NO_MIGRATE);
|
|
in.get(c);
|
|
if (c != '}')
|
|
expected('}', c);
|
|
length++;
|
|
kind = VALUE;
|
|
value = temp;
|
|
break;
|
|
}
|
|
|
|
case '!':
|
|
in.get(c);
|
|
c = static_cast<char>(in.peek());
|
|
if (c == '=') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = NEQUAL;
|
|
length = 2;
|
|
break;
|
|
}
|
|
else if (c == '~') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = NMATCH;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = EXCLAM;
|
|
break;
|
|
|
|
case '-':
|
|
in.get(c);
|
|
c = static_cast<char>(in.peek());
|
|
if (c == '>') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = ARROW;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = MINUS;
|
|
break;
|
|
case '+':
|
|
in.get(c);
|
|
kind = PLUS;
|
|
break;
|
|
|
|
case '*':
|
|
in.get(c);
|
|
kind = STAR;
|
|
break;
|
|
|
|
case '?':
|
|
in.get(c);
|
|
kind = QUERY;
|
|
break;
|
|
case ':':
|
|
in.get(c);
|
|
c = static_cast<char>(in.peek());
|
|
kind = COLON;
|
|
break;
|
|
|
|
case '/': {
|
|
in.get(c);
|
|
if (pflags.has_flags(PARSE_OP_CONTEXT)) { // operator context
|
|
kind = SLASH;
|
|
} else { // terminal context
|
|
// Read in the regexp
|
|
char buf[256];
|
|
READ_INTO_(in, buf, 255, c, length, c != '/');
|
|
if (c != '/')
|
|
expected('/', c);
|
|
in.get(c);
|
|
length++;
|
|
|
|
kind = VALUE;
|
|
value.set_mask(buf);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case '=':
|
|
in.get(c);
|
|
c = static_cast<char>(in.peek());
|
|
if (c == '~') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = MATCH;
|
|
length = 2;
|
|
break;
|
|
}
|
|
else if (c == '=') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = EQUAL;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = ASSIGN;
|
|
break;
|
|
|
|
case '<':
|
|
in.get(c);
|
|
if (static_cast<char>(in.peek()) == '=') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = LESSEQ;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = LESS;
|
|
break;
|
|
|
|
case '>':
|
|
in.get(c);
|
|
if (static_cast<char>(in.peek()) == '=') {
|
|
in.get(c);
|
|
symbol[1] = c;
|
|
symbol[2] = '\0';
|
|
kind = GREATEREQ;
|
|
length = 2;
|
|
break;
|
|
}
|
|
kind = GREATER;
|
|
break;
|
|
|
|
case '.':
|
|
in.get(c);
|
|
kind = DOT;
|
|
break;
|
|
|
|
case ',':
|
|
in.get(c);
|
|
kind = COMMA;
|
|
break;
|
|
|
|
case ';':
|
|
in.get(c);
|
|
kind = SEMI;
|
|
break;
|
|
|
|
default: {
|
|
istream_pos_type pos = in.tellg();
|
|
|
|
// First, check to see if it's a reserved word, such as: and or not
|
|
int result = parse_reserved_word(in);
|
|
if (std::isalpha(c) && result == 1)
|
|
break;
|
|
|
|
// If not, rewind back to the beginning of the word to scan it
|
|
// again. If the result was -1, it means no identifier was scanned
|
|
// so we don't have to rewind.
|
|
if (result == 0 || ! in.good()) {
|
|
in.clear();
|
|
in.seekg(pos, std::ios::beg);
|
|
if (in.fail())
|
|
throw_(parse_error, _("Failed to reset input stream"));
|
|
}
|
|
|
|
assert(in.good());
|
|
assert(! in.eof());
|
|
assert(static_cast<int>(in.tellg()) != -1);
|
|
|
|
// When in relaxed parsing mode, we want to migrate commodity flags
|
|
// so that any precision specified by the user updates the current
|
|
// maximum displayed precision.
|
|
parse_flags_t parse_flags;
|
|
|
|
parse_flags.add_flags(PARSE_NO_ANNOT);
|
|
if (pflags.has_flags(PARSE_NO_MIGRATE))
|
|
parse_flags.add_flags(PARSE_NO_MIGRATE);
|
|
if (pflags.has_flags(PARSE_NO_REDUCE))
|
|
parse_flags.add_flags(PARSE_NO_REDUCE);
|
|
|
|
try {
|
|
amount_t temp;
|
|
if (! temp.parse(in, parse_flags.plus_flags(PARSE_SOFT_FAIL))) {
|
|
// If the amount had no commodity, it must be an unambiguous
|
|
// variable reference
|
|
|
|
in.clear();
|
|
in.seekg(pos, std::ios::beg);
|
|
if (in.fail())
|
|
throw_(parse_error, _("Failed to reset input stream"));
|
|
|
|
c = static_cast<char>(in.peek());
|
|
if (! std::isalpha(c))
|
|
expected('\0', c);
|
|
|
|
parse_ident(in);
|
|
|
|
if (value.as_string().length() == 0) {
|
|
kind = ERROR;
|
|
symbol[0] = c;
|
|
symbol[1] = '\0';
|
|
throw_(parse_error, _("Failed to parse identifier"));
|
|
}
|
|
} else {
|
|
if (! in.good()) {
|
|
in.clear();
|
|
in.seekg(0, std::ios::end);
|
|
if (in.fail())
|
|
throw_(parse_error, _("Failed to reset input stream"));
|
|
}
|
|
|
|
kind = VALUE;
|
|
value = temp;
|
|
length = static_cast<std::size_t>(in.tellg() - pos);
|
|
}
|
|
}
|
|
catch (const std::exception&) {
|
|
kind = ERROR;
|
|
length = static_cast<std::size_t>(in.tellg() - pos);
|
|
throw;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void expr_t::token_t::rewind(std::istream& in)
|
|
{
|
|
in.clear();
|
|
in.seekg(- int(length), std::ios::cur);
|
|
if (in.fail())
|
|
throw_(parse_error, _("Failed to rewind input stream"));
|
|
}
|
|
|
|
|
|
void expr_t::token_t::unexpected(const char wanted)
|
|
{
|
|
kind_t prev_kind = kind;
|
|
|
|
kind = ERROR;
|
|
|
|
if (wanted == '\0') {
|
|
switch (prev_kind) {
|
|
case TOK_EOF:
|
|
throw_(parse_error, _("Unexpected end of expression"));
|
|
case IDENT:
|
|
throw_(parse_error, _("Unexpected symbol '%1'") << value);
|
|
case VALUE:
|
|
throw_(parse_error, _("Unexpected value '%1'") << value);
|
|
default:
|
|
throw_(parse_error, _("Unexpected expression token '%1'") << symbol);
|
|
}
|
|
} else {
|
|
switch (prev_kind) {
|
|
case TOK_EOF:
|
|
throw_(parse_error,
|
|
_("Unexpected end of expression (wanted '%1')") << wanted);
|
|
case IDENT:
|
|
throw_(parse_error,
|
|
_("Unexpected symbol '%1' (wanted '%2')") << value << wanted);
|
|
case VALUE:
|
|
throw_(parse_error,
|
|
_("Unexpected value '%1' (wanted '%2')") << value << wanted);
|
|
default:
|
|
throw_(parse_error, _("Unexpected expression token '%1' (wanted '%2')")
|
|
<< symbol << wanted);
|
|
}
|
|
}
|
|
}
|
|
|
|
void expr_t::token_t::expected(const char wanted, char c)
|
|
{
|
|
if (c == '\0' || c == -1) {
|
|
if (wanted == '\0' || wanted == -1)
|
|
throw_(parse_error, _("Unexpected end"));
|
|
else
|
|
throw_(parse_error, _("Missing '%1'") << wanted);
|
|
} else {
|
|
if (wanted == '\0' || wanted == -1)
|
|
throw_(parse_error, _("Invalid char '%1'") << c);
|
|
else
|
|
throw_(parse_error,
|
|
_("Invalid char '%1' (wanted '%2')") << c << wanted);
|
|
}
|
|
}
|
|
|
|
void expr_t::token_t::expected(const kind_t wanted)
|
|
{
|
|
try {
|
|
if (wanted == '\0' || wanted == -1)
|
|
throw_(parse_error, _("Invalid token '%1'") << *this);
|
|
else
|
|
throw_(parse_error,
|
|
_("Invalid token '%1' (wanted '%2')") << *this << wanted);
|
|
}
|
|
catch (...) {
|
|
kind = ERROR;
|
|
throw;
|
|
}
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream& out, const expr_t::token_t::kind_t& kind)
|
|
{
|
|
switch (kind) {
|
|
case expr_t::token_t::ERROR: out << "<error token>"; break;
|
|
case expr_t::token_t::VALUE: out << "<value>"; break;
|
|
case expr_t::token_t::IDENT: out << "<identifier>"; break;
|
|
case expr_t::token_t::MASK: out << "<regex mask>"; break;
|
|
|
|
case expr_t::token_t::LPAREN: out << "("; break;
|
|
case expr_t::token_t::RPAREN: out << ")"; break;
|
|
case expr_t::token_t::LBRACE: out << "{"; break;
|
|
case expr_t::token_t::RBRACE: out << "}"; break;
|
|
|
|
case expr_t::token_t::EQUAL: out << "=="; break;
|
|
case expr_t::token_t::NEQUAL: out << "!="; break;
|
|
case expr_t::token_t::LESS: out << "<"; break;
|
|
case expr_t::token_t::LESSEQ: out << "<="; break;
|
|
case expr_t::token_t::GREATER: out << ">"; break;
|
|
case expr_t::token_t::GREATEREQ: out << ">="; break;
|
|
|
|
case expr_t::token_t::ASSIGN: out << "="; break;
|
|
case expr_t::token_t::MATCH: out << "=~"; break;
|
|
case expr_t::token_t::NMATCH: out << "!~"; break;
|
|
case expr_t::token_t::MINUS: out << "-"; break;
|
|
case expr_t::token_t::PLUS: out << "+"; break;
|
|
case expr_t::token_t::STAR: out << "*"; break;
|
|
case expr_t::token_t::SLASH: out << "/"; break;
|
|
case expr_t::token_t::ARROW: out << "->"; break;
|
|
case expr_t::token_t::KW_DIV: out << "div"; break;
|
|
|
|
case expr_t::token_t::EXCLAM: out << "!"; break;
|
|
case expr_t::token_t::KW_AND: out << "and"; break;
|
|
case expr_t::token_t::KW_OR: out << "or"; break;
|
|
case expr_t::token_t::KW_MOD: out << "mod"; break;
|
|
|
|
case expr_t::token_t::KW_IF: out << "if"; break;
|
|
case expr_t::token_t::KW_ELSE: out << "else"; break;
|
|
|
|
case expr_t::token_t::QUERY: out << "?"; break;
|
|
case expr_t::token_t::COLON: out << ":"; break;
|
|
|
|
case expr_t::token_t::DOT: out << "."; break;
|
|
case expr_t::token_t::COMMA: out << ","; break;
|
|
case expr_t::token_t::SEMI: out << ";"; break;
|
|
|
|
case expr_t::token_t::TOK_EOF: out << "<end of input>"; break;
|
|
case expr_t::token_t::UNKNOWN: out << "<unknown>"; break;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream& out, const expr_t::token_t& token)
|
|
{
|
|
switch (token.kind) {
|
|
case expr_t::token_t::VALUE:
|
|
out << "<value '" << token.value << "'>";
|
|
break;
|
|
case expr_t::token_t::IDENT:
|
|
out << "<ident '" << token.value << "'>";
|
|
break;
|
|
case expr_t::token_t::MASK:
|
|
out << "<mask '" << token.value << "'>";
|
|
break;
|
|
|
|
default:
|
|
out << token.kind;
|
|
break;
|
|
}
|
|
|
|
return out;
|
|
}
|
|
|
|
} // namespace ledger
|