ledger/src/query.cc
2014-02-02 12:36:22 +01:00

560 lines
16 KiB
C++

/*
* Copyright (c) 2003-2014, John Wiegley. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of New Artisans LLC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <system.hh>
#include "query.h"
#include "op.h"
namespace ledger {
query_t::lexer_t::token_t
query_t::lexer_t::next_token(query_t::lexer_t::token_t::kind_t tok_context)
{
if (token_cache.kind != token_t::UNKNOWN) {
token_t tok = token_cache;
token_cache = token_t();
return tok;
}
if (arg_i == arg_end) {
if (begin == end || ++begin == end) {
return token_t(token_t::END_REACHED);
} else {
arg_i = (*begin).as_string().begin();
arg_end = (*begin).as_string().end();
}
}
resume:
switch (*arg_i) {
case '\0':
assert(false);
break;
case '\'':
case '"':
case '/': {
string pat;
char closing = *arg_i;
bool found_closing = false;
for (++arg_i; arg_i != arg_end; ++arg_i) {
if (*arg_i == '\\') {
if (++arg_i == arg_end)
throw_(parse_error, _("Unexpected '\\' at end of pattern"));
}
else if (*arg_i == closing) {
++arg_i;
found_closing = true;
break;
}
pat.push_back(*arg_i);
}
if (! found_closing)
throw_(parse_error, _f("Expected '%1%' at end of pattern") % closing);
if (pat.empty())
throw_(parse_error, _("Match pattern is empty"));
return token_t(token_t::TERM, pat);
}
}
if (multiple_args && consume_next_arg) {
consume_next_arg = false;
token_t tok(token_t::TERM, string(arg_i, arg_end));
prev_arg_i = arg_i;
arg_i = arg_end;
return tok;
}
bool consume_next = false;
switch (*arg_i) {
case '\0':
assert(false);
break;
case ' ':
case '\t':
case '\r':
case '\n':
if (++arg_i == arg_end)
return next_token(tok_context);
goto resume;
case '(':
++arg_i;
if (tok_context == token_t::TOK_EXPR)
consume_whitespace = true;
return token_t(token_t::LPAREN);
case ')':
++arg_i;
if (tok_context == token_t::TOK_EXPR)
consume_whitespace = false;
return token_t(token_t::RPAREN);
case '&': ++arg_i; return token_t(token_t::TOK_AND);
case '|': ++arg_i; return token_t(token_t::TOK_OR);
case '!': ++arg_i; return token_t(token_t::TOK_NOT);
case '@': ++arg_i; return token_t(token_t::TOK_PAYEE);
case '#': ++arg_i; return token_t(token_t::TOK_CODE);
case '%': ++arg_i; return token_t(token_t::TOK_META);
case '=':
++arg_i;
consume_next = true;
return token_t(token_t::TOK_EQ);
case '\\':
consume_next = true;
++arg_i;
// fall through...
default: {
string ident;
for (; arg_i != arg_end; ++arg_i) {
switch (*arg_i) {
case '\0':
assert(false);
break;
case ' ':
case '\t':
case '\n':
case '\r':
if (! multiple_args && ! consume_whitespace && ! consume_next_arg)
goto test_ident;
else
ident.push_back(*arg_i);
break;
case ')':
if (! consume_next && tok_context == token_t::TOK_EXPR)
goto test_ident;
case '(':
case '&':
case '|':
case '!':
case '@':
case '#':
case '%':
case '=':
if (! consume_next && tok_context != token_t::TOK_EXPR)
goto test_ident;
// fall through...
default:
ident.push_back(*arg_i);
break;
}
}
consume_whitespace = false;
test_ident:
if (ident == "and")
return token_t(token_t::TOK_AND);
else if (ident == "or")
return token_t(token_t::TOK_OR);
else if (ident == "not")
return token_t(token_t::TOK_NOT);
else if (ident == "code")
return token_t(token_t::TOK_CODE);
else if (ident == "desc")
return token_t(token_t::TOK_PAYEE);
else if (ident == "payee")
return token_t(token_t::TOK_PAYEE);
else if (ident == "note")
return token_t(token_t::TOK_NOTE);
else if (ident == "tag")
return token_t(token_t::TOK_META);
else if (ident == "meta")
return token_t(token_t::TOK_META);
else if (ident == "data")
return token_t(token_t::TOK_META);
else if (ident == "show")
return token_t(token_t::TOK_SHOW);
else if (ident == "only")
return token_t(token_t::TOK_ONLY);
else if (ident == "bold")
return token_t(token_t::TOK_BOLD);
else if (ident == "for")
return token_t(token_t::TOK_FOR);
else if (ident == "since")
return token_t(token_t::TOK_SINCE);
else if (ident == "until")
return token_t(token_t::TOK_UNTIL);
else if (ident == "expr") {
// The expr keyword takes the whole of the next string as its argument.
consume_next_arg = true;
return token_t(token_t::TOK_EXPR);
}
else
return token_t(token_t::TERM, ident);
}
}
return token_t(token_t::UNKNOWN);
}
void query_t::lexer_t::token_t::unexpected()
{
kind_t prev_kind = kind;
kind = UNKNOWN;
switch (prev_kind) {
case END_REACHED:
throw_(parse_error, _("Unexpected end of expression"));
case TERM:
throw_(parse_error, _f("Unexpected string '%1%'") % *value);
default:
throw_(parse_error, _f("Unexpected token '%1%'") % symbol());
}
}
void query_t::lexer_t::token_t::expected(char wanted, char c)
{
kind = UNKNOWN;
if (c == '\0' || c == -1) {
if (wanted == '\0' || wanted == -1)
throw_(parse_error, _("Unexpected end"));
else
throw_(parse_error, _f("Missing '%1%'") % wanted);
} else {
if (wanted == '\0' || wanted == -1)
throw_(parse_error, _f("Invalid char '%1%'") % c);
else
throw_(parse_error, _f("Invalid char '%1%' (wanted '%2%')") % c % wanted);
}
}
expr_t::ptr_op_t
query_t::parser_t::parse_query_term(query_t::lexer_t::token_t::kind_t tok_context)
{
expr_t::ptr_op_t node;
lexer_t::token_t tok = lexer.next_token(tok_context);
switch (tok.kind) {
case lexer_t::token_t::TOK_SHOW:
case lexer_t::token_t::TOK_ONLY:
case lexer_t::token_t::TOK_BOLD:
case lexer_t::token_t::TOK_FOR:
case lexer_t::token_t::TOK_SINCE:
case lexer_t::token_t::TOK_UNTIL:
case lexer_t::token_t::END_REACHED:
lexer.push_token(tok);
break;
case lexer_t::token_t::TOK_CODE:
case lexer_t::token_t::TOK_PAYEE:
case lexer_t::token_t::TOK_NOTE:
case lexer_t::token_t::TOK_ACCOUNT:
case lexer_t::token_t::TOK_META:
case lexer_t::token_t::TOK_EXPR:
node = parse_query_term(tok.kind);
if (! node)
throw_(parse_error,
_f("%1% operator not followed by argument") % tok.symbol());
break;
case lexer_t::token_t::TERM:
assert(tok.value);
switch (tok_context) {
case lexer_t::token_t::TOK_EXPR:
node = expr_t(*tok.value).get_op();
break;
case lexer_t::token_t::TOK_META: {
node = new expr_t::op_t(expr_t::op_t::O_CALL);
expr_t::ptr_op_t ident = new expr_t::op_t(expr_t::op_t::IDENT);
ident->set_ident("has_tag");
node->set_left(ident);
expr_t::ptr_op_t arg1 = new expr_t::op_t(expr_t::op_t::VALUE);
arg1->set_value(mask_t(*tok.value));
tok = lexer.peek_token(tok_context);
if (tok.kind == lexer_t::token_t::TOK_EQ) {
tok = lexer.next_token(tok_context);
tok = lexer.next_token(tok_context);
if (tok.kind != lexer_t::token_t::TERM)
throw_(parse_error,
_("Metadata equality operator not followed by term"));
expr_t::ptr_op_t arg2 = new expr_t::op_t(expr_t::op_t::VALUE);
assert(tok.value);
arg2->set_value(mask_t(*tok.value));
node->set_right(expr_t::op_t::new_node
(expr_t::op_t::O_SEQ,
expr_t::op_t::new_node
(expr_t::op_t::O_CONS, arg1, arg2)));
} else {
node->set_right(arg1);
}
break;
}
default: {
node = new expr_t::op_t(expr_t::op_t::O_MATCH);
expr_t::ptr_op_t ident = new expr_t::op_t(expr_t::op_t::IDENT);
switch (tok_context) {
case lexer_t::token_t::TOK_ACCOUNT:
ident->set_ident("account"); break;
case lexer_t::token_t::TOK_PAYEE:
ident->set_ident("payee"); break;
case lexer_t::token_t::TOK_CODE:
ident->set_ident("code"); break;
case lexer_t::token_t::TOK_NOTE:
ident->set_ident("note"); break;
default:
assert(false); break;
}
expr_t::ptr_op_t mask = new expr_t::op_t(expr_t::op_t::VALUE);
DEBUG("query.mask", "Mask from string: " << *tok.value);
mask->set_value(mask_t(*tok.value));
DEBUG("query.mask", "Mask is: " << mask->as_value().as_mask().str());
node->set_left(ident);
node->set_right(mask);
}
}
break;
case lexer_t::token_t::LPAREN:
node = parse_query_expr(tok_context, true);
tok = lexer.next_token(tok_context);
if (tok.kind != lexer_t::token_t::RPAREN)
tok.expected(')');
break;
default:
lexer.push_token(tok);
break;
}
return node;
}
expr_t::ptr_op_t
query_t::parser_t::parse_unary_expr(lexer_t::token_t::kind_t tok_context)
{
expr_t::ptr_op_t node;
lexer_t::token_t tok = lexer.next_token(tok_context);
switch (tok.kind) {
case lexer_t::token_t::TOK_NOT: {
expr_t::ptr_op_t term(parse_query_term(tok_context));
if (! term)
throw_(parse_error,
_f("%1% operator not followed by argument") % tok.symbol());
node = new expr_t::op_t(expr_t::op_t::O_NOT);
node->set_left(term);
break;
}
default:
lexer.push_token(tok);
node = parse_query_term(tok_context);
break;
}
return node;
}
expr_t::ptr_op_t
query_t::parser_t::parse_and_expr(lexer_t::token_t::kind_t tok_context)
{
if (expr_t::ptr_op_t node = parse_unary_expr(tok_context)) {
while (true) {
lexer_t::token_t tok = lexer.next_token(tok_context);
if (tok.kind == lexer_t::token_t::TOK_AND) {
expr_t::ptr_op_t prev(node);
node = new expr_t::op_t(expr_t::op_t::O_AND);
node->set_left(prev);
node->set_right(parse_unary_expr(tok_context));
if (! node->right())
throw_(parse_error,
_f("%1% operator not followed by argument") % tok.symbol());
} else {
lexer.push_token(tok);
break;
}
}
return node;
}
return expr_t::ptr_op_t();
}
expr_t::ptr_op_t
query_t::parser_t::parse_or_expr(lexer_t::token_t::kind_t tok_context)
{
if (expr_t::ptr_op_t node = parse_and_expr(tok_context)) {
while (true) {
lexer_t::token_t tok = lexer.next_token(tok_context);
if (tok.kind == lexer_t::token_t::TOK_OR) {
expr_t::ptr_op_t prev(node);
node = new expr_t::op_t(expr_t::op_t::O_OR);
node->set_left(prev);
node->set_right(parse_and_expr(tok_context));
if (! node->right())
throw_(parse_error,
_f("%1% operator not followed by argument") % tok.symbol());
} else {
lexer.push_token(tok);
break;
}
}
return node;
}
return expr_t::ptr_op_t();
}
expr_t::ptr_op_t
query_t::parser_t::parse_query_expr(lexer_t::token_t::kind_t tok_context,
bool subexpression)
{
expr_t::ptr_op_t limiter;
while (expr_t::ptr_op_t next = parse_or_expr(tok_context)) {
if (! limiter) {
limiter = next;
} else {
expr_t::ptr_op_t prev(limiter);
limiter = new expr_t::op_t(expr_t::op_t::O_OR);
limiter->set_left(prev);
limiter->set_right(next);
}
}
if (! subexpression) {
if (limiter)
query_map.insert
(query_map_t::value_type
(QUERY_LIMIT, predicate_t(limiter, what_to_keep).print_to_str()));
lexer_t::token_t tok = lexer.peek_token(tok_context);
while (tok.kind != lexer_t::token_t::END_REACHED) {
switch (tok.kind) {
case lexer_t::token_t::TOK_SHOW:
case lexer_t::token_t::TOK_ONLY:
case lexer_t::token_t::TOK_BOLD: {
lexer.next_token(tok_context);
kind_t kind;
switch (tok.kind) {
case lexer_t::token_t::TOK_SHOW:
kind = QUERY_SHOW;
break;
case lexer_t::token_t::TOK_ONLY:
kind = QUERY_ONLY;
break;
case lexer_t::token_t::TOK_BOLD:
kind = QUERY_BOLD;
break;
default:
break;
}
expr_t::ptr_op_t node;
while (expr_t::ptr_op_t next = parse_or_expr(tok_context)) {
if (! node) {
node = next;
} else {
expr_t::ptr_op_t prev(node);
node = new expr_t::op_t(expr_t::op_t::O_OR);
node->set_left(prev);
node->set_right(next);
}
}
if (node)
query_map.insert
(query_map_t::value_type
(kind, predicate_t(node, what_to_keep).print_to_str()));
break;
}
case lexer_t::token_t::TOK_FOR:
case lexer_t::token_t::TOK_SINCE:
case lexer_t::token_t::TOK_UNTIL: {
tok = lexer.next_token(tok_context);
string for_string;
if (tok.kind == lexer_t::token_t::TOK_SINCE)
for_string = "since";
else if (tok.kind == lexer_t::token_t::TOK_UNTIL)
for_string = "until";
lexer.consume_next_arg = true;
tok = lexer.peek_token(tok_context);
while (tok.kind != lexer_t::token_t::END_REACHED) {
tok = lexer.next_token(tok_context);
assert(tok.kind == lexer_t::token_t::TERM);
if (*tok.value == "show" || *tok.value == "bold" ||
*tok.value == "for" || *tok.value == "since" ||
*tok.value == "until") {
lexer.token_cache = lexer_t::token_t();
lexer.arg_i = lexer.prev_arg_i;
lexer.consume_next_arg = false;
break;
}
if (! for_string.empty())
for_string += " ";
for_string += *tok.value;
lexer.consume_next_arg = true;
tok = lexer.peek_token(tok_context);
}
if (! for_string.empty())
query_map.insert(query_map_t::value_type(QUERY_FOR, for_string));
break;
}
default:
goto done;
}
tok = lexer.peek_token(tok_context);
}
done:
;
}
return limiter;
}
} // namespace ledger