221 lines
5.5 KiB
Text
221 lines
5.5 KiB
Text
/**
|
|
* @file grammar.y
|
|
* @version 3.0
|
|
* @author John Wiegley
|
|
*
|
|
* @brief Canonical BNF grammar for Ledger data files
|
|
*
|
|
* Extensions are permitted if: they are not required, and they are
|
|
* backwards-compatible with this grammar.
|
|
*/
|
|
|
|
/*
|
|
* There are three special terminals in this grammar, which violate its
|
|
* context-free nature:
|
|
*
|
|
* TEXT -- consumes all characters until the next terminal
|
|
* or EOL (end of line)
|
|
* WHITESPACE -- any amount of whitespace, not including EOL
|
|
* STRING -- characters up to the next WHITESPACE or EOL
|
|
*
|
|
* BIGINT -- a number of any width, matching [0-9]+
|
|
* INT4 -- a four digit wide number
|
|
* INT2 -- a two digit wide number
|
|
* INT1 -- a one digit wide number
|
|
*
|
|
* Except for 1) the 'spacer' production (see below), 2) EOL, and 3) the
|
|
* WHITESPACE required to begin a posting, whitespace is otherwise
|
|
* ignored.
|
|
*
|
|
* Yes, this grammar is confusing and not so happy for machine readers,
|
|
* but it was designed for the human author and reader. Once parsed,
|
|
* the contents must be unambiguous, which means they can be output to
|
|
* more rigorous formats for other programs to consume.
|
|
*/
|
|
|
|
/*
|
|
* Journals
|
|
*
|
|
* A journal is a file which primarily contains xacts, among other elements.
|
|
*/
|
|
|
|
/* A journal is a possibly empty sequence of journal items. */
journal:
      journal_item journal
    | /* epsilon */
    ;
|
|
|
|
/*
 * One top-level element of a journal: an ignorable line, a directive,
 * or an xact.
 *
 * Each alternative is separated explicitly and there is no empty
 * alternative: the empty journal is already covered by the epsilon
 * case of `journal`, and an empty item here would make the recursion
 * in `journal` ambiguous.
 */
journal_item:
      whitespace
    | directive
    | xact
    ;
|
|
|
|
/*
 * Lines that carry no journal data: a bare end of line, a line holding
 * only whitespace, and lines introduced by ';' or '*'.
 *
 * There is deliberately no empty alternative; every form consumes at
 * least an EOL.
 *
 * NOTE(review): the earlier comment on this rule spoke of four ignored
 * forms, but only the ';' and '*' forms are listed -- confirm whether
 * further comment characters are missing from this grammar.
 */
whitespace:
      EOL
    | WHITESPACE EOL
    | ';' TEXT EOL              /* ignored */
    | '*' TEXT EOL              /* ignored */
    ;
|
|
|
|
/*
 * A directive line.  Word directives may be written bare or with a
 * leading '@' or '!'; character directives are written bare.  Every
 * alternative is terminated by EOL.
 */
directive:
      '@' word_directive EOL
    | '!' word_directive EOL
    | word_directive EOL
    | char_directive EOL
    ;
|
|
|
|
/*
 * Directives introduced by a keyword.  The final alternative accepts
 * an arbitrary leading word followed by whitespace and further text.
 */
word_directive:
      "include" TEXT
    | "account" TEXT
    | "end"
    | "alias" STRING '=' TEXT
    | "def" TEXT
    | TEXT WHITESPACE TEXT      /* looked up in session (aka maybe Python) */
    ;
|
|
|
|
/*
 * Directives introduced by a single character.
 *
 * No alternative here consumes EOL: the `directive` rule appends EOL
 * after `char_directive`, so an EOL inside an alternative would demand
 * two line ends.  There is also no empty alternative, since that would
 * let `directive` match a bare EOL.
 */
char_directive:
      'i' date time TEXT                /* a timeclock.el "check in" */
    | 'I' date time TEXT
    | 'o' date time TEXT                /* a timeclock.el "check out" */
    | 'O' date time TEXT
    | 'h' TEXT
    | 'b' TEXT
    | 'D' amount                        /* sets display parameters for a commodity */
    | 'A' TEXT                          /* sets the "default balancing account" */
    | 'C' commodity '=' amount          /* specifies a commodity conversion */
    | 'P' date time commodity amount    /* a pricing history xact */
    | 'N' commodity                     /* commodity's price is never downloaded */
    | 'Y' INT4                          /* sets the default year for date parsing */
    | '-' '-' STRING TEXT               /* specify command-line options in the file */
    ;
|
|
|
|
/* A four-digit field and two two-digit fields joined by separators. */
date:
      INT4 date_sep INT2 date_sep INT2
    ;

/* An optional second date, introduced by '='. */
date_opt:
      '=' date
    | /* epsilon */
    ;

/* Characters accepted between the fields of a date. */
date_sep:
      '/'
    | '-'
    | '.'
    ;
|
|
|
|
/* Three two-digit fields separated by colons. */
time:
      INT2 ':' INT2 ':' INT2
    ;
|
|
|
|
/* A commodity is written either as double-quoted text or as a bare STRING. */
commodity:
      '"' TEXT '"'
    | STRING
    ;
|
|
|
|
/*
|
|
* Xacts
|
|
*
|
|
* Xacts are the atomic units of accounting, which are composed of
|
|
* multiple postings between accounts, so long as it all balances in
|
|
* the end.
|
|
*/
|
|
|
|
/* An xact is one of three kinds: plain, periodic, or automated. */
xact:
      plain_xact
    | periodic_xact
    | automated_xact
    ;
|
|
|
|
/*
 * A dated xact: a header line followed by zero or more postings.
 *
 * NOTE(review): FULLSTRING is used here but is not described in the
 * list of terminals at the top of this file -- confirm its definition.
 */
plain_xact:
      date date_opt status_opt code_opt FULLSTRING note_opt EOL
      postings
    ;
|
|
|
|
/* An optional status marker. */
status_opt:
      status
    | /* epsilon */
    ;

/*
 * A status marker is '*' or '!'.  It has no empty alternative:
 * optionality is supplied by `status_opt`, and a second epsilon here
 * would make the empty derivation ambiguous.
 */
status:
      '*'
    | '!'
    ;
|
|
|
|
/* An optional code. */
code_opt:
      code
    | /* epsilon */
    ;

/* A code is text enclosed in parentheses. */
code:
      '(' TEXT ')'
    ;
|
|
|
|
/* A spacer is two spaces, a single tab, or a space followed by a tab. */
spacer:
      ' ' ' '
    | '\t'
    | ' ' '\t'
    ;
|
|
|
|
/* An optional note; when present it must be preceded by a spacer. */
note_opt:
      spacer note
    | /* epsilon */
    ;

/* A note is a ';' followed by text. */
note:
      ';' TEXT
    ;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
/*
 * A periodic xact: a header line introduced by '~' and carrying a
 * period expression, followed by one or more postings.
 */
periodic_xact:
      '~' period_expr note_opt EOL
      posting postings
    ;
|
|
|
|
/*
|
|
* A period expression has its own sub-grammar, which I don't quite have
|
|
* the time to exhaustively describe now. See datetime.cc. It allows
|
|
* for lots and lots of things, and is probably horribly ambiguous.
|
|
*/
|
|
|
|
/* Represented here as a single FULLSTRING token. */
period_expr:
      FULLSTRING
    ;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
/*
 * An automated xact: a header line introduced by '=' and carrying a
 * value expression, followed by one or more postings.
 */
automated_xact:
      '=' value_expr note_opt EOL
      posting postings
    ;
|
|
|
|
/*
|
|
* Value expressions are algebraic math expressions very similar to
|
|
* XPath (minus the path traversal items). This grammar needs fleshing
|
|
* out also, since it's allowed in many places.
|
|
*/
|
|
|
|
/* Represented here as a single FULLSTRING token. */
value_expr:
      FULLSTRING
    ;
|
|
|
|
/*
|
|
* There is a serious ambiguity here which the parser resolves as
|
|
* follows: if an amount_expr can be parsed as an amount, it's an
|
|
* amount; otherwise, it's a value expression.
|
|
*/
|
|
|
|
/* An optional minus sign, a run of digits, and an optional fraction. */
quantity:
      neg_opt BIGINT decimal_opt
    ;

/* An optional leading '-'. */
neg_opt:
      '-'
    | /* epsilon */
    ;

/* An optional fractional part: '.' followed by a run of digits. */
decimal_opt:
      '.' BIGINT
    | /* epsilon */
    ;
|
|
|
|
/*
 * An annotation is an optional lot price, lot date and lot note, in
 * that order.  Each optional rule refers to its own delimited lot_*
 * form, so that lot_price, lot_date and lot_note are actually
 * reachable from `annotation`.
 */
annotation:
      lot_price_opt lot_date_opt lot_note_opt
    ;

lot_date_opt:
      lot_date
    | /* epsilon */
    ;

/* A lot date is a date enclosed in square brackets. */
lot_date:
      '[' date ']'
    ;

lot_price_opt:
      lot_price
    | /* epsilon */
    ;

/* A lot price is an amount enclosed in braces. */
lot_price:
      '{' amount '}'
    ;

lot_note_opt:
      lot_note
    | /* epsilon */
    ;

/*
 * A lot note is text enclosed in parentheses.  TEXT is used, as in the
 * `code` rule, because it stops at the next terminal (the closing
 * parenthesis); this grammar defines no `string` nonterminal.
 */
lot_note:
      '(' TEXT ')'
    ;
|
|
|
|
/*
 * An amount pairs a commodity with a quantity; the commodity may come
 * before or after the quantity.  Either form may carry an annotation.
 */
amount:
      neg_opt commodity quantity annotation
    | quantity commodity annotation
    ;
|
|
|
|
/* Either a literal amount or a value expression. */
amount_expr:
      amount
    | value_expr
    ;
|
|
|
|
/*
|
|
* Postings
|
|
*
|
|
* Postings are the fundamental unit of accounting, and represent
|
|
* the movement of commodities to or from an account. Thus, paying off
|
|
* your credit card consists of two balancing postings: one that
|
|
* withdraws money from your checking account, and another which pays
|
|
* money to your credit institution.
|
|
*/
|
|
|
|
/* Zero or more postings. */
postings:
      posting postings
    | /* epsilon */
    ;
|
|
|
|
/*
 * A posting is a single line that must begin with WHITESPACE, followed
 * by an optional status, an account, optional values and an optional
 * note.
 */
posting:
      WHITESPACE status_opt account values_opt note_opt EOL
    ;
|
|
|
|
/* Represented here as a single FULLSTRING token. */
account_name:
      FULLSTRING
    ;
|
|
|
|
/*
 * The optional value part of a posting: a spacer, an amount
 * expression, and an optional price.
 */
values_opt:
      spacer amount_expr price_opt
    | /* epsilon */
    ;
|
|
|
|
/* An optional price. */
price_opt:
      price
    | /* epsilon */
    ;

/*
 * A price is an amount expression introduced by '@' or "@@".  The
 * two-character marker is written as a string literal, matching the
 * double-quoted multi-character literals used by `word_directive`; a
 * single-quoted literal denotes exactly one character.
 */
price:
      '@' amount_expr
    | "@@" amount_expr          /* in this case, it's the whole price */
    ;
|
|
|
|
/*
 * An account is an account name written bare, in parentheses, or in
 * square brackets.
 */
account:
      account_name
    | '(' account_name ')'
    | '[' account_name ']'
    ;
|
|
|
|
/* grammar.y ends here */
|