ledger/doc/grammar.y
2010-06-11 17:02:25 -04:00

221 lines
5.5 KiB
Text

/**
* @file grammar.y
* @version 3.0
* @author John Wiegley
*
* @brief Canonical BNF grammar for Ledger data files
*
* Extensions are permitted if: they are not required, and they are
* backwards-compatible with this grammar.
*/
/*
* There are three special terminals in this grammar, which violate its
* context free nature:
*
* TEXT -- consumes all characters until the next terminal
* or EOL (end of line)
* WHITESPACE -- any amount of whitespace, not including EOL
* STRING -- characters up to the next WHITESPACE or EOL
*
* In addition, the grammar uses four numeric terminals:
*
* BIGINT -- a number of any width, matching [0-9]+
* INT4 -- a four digit wide number
* INT2 -- a two digit wide number
* INT1 -- a one digit wide number
*
* Except for 1) the 'spacer' production (see below), 2) EOL, and 3) the
* WHITESPACE required to begin a posting, whitespace is otherwise
* ignored.
*
* Yes, this grammar is confusing and not so happy for machine readers,
* but it was designed for the human author and reader. Once parsed,
* the contents must be unambiguous, which means they can be output to
* more rigorous formats for other programs to consume.
*/
/*
* Journals
*
* A journal is a file which primarily contains xacts, among other elements.
*/
/* journal: a possibly empty, right-recursive sequence of journal_items. */
journal
    : journal_item journal
    | /* epsilon */
    ;
/*
 * journal_item: one top-level element of a journal -- an ignorable
 * whitespace/comment line, a directive, or an xact.
 *
 * There is no empty alternative here: the empty journal is already
 * covered by the epsilon alternative of 'journal'.
 */
journal_item
    : whitespace
    | directive
    | xact
    ;
/*
 * whitespace: lines that carry no journal content.  Every alternative
 * ends in EOL, so each match consumes at least one line.
 */
whitespace
    : EOL
    | WHITESPACE EOL
    | ';' TEXT EOL              /* this and the next alternative are ignored */
    | '*' TEXT EOL
    ;
/*
 * directive: a word_directive (optionally prefixed with '@' or '!') or a
 * char_directive.  Every form is terminated by EOL.
 */
directive
    : '@' word_directive EOL
    | '!' word_directive EOL
    | word_directive EOL
    | char_directive EOL
    ;
/*
 * word_directive: a directive introduced by a keyword.  The final
 * alternative accepts any other word followed by its argument text.
 */
word_directive
    : "include" TEXT
    | "account" TEXT
    | "end"
    | "alias" STRING '=' TEXT
    | "def" TEXT
    | TEXT WHITESPACE TEXT      /* looked up in session (aka maybe Python) */
    ;
/*
 * char_directive: a directive introduced by a single character.
 *
 * No alternative consumes EOL itself; the enclosing 'directive' rule
 * supplies the terminating EOL for every char_directive.
 */
char_directive
    : 'i' date time TEXT                /* a timeclock.el "check in" */
    | 'I' date time TEXT
    | 'o' date time TEXT                /* a timeclock.el "check out" */
    | 'O' date time TEXT
    | 'h' TEXT
    | 'b' TEXT
    | 'D' amount                        /* sets display parameters for a commodity */
    | 'A' TEXT                          /* sets the "default balancing account" */
    | 'C' commodity '=' amount          /* specifies a commodity conversion */
    | 'P' date time commodity amount    /* a pricing history xact */
    | 'N' commodity                     /* commodity's price is never downloaded */
    | 'Y' INT4                          /* sets the default year for date parsing */
    | '-' '-' STRING TEXT               /* specify command-line options in the file */
    ;
/* date: four-digit year, two-digit month and day, each joined by a date_sep. */
date
    : INT4 date_sep INT2 date_sep INT2
    ;

/* date_opt: an optional second date introduced by '='. */
date_opt
    : '=' date
    | /* epsilon */
    ;

/* date_sep: the characters accepted between date fields. */
date_sep
    : '/'
    | '-'
    | '.'
    ;

/* time: three two-digit fields separated by ':'. */
time
    : INT2 ':' INT2 ':' INT2
    ;
/* commodity: either double-quoted text or a bare STRING. */
commodity
    : '"' TEXT '"'
    | STRING
    ;
/*
* Xacts
*
* Xacts are the atomic units of accounting, which are composed of
* multiple postings between accounts, so long as it all balances in
* the end.
*/
/* xact: one of the three transaction forms defined below. */
xact
    : plain_xact
    | periodic_xact
    | automated_xact
    ;
/*
 * plain_xact: a header line (date, optional second date, optional status,
 * optional code, FULLSTRING, optional note) followed by zero or more
 * postings.
 *
 * NOTE(review): FULLSTRING is not described in the terminal list at the
 * top of this file -- confirm its intended definition.
 */
plain_xact
    : date date_opt status_opt code_opt FULLSTRING note_opt EOL
      postings
    ;
/* status_opt: an optional status marker; the empty case lives here only. */
status_opt
    : status
    | /* epsilon */
    ;

/*
 * status: a '*' or '!' marker.  It has no empty alternative of its own,
 * because status_opt already provides one and a second epsilon would
 * make the grammar ambiguous.
 */
status
    : '*'
    | '!'
    ;
/* code_opt: an optional code. */
code_opt
    : code
    | /* epsilon */
    ;

/* code: text enclosed in parentheses. */
code
    : '(' TEXT ')'
    ;

/* spacer: two spaces, a single tab, or a space followed by a tab. */
spacer
    : ' ' ' '
    | '\t'
    | ' ' '\t'
    ;

/* note_opt: an optional note, which must be preceded by a spacer. */
note_opt
    : spacer note
    | /* epsilon */
    ;

/* note: ';' followed by text. */
note
    : ';' TEXT
    ;
/* ---------------------------------------------------------------------- */
/*
 * periodic_xact: '~', a period expression and an optional note on the
 * header line, followed by one or more postings.
 */
periodic_xact
    : '~' period_expr note_opt EOL
      posting postings
    ;
/*
* A period expression has its own sub-grammar, which I don't quite have
* the time to exhaustively describe now. See datetime.cc. It allows
* for lots and lots of things, and is probably horribly ambiguous.
*/
/* period_expr: represented here only as an opaque FULLSTRING. */
period_expr
    : FULLSTRING
    ;
/* ---------------------------------------------------------------------- */
/*
 * automated_xact: '=', a value expression and an optional note on the
 * header line, followed by one or more postings.
 */
automated_xact
    : '=' value_expr note_opt EOL
      posting postings
    ;
/*
* Value expressions are algebraic math expressions very similar to
* XPath (minus the path traversal items). This grammar needs fleshing
* out also, since it's allowed in many places.
*/
/* value_expr: represented here only as an opaque FULLSTRING. */
value_expr
    : FULLSTRING
    ;
/*
* There is a serious ambiguity here which the parser resolves as
* follows: if an amount_expr can be parsed as an amount, it's an
* amount; otherwise, it's a value expression.
*/
/* quantity: optional sign, integer digits, optional fractional part. */
quantity
    : neg_opt BIGINT decimal_opt
    ;

/* neg_opt: an optional leading minus sign. */
neg_opt
    : '-'
    | /* epsilon */
    ;

/* decimal_opt: an optional '.' followed by further digits. */
decimal_opt
    : '.' BIGINT
    | /* epsilon */
    ;
/*
 * annotation: optional lot price, lot date and lot note, in that order.
 * Each *_opt rule refers to its bracketed lot_* form, so the delimiters
 * defined below are actually part of the grammar.
 */
annotation
    : lot_price_opt lot_date_opt lot_note_opt
    ;

/* lot_date_opt: an optional lot date. */
lot_date_opt
    : lot_date
    | /* epsilon */
    ;

/* lot_date: a date enclosed in square brackets. */
lot_date
    : '[' date ']'
    ;

/* lot_price_opt: an optional lot price. */
lot_price_opt
    : lot_price
    | /* epsilon */
    ;

/* lot_price: an amount enclosed in braces. */
lot_price
    : '{' amount '}'
    ;

/* lot_note_opt: an optional lot note. */
lot_note_opt
    : lot_note
    | /* epsilon */
    ;

/*
 * lot_note: text enclosed in parentheses.  TEXT is used (as in 'code')
 * because it stops at the next terminal, here the closing ')'.
 */
lot_note
    : '(' TEXT ')'
    ;
/*
 * amount: a quantity with a commodity written either before it (with an
 * optional sign ahead of the commodity) or after it, followed by an
 * annotation.
 */
amount
    : neg_opt commodity quantity annotation
    | quantity commodity annotation
    ;
/* amount_expr: either a literal amount or a value expression. */
amount_expr
    : amount
    | value_expr
    ;
/*
* Postings
*
* Postings are the fundamental unit of accounting, and represent
* the movement of commodities to or from an account. Thus, paying off
* your credit card consists of two balancing postings: one that
* withdraws money from your checking account, and another which pays
* money to your credit institution.
*/
/* postings: a possibly empty, right-recursive sequence of postings. */
postings
    : posting postings
    | /* epsilon */
    ;
/*
 * posting: one line that begins with WHITESPACE, then an optional
 * status, an account, optional values and an optional note, ended by EOL.
 */
posting
    : WHITESPACE status_opt account values_opt note_opt EOL
    ;
/* account_name: represented here only as an opaque FULLSTRING. */
account_name
    : FULLSTRING
    ;
/*
 * values_opt: optional posting values -- a spacer, an amount expression
 * and an optional price.
 */
values_opt
    : spacer amount_expr price_opt
    | /* epsilon */
    ;
/* price_opt: an optional price. */
price_opt
    : price
    | /* epsilon */
    ;

/*
 * price: an amount expression introduced by '@' or "@@".  The
 * two-character marker is written as a double-quoted literal, matching
 * the other multi-character literals in this grammar.
 */
price
    : '@' amount_expr
    | "@@" amount_expr          /* in this case, it's the whole price */
    ;
/*
 * account: an account name written bare, in parentheses, or in square
 * brackets.
 */
account
    : account_name
    | '(' account_name ')'
    | '[' account_name ']'
    ;
/* grammar.y ends here */