ledger/src/format.cc
2013-02-18 06:51:21 -06:00

697 lines
22 KiB
C++

/*
* Copyright (c) 2003-2013, John Wiegley. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of New Artisans LLC nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <system.hh>
#include "format.h"
#include "scope.h"
#include "pstream.h"
namespace ledger {
// Elision style that format_t::truncate() starts from when shortening a
// string; it is initialised to trailing truncation.
format_t::elision_style_t format_t::default_style = TRUNCATE_TRAILING;
// Initialised to false. While it is false, truncate() switches to the
// ABBREVIATE style whenever it is given a non-zero account_abbrev_length.
bool format_t::default_style_changed = false;
// Writes a one-line, human-readable description of this element to `out`:
// its kind, flag bits, minimum/maximum widths and finally its payload
// (the literal string or the expression), followed by a newline.
void format_t::element_t::dump(std::ostream& out) const
{
  // Kind of element.
  out << "Element: ";
  switch (type) {
  case STRING:
    out << " STRING";
    break;
  case EXPR:
    out << " EXPR";
    break;
  }

  // Flag bits are shown in hexadecimal.
  out << " flags: 0x" << std::hex << int(flags());

  // Both widths are shown in decimal, right-aligned in a two-column field.
  // width() only affects the next formatted insertion, so it is set again
  // before each number.
  out << " min: " << std::right;
  out.width(2);
  out << std::dec << int(min_width) << " max: " << std::right;
  out.width(2);
  out << std::dec << int(max_width);

  // Payload, which depends on the kind of element.
  switch (type) {
  case STRING:
    out << " str: '" << boost::get<string>(data) << "'" << std::endl;
    break;
  case EXPR:
    out << " expr: " << boost::get<expr_t>(data) << std::endl;
    break;
  }
}
namespace {
// Associates a single-letter format directive (the letter that follows '%'
// and any width specification) with the expression text it stands for.
struct format_mapping_t {
  char         letter;  // directive letter
  const char * expr;    // expression source substituted for the directive
};

// Table of the supported single-letter directives. The "$min", "$max" and
// "$left" placeholders inside an expression are replaced by
// format_t::parse_elements() with the element's widths and alignment.
format_mapping_t single_letter_mappings[] = {
  { 'd', "date" },
  { 'S', "filename" },
  { 'B', "beg_pos" },
  { 'b', "beg_line" },
  { 'E', "end_pos" },
  { 'e', "end_line" },
  { 'X', "cleared" },
  { 'Y', "xact.cleared" },
  { 'C', "code" },
  { 'P', "payee" },
  { 'a', "account.name" },
  { 'A', "account" },
  { 't', "justify(scrub(display_amount), $min, $max, $left, color)" },
  { 'T', "justify(scrub(display_total), $min, $max, $left, color)" },
  { 'N', "note" },
};
// Parses one expression starting at `p` and returns it.
//
// `p` is updated in place. If the parser consumed the whole input, `p` ends
// up on the terminating NUL; otherwise it ends up on the last character the
// parser consumed (stepped back by one more position if that character is
// whitespace). `single_expr` selects PARSE_SINGLE (the default) or
// PARSE_PARTIAL as the parse mode.
expr_t parse_single_expression(const char *& p, bool single_expr = true)
{
  // Keep a copy of the unparsed text; it is handed to the parser together
  // with the stream.
  string     source_text(p);
  ptristream in(const_cast<char *&>(p));

  expr_t parsed;
  parsed.parse(in, single_expr ? PARSE_SINGLE : PARSE_PARTIAL, source_text);

  if (! in.eof()) {
    assert(in.good());

    // Only part of the input was consumed: record exactly that part as the
    // expression's text and leave p on its final character.
    istream_pos_type pos      = in.tellg();
    const long       consumed = long(pos);

    parsed.set_text(string(p, p + consumed));
    p += consumed - 1;

    // Don't gobble up any whitespace. The lower bound is p itself, so this
    // steps back by at most one position.
    const char * const lower_bound = p;
    while (p >= lower_bound && std::isspace(*p))
      --p;
  } else {
    // Everything up to the end of the input belongs to the expression.
    parsed.set_text(p);
    p += std::strlen(p);
  }

  return parsed;
}
// Builds a new IDENT operator node whose identifier is `ident`.
inline expr_t::ptr_op_t ident_node(const string& ident)
{
  expr_t::ptr_op_t ident_op(new expr_t::op_t(expr_t::op_t::IDENT));
  ident_op->set_ident(ident);
  return ident_op;
}
}
// Parses the format string `fmt` into a singly linked chain of elements and
// returns the head of that chain, releasing ownership to the caller. The
// returned pointer is null when `fmt` is empty.
//
// Recognised syntax:
//   - plain text             copied verbatim into STRING elements
//   - \b \f \n \r \t \v \\   the corresponding control character / backslash;
//                            any other escaped character stands for itself
//   - %[-][min][.max]X       a directive, where X is one of:
//       a letter             looked up in single_letter_mappings
//       %                    a literal percent sign
//       $N                   copy of the N-th field of `tmpl` (1-9, A-F)
//       (expr)               an expression, used as parsed
//       {expr}               an expression wrapped in justify(scrub(...))
//     '-' requests left alignment; `min`/`max` are decimal widths, and a
//     `max` given without a `min` also becomes the `min`.
//
// Throws format_error for an unknown directive character, for a %$ reference
// without a template, and for an invalid or non-existent %$ field.
format_t::element_t * format_t::parse_elements(const string& fmt,
                                               const optional<format_t&>& tmpl)
{
  unique_ptr<element_t> result;
  element_t *           current = NULL;

  // Pending run of literal characters. A growable string is used so that a
  // long literal run cannot overflow a fixed-size buffer and no state is
  // shared between calls.
  string literal;

  for (const char * p = fmt.c_str(); *p; p++) {
    if (*p != '%' && *p != '\\') {
      literal += *p;
      continue;
    }

    // A directive or an escape starts here: append a fresh element.
    if (! result.get()) {
      result.reset(new element_t);
      current = result.get();
    } else {
      current->next.reset(new element_t);
      current = current->next.get();
    }

    // Flush the pending literal text into that element and append another
    // one for the directive/escape itself.
    if (! literal.empty()) {
      current->type = element_t::STRING;
      current->data = literal;
      literal.clear();

      current->next.reset(new element_t);
      current = current->next.get();
    }

    if (*p == '\\') {
      p++;
      current->type = element_t::STRING;
      switch (*p) {
      case '\0':
        // A lone backslash at the very end of the format: keep it as a
        // literal backslash and step back, so that the loop's own increment
        // lands on the terminator instead of beyond it.
        current->data = string("\\");
        --p;
        break;
      case 'b':  current->data = string("\b"); break;
      case 'f':  current->data = string("\f"); break;
      case 'n':  current->data = string("\n"); break;
      case 'r':  current->data = string("\r"); break;
      case 't':  current->data = string("\t"); break;
      case 'v':  current->data = string("\v"); break;
      case '\\': current->data = string("\\"); break;
      default:   current->data = string(1, *p); break;
      }
      continue;
    }

    // Skip the '%' and read the optional alignment flag(s).
    ++p;
    while (*p == '-') {
      current->add_flags(ELEMENT_ALIGN_LEFT);
      ++p;
    }

    // Optional minimum width. The <cctype> classifiers require a value
    // representable as unsigned char, hence the casts.
    std::size_t num = 0;
    while (*p && std::isdigit(static_cast<unsigned char>(*p))) {
      num *= 10;
      num += static_cast<std::size_t>(*p++ - '0');
    }
    current->min_width = num;

    // Optional ".max" width; it doubles as the minimum if none was given.
    if (*p == '.') {
      ++p;
      num = 0;
      while (*p && std::isdigit(static_cast<unsigned char>(*p))) {
        num *= 10;
        num += static_cast<std::size_t>(*p++ - '0');
      }
      current->max_width = num;
      if (current->min_width == 0)
        current->min_width = current->max_width;
    }

    if (std::isalpha(static_cast<unsigned char>(*p))) {
      // Single-letter directive: expand the mapped expression text,
      // substituting the $min/$max/$left placeholders.
      bool found = false;
      for (std::size_t i = 0; i < (sizeof(single_letter_mappings) /
                                   sizeof(format_mapping_t)); i++) {
        if (*p != single_letter_mappings[i].letter)
          continue;

        std::ostringstream expr;
        for (const char * ptr = single_letter_mappings[i].expr; *ptr; ) {
          if (*ptr == '$') {
            const char * beg = ++ptr;
            while (*ptr && std::isalpha(static_cast<unsigned char>(*ptr)))
              ++ptr;
            string keyword(beg, ptr);
            if (keyword == "min")
              expr << (current->min_width > 0 ?
                       static_cast<int>(current->min_width) : -1);
            else if (keyword == "max")
              expr << (current->max_width > 0 ?
                       static_cast<int>(current->max_width) : -1);
            else if (keyword == "left")
              expr << (current->has_flags(ELEMENT_ALIGN_LEFT) ? "false" : "true");
#if DEBUG_ON
            else
              assert("Unrecognized format substitution keyword" == NULL);
#endif
          } else {
            expr << *ptr++;
          }
        }
        current->type = element_t::EXPR;
        current->data = expr_t(expr.str());
        found = true;
        break;
      }
      if (! found)
        throw_(format_error, _f("Unrecognized formatting character: %1%") % *p);
    } else {
      switch (*p) {
      case '%':
        current->type = element_t::STRING;
        current->data = string("%");
        break;

      case '$': {
        if (! tmpl)
          throw_(format_error, _("Prior field reference, but no template"));

        // The check accepts 1-9 as well as A-F (fields 10-15), although the
        // message only mentions digits.
        p++;
        if (*p == '0' || (! std::isdigit(static_cast<unsigned char>(*p)) &&
                          *p != 'A' && *p != 'B' && *p != 'C' &&
                          *p != 'D' && *p != 'E' && *p != 'F'))
          throw_(format_error, _("%$ field reference must be a digit from 1-9"));

        int index = std::isdigit(static_cast<unsigned char>(*p)) ?
          *p - '0' : (*p - 'A' + 10);

        // The head of the template counts as field 1; every further field
        // is the next EXPR element after the previous one.
        element_t * tmpl_elem = tmpl->elements.get();
        for (int i = 1; i < index && tmpl_elem; i++) {
          tmpl_elem = tmpl_elem->next.get();
          while (tmpl_elem && tmpl_elem->type != element_t::EXPR)
            tmpl_elem = tmpl_elem->next.get();
        }
        if (! tmpl_elem)
          throw_(format_error, _("%$ reference to a non-existent prior field"));

        *current = *tmpl_elem;
        break;
      }

      case '(':
      case '{': {
        bool format_amount = *p == '{';

        current->type = element_t::EXPR;
        current->data = parse_single_expression(p);

        // Only the %{...} form is wrapped in calls to justify and scrub.
        if (! format_amount)
          break;

        expr_t::ptr_op_t op = boost::get<expr_t>(current->data).get_op();

        // scrub(<expr>); if the parsed expression is a cons list, only its
        // left operand is scrubbed.
        expr_t::ptr_op_t call1_node(new expr_t::op_t(expr_t::op_t::O_CALL));
        call1_node->set_left(ident_node("scrub"));
        call1_node->set_right(op->kind == expr_t::op_t::O_CONS ? op->left() : op);

        // The three remaining arguments to justify: minimum width, maximum
        // width (-1 when unset) and whether to right-align.
        expr_t::ptr_op_t arg1_node(new expr_t::op_t(expr_t::op_t::VALUE));
        arg1_node->set_value(current->min_width > 0 ?
                             long(current->min_width) : -1);

        expr_t::ptr_op_t arg2_node(new expr_t::op_t(expr_t::op_t::VALUE));
        arg2_node->set_value(current->max_width > 0 ?
                             long(current->max_width) : -1);

        expr_t::ptr_op_t arg3_node(new expr_t::op_t(expr_t::op_t::VALUE));
        arg3_node->set_value(! current->has_flags(ELEMENT_ALIGN_LEFT));

        // Chain the arguments into a right-nested cons list:
        //   (scrub(...), (min, (max, right_align)))
        expr_t::ptr_op_t args1_node(new expr_t::op_t(expr_t::op_t::O_CONS));
        args1_node->set_left(arg2_node);
        args1_node->set_right(arg3_node);

        expr_t::ptr_op_t args2_node(new expr_t::op_t(expr_t::op_t::O_CONS));
        args2_node->set_left(arg1_node);
        args2_node->set_right(args1_node);

        expr_t::ptr_op_t args3_node(new expr_t::op_t(expr_t::op_t::O_CONS));
        args3_node->set_left(call1_node);
        args3_node->set_right(args2_node);

        // justify(scrub(...), min, max, right_align)
        expr_t::ptr_op_t call2_node(new expr_t::op_t(expr_t::op_t::O_CALL));
        call2_node->set_left(ident_node("justify"));
        call2_node->set_right(args3_node);

        // The widths are now carried by the justify call, so they are
        // cleared on the element itself.
        current->min_width = 0;
        current->max_width = 0;

        string prev_expr = boost::get<expr_t>(current->data).text();

        // A right operand on the parsed cons list becomes the second
        // argument of an enclosing ansify_if(...) call.
        expr_t::ptr_op_t colorize_op;
        if (op->kind == expr_t::op_t::O_CONS && op->has_right())
          colorize_op = op->right();

        if (colorize_op) {
          expr_t::ptr_op_t args4_node(new expr_t::op_t(expr_t::op_t::O_CONS));
          args4_node->set_left(call2_node);
          args4_node->set_right(colorize_op);

          expr_t::ptr_op_t call3_node(new expr_t::op_t(expr_t::op_t::O_CALL));
          call3_node->set_left(ident_node("ansify_if"));
          call3_node->set_right(args4_node);

          current->data = expr_t(call3_node);
        } else {
          current->data = expr_t(call2_node);
        }

        // Keep the text of the expression as the user wrote it.
        boost::get<expr_t>(current->data).set_text(prev_expr);
        break;
      }

      default:
        throw_(format_error, _f("Unrecognized formatting character: %1%") % *p);
      }
    }

    // parse_single_expression() leaves p on the terminating NUL when the
    // expression ran to the end of the input. Stop here so that the loop
    // increment does not step past the end of the string.
    if (*p == '\0')
      break;
  }

  // Flush any literal text that follows the last directive.
  if (! literal.empty()) {
    if (! result.get()) {
      result.reset(new element_t);
      current = result.get();
    } else {
      current->next.reset(new element_t);
      current = current->next.get();
    }
    current->type = element_t::STRING;
    current->data = literal;
  }

  return result.release();
}
// Renders every element of this format against `scope` and returns the
// concatenated text.
//
// STRING elements are written as-is (padded to min_width by the stream);
// EXPR elements are compiled and evaluated in `scope`. Afterwards each
// element's text is truncated to max_width or right-padded with spaces up
// to min_width, both measured on the unistring length of the text.
//
// A calc_error raised while evaluating an expression is rethrown after the
// expression's context has been added to the error context.
string format_t::real_calc(scope_t& scope)
{
  std::ostringstream out_str;

  for (element_t * elem = elements.get(); elem; elem = elem->next.get()) {
    std::ostringstream out;

    if (elem->has_flags(ELEMENT_ALIGN_LEFT))
      out << std::left;
    else
      out << std::right;

    switch (elem->type) {
    case element_t::STRING:
      if (elem->min_width > 0)
        out.width(static_cast<std::streamsize>(elem->min_width));
      out << boost::get<string>(elem->data);
      break;

    case element_t::EXPR: {
      expr_t& expr(boost::get<expr_t>(elem->data));
      try {
        expr.compile(scope);

        value_t value;
        if (expr.is_function()) {
          // A bare function is called with the element's maximum width as
          // its only argument.
          call_scope_t args(scope);
          args.push_back(long(elem->max_width));
          value = expr.get_function()(args);
        } else {
          value = expr.calc(scope);
        }
        DEBUG("format.expr", "value = (" << value << ")");

        if (elem->min_width > 0)
          value.print(out, static_cast<int>(elem->min_width), -1,
                      ! elem->has_flags(ELEMENT_ALIGN_LEFT));
        else
          out << value.to_string();
      }
      catch (const calc_error&) {
        // Insert this expression's context ahead of whatever context had
        // already been collected, then let the error propagate.
        string current_context = error_context();

        add_error_context(_("While calculating format expression:"));
        add_error_context(expr.context_to_str());

        if (! current_context.empty())
          add_error_context(current_context);
        throw;
      }
      break;
    }
    }

    if (elem->max_width > 0 || elem->min_width > 0) {
      unistring temp(out.str());
      string    result;

      if (elem->max_width > 0 && elem->max_width < temp.length()) {
        result = truncate(temp, elem->max_width);
      } else {
        result = temp.extract();
        // Pad on the right up to the minimum width.
        if (elem->min_width > temp.length())
          result.append(elem->min_width - temp.length(), ' ');
      }
      out_str << result;
    } else {
      out_str << out.str();
    }
  }

  return out_str.str();
}
// Shortens `ustr` so that it occupies at most `width` characters and returns
// the result as a string.
//
// The string is returned unchanged when `width` is 0 or it already fits.
// Otherwise the elision style is `default_style`, except that a non-zero
// `account_abbrev_length` selects ABBREVIATE as long as
// `default_style_changed` is false:
//   - TRUNCATE_LEADING   ".." followed by the tail of the string
//   - TRUNCATE_MIDDLE    head, "..", tail
//   - ABBREVIATE         shortens the ':'-separated segments before the
//                        last one (needs a non-zero account_abbrev_length,
//                        otherwise falls through to trailing truncation)
//   - TRUNCATE_TRAILING  head of the string followed by ".."
//
// `width` must be below 4095 (asserted).
string format_t::truncate(const unistring&  ustr,
                          const std::size_t width,
                          const std::size_t account_abbrev_length)
{
  assert(width < 4095);

  const std::size_t len = ustr.length();
  if (width == 0 || len <= width)
    return ustr.extract();

  // Every style below reserves two columns for the ".." marker and computes
  // `width - 2`, which wraps around for an unsigned width of 1. With no room
  // for the marker, simply keep the leading character(s).
  if (width < 2)
    return ustr.extract(0, width);

  std::ostringstream buf;

  elision_style_t style = default_style;
  if (account_abbrev_length > 0 && ! default_style_changed)
    style = ABBREVIATE;

  switch (style) {
  case TRUNCATE_LEADING:
    // This method truncates at the beginning.
    buf << ".." << ustr.extract(len - (width - 2), width - 2);
    break;

  case TRUNCATE_MIDDLE:
    // This method truncates in the middle.
    buf << ustr.extract(0, (width - 2) / 2)
        << ".."
        << ustr.extract(len - ((width - 2) / 2 + (width - 2) % 2),
                        (width - 2) / 2 + (width - 2) % 2);
    break;

  case ABBREVIATE:
    if (account_abbrev_length > 0) {
      // The algorithm here is complex, but aims to preserve the most
      // information in the most useful places.
      //
      // Consider: You have an account name like
      // 'Assets:Banking:Check:Register'. This account name, which is
      // 29 characters long, must be shortened to fit in 20. How would
      // you shorten it?
      //
      // The approach taken below is to compute the difference, or 9
      // characters, and then distribute this difference semi-evenly
      // among first three segments of the account name, by taking
      // characters until the difference is gone. Further, earlier
      // segments will give up more of their share of letters than later
      // segments, since the later segments usually contain more useful
      // information.

      // First, chop up the Unicode string into individual segments.
      std::list<string> parts;
      string::size_type beg = 0;
      string strcopy(ustr.extract());
      for (string::size_type pos = strcopy.find(':');
           pos != string::npos;
           beg = pos + 1, pos = strcopy.find(':', beg))
        parts.push_back(string(strcopy, beg, pos - beg));
      parts.push_back(string(strcopy, beg));

      DEBUG("format.abbrev", "Account name: " << strcopy);
      DEBUG("format.abbrev",
            "Must fit a " << len << " char string in " << width << " chars");

      // Figure out the lengths of all the parts. The last part is
      // always displayed in full, while the former parts are
      // distributed, with the latter parts being longer than the
      // former, but with none shorter than account_abbrev_length.
      std::list<std::size_t> lens;
#if DEBUG_ON
      int index = 0;
#endif
      for (std::list<string>::iterator i = parts.begin();
           i != parts.end();
           ++i) {
        std::size_t l = unistring(*i).length();
        DEBUG("format.abbrev",
              "Segment " << ++index << " is " << l << " chars wide");
        lens.push_back(l);
      }

      // Determine the "overflow", or how many chars in excess we are.
      std::size_t overflow = len - width;
      DEBUG("format.abbrev",
            "There are " << overflow << " chars of overflow");

      // Walk through the first n-1 segments, and start subtracting
      // letters to decrease the overflow. This is done in multiple
      // passes until the overflow is gone, or we cannot reduce any
      // further. The calculation to find the amount to remove is:
      //
      //   overflow * (((len(segment) + counter * 3) * iteration) /
      //               ((len(string) - len(last_segment)) - counter))
      //
      // Where:
      //   overflow  - the amount that needs to be removed
      //   counter   - starts at n-1 for the first segment, then
      //               decreases by one until it reaches 0 for the
      //               last segment (which is never shortened).
      //               This value is used to weight the shrinkage
      //               so that earlier segments shrink faster.
      //   iteration - starts at 1, increase by 1 for every
      //               iteration of the loop
      //
      // In the example above, we have this account name:
      //
      //   Assets:Banking:Check:Register
      //
      // Therefore, the amount to be removed from Assets is calculated as:
      //
      //   9 * (((6 + 3 * 3) * 1) / ((29 - 8) - 3)) = ceil(7.5) = 8
      //
      // However, since removing 8 chars would make the length of the
      // segment shorter than the default minimum of 2, we can only
      // remove 4 chars from Assets to reduce the overflow. And on it
      // goes.
      //
      // The final result will be: As:Ban:Chec:Register

      std::size_t iteration      = 1;
      std::size_t len_minus_last = len - lens.back();
      while (overflow > 0) {
        std::size_t overflow_at_start = overflow;
        DEBUG("format.abbrev",
              "Overflow starting at " << overflow << " chars");
#if DEBUG_ON
        index = 0;
#endif
        std::size_t counter = lens.size();
        std::list<string>::iterator x = parts.begin();
        for (std::list<std::size_t>::iterator i = lens.begin();
             i != lens.end();
             ++i) {
          if (--counter == 0 || overflow == 0)
            break;
          DEBUG("format.abbrev", "Overflow is " << overflow << " chars");

          std::size_t adjust;
          if (overflow == 1) {
            adjust = 1;
          } else {
            // The denominator is zero when every segment before the last
            // one is empty; the quotient would then be infinite and its
            // conversion to an integer undefined. Ask for the whole
            // overflow instead, which the clamps below cut down.
            const double denom = double(len_minus_last) - double(counter);
            if (denom > 0.0)
              adjust = std::size_t
                (std::ceil(double(overflow) *
                           ((double(*i + counter*3) * double(iteration)) /
                            denom)));
            else
              adjust = overflow;
          }
          DEBUG("format.abbrev", "Weight calc: (" << overflow
                << " * (((" << *i << " + " << counter << "*3) * "
                << iteration << ") / (" << len_minus_last
                << " - " << counter << ")))");
          if (adjust == 0)
            adjust = 1;
          else if (adjust > overflow)
            adjust = overflow;
          DEBUG("format.abbrev", "The weighted part is " << adjust << " chars");

          // Never shrink a segment below account_abbrev_length.
          std::size_t slack = *i - std::min(*i, account_abbrev_length);
          if (adjust > slack)
            adjust = slack;

          if (adjust > 0) {
            DEBUG("format.abbrev",
                  "Reducing segment " << ++index << " by " << adjust << " chars");
            // Don't let the shortened segment end in whitespace. The bound
            // is tested first so that the index can never wrap around, and
            // the character is widened through unsigned char as the
            // <cctype> classifiers require.
            while (adjust < *i &&
                   std::isspace(static_cast<unsigned char>
                                ((*x)[*i - adjust - 1]))) {
              DEBUG("format.abbrev",
                    "Segment ends in whitespace, adjusting down");
              ++adjust;
            }
            (*i) -= adjust;
            DEBUG("format.abbrev",
                  "Segment " << index << " is now " << *i << " chars wide");
            if (adjust > overflow)
              overflow = 0;
            else
              overflow -= adjust;
            DEBUG("format.abbrev", "Overflow is now " << overflow << " chars");
          }
          ++x;
        }
        DEBUG("format.abbrev",
              "Overflow ending this time at " << overflow << " chars");
        // A pass that removed nothing means no further progress is possible.
        if (overflow == overflow_at_start)
          break;
        iteration++;
      }

      assert(parts.size() == lens.size());

      // Reassemble the name from the shortened segments; the last segment
      // is always written in full.
      std::list<string>::iterator      i = parts.begin();
      std::list<std::size_t>::iterator l = lens.begin();
      std::ostringstream               result;

      for (; i != parts.end() && l != lens.end(); ++i, ++l) {
        std::list<string>::iterator x = i;
        if (++x == parts.end()) {
          result << *i;
          break;
        }

        unistring temp(*i);
        if (temp.length() > *l)
          result << temp.extract(0, *l) << ":";
        else
          result << *i << ":";
      }

      if (overflow > 0) {
        // Even abbreviated it's too big to show the last account, so
        // abbreviate all but the last and truncate at the beginning.
        unistring temp(result.str());
        assert(temp.length() > width - 2);
        buf << ".." << temp.extract(temp.length() - (width - 2), width - 2);
      } else {
        buf << result.str();
      }
      break;
    }
    // fall through...

  case TRUNCATE_TRAILING:
    // This method truncates at the end (the default).
    buf << ustr.extract(0, width - 2) << "..";
    break;
  }

  return buf.str();
}
} // namespace ledger