Properly handle UTF-8 characters in commodity strings.

This commit is contained in:
John Wiegley 2009-02-12 02:34:39 -04:00
parent 9c9320bc58
commit 6f2e3b8864
11 changed files with 136 additions and 123 deletions

View file

@ -84,6 +84,7 @@ pkginclude_HEADERS = \
src/mask.h \
src/stream.h \
src/pstream.h \
src/unistring.h \
\
src/amount.h \
src/commodity.h \

View file

@ -1,42 +1,42 @@
N $
= account =~ /^Expenses:Books/
(Liabilities:Taxes) -0.10
(Liabilities:Taxes) -0.10
~ Monthly
Assets:Bank:Checking $500.00
Income:Salary
Assets:Bank:Checking $500.00
Income:Salary
2004/05/01 * Checking balance
Assets:Bank:Checking $1,000.00
Equity:Opening Balances
Assets:Bank:Checking $1,000.00
Equity:Opening Balances
2004/05/03=2004/05/01 * Investment balance
Assets:Brokerage 50 AAPL @ $30.00
Equity:Opening Balances
Assets:Brokerage 50 AAPL @ $30.00
Equity:Opening Balances
2004/05/14 * Páy dày
Assets:Bank:Checking $500.00
Income:Salary
Assets:Bank:Checking 500.00
Income:Salary
2004/05/14 * Another dày in which there is Páying
Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
Income:Salary
Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
Income:Salary
2004/05/14 * Another dày in which there is Páying
Русский язык:Русский язык:Русский язык:Русский язык $1000.00
Income:Salary
Русский язык:Русский язык:Русский язык:Русский язык $1000.00
Income:Salary
2004/05/27 Book Store
Expenses:Books $20.00
Liabilities:MasterCard
Expenses:Books $20.00
Liabilities:MasterCard
2004/05/27 (100) Credit card company
; This is an entry note!
; Sample: Value
Liabilities:MasterCard $20.00
; This is a transaction note!
; Sample: Another Value
; :MyTag:
Assets:Bank:Checking
; :AnotherTag:
; This is an entry note!
; Sample: Value
Liabilities:MasterCard $20.00
; This is a transaction note!
; Sample: Another Value
; :MyTag:
Assets:Bank:Checking
; :AnotherTag:

View file

@ -31,6 +31,7 @@
#include "amount.h"
#include "commodity.h"
#include "unistring.h"
namespace ledger {
@ -1001,6 +1002,20 @@ void amount_t::print(std::ostream& _out) const
_out << out.str();
}
void amount_t::right_justify(std::ostream& out, int width) const
{
std::ostringstream buf;
buf << *this;
unistring temp(buf.str());
int spacing = width - int(temp.length());
while (spacing-- > 0)
out << ' ';
out << temp.extract();
}
bool amount_t::valid() const
{
if (quantity) {

View file

@ -640,6 +640,7 @@ public:
of its commodity's display precision.
*/
void print(std::ostream& out) const;
void right_justify(std::ostream& out, int width) const;
/*@}*/

View file

@ -240,17 +240,11 @@ void balance_t::print(std::ostream& out,
first = false;
width = first_width;
}
out.width(width);
out.fill(' ');
out << std::right << *amount;
amount->right_justify(out, width);
}
if (first) {
out.width(first_width);
out.fill(' ');
out << std::right << "0";
}
if (first)
amount_t(0L).right_justify(out, first_width);
}
} // namespace ledger

View file

@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol)
else
throw_(amount_error, "Quoted commodity symbol lacks closing quote");
} else {
READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]);
char * _p = buf;
c = in.peek();
while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') {
int bytes = 0;
int size = _p - buf;
unsigned char d = c;
// Check for the start of a UTF-8 multi-byte encoded string
if (d >= 192 && d <= 223 && size < 254)
bytes = 2;
else if (d >= 224 && d <= 239 && size < 253)
bytes = 3;
else if (d >= 240 && d <= 247 && size < 252)
bytes = 4;
else if (d >= 248 && d <= 251 && size < 251)
bytes = 5;
else if (d >= 252 && d <= 253 && size < 250)
bytes = 6;
else if (d >= 254) // UTF-8 encoding error
break;
if (bytes > 0) { // we're looking at a UTF-8 encoding
for (int i = 0; i < bytes; i++) {
in.get(c);
if (in.bad() || in.eof())
break;
*_p++ = c;
}
}
else if (invalid_chars[static_cast<unsigned char>(c)]) {
break;
}
else {
in.get(c);
if (in.eof())
break;
if (c == '\\') {
in.get(c);
if (in.eof())
break;
}
*_p++ = c;
}
c = in.peek();
}
*_p = '\0';
if (is_reserved_token(buf))
buf[0] = '\0';
}

View file

@ -47,58 +47,12 @@
#define _FORMAT_H
#include "expr.h"
#include "unistring.h"
namespace ledger {
DECLARE_EXCEPTION(format_error, std::runtime_error);
#if defined(SUPPORT_UNICODE)
/**
 * @class unistring
 *
 * @brief Holds a string as a sequence of UTF-32 code points.
 *
 * The constructor takes a UTF-8 encoded ledger::string and converts it with
 * utf8::utf8to32.  length() then reports the number of stored code points
 * (not bytes), and extract() converts a range of them back to UTF-8.
 */
class unistring
{
  std::vector<uint32_t> utf32chars;

public:
  /**
   * Convert @p input from UTF-8 into the internal UTF-32 vector.  The
   * input is checked with VERIFY(utf8::is_valid(...)) before conversion.
   */
  unistring(const string& input)
  {
    TRACE_CTOR(unistring, "");

    const char * p   = input.c_str();
    std::size_t  len = input.length();

    VERIFY(utf8::is_valid(p, p + len));
    utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
  }
  ~unistring() {
    TRACE_DTOR(unistring);
  }

  /** Number of stored code points. */
  std::size_t length() const {
    return utf32chars.size();
  }

  /**
   * Return code points [begin, begin + len) re-encoded as UTF-8.
   *
   * @param begin  Index of the first code point to return.
   * @param len    Number of code points to return; 0 means "through the
   *               end of the string".
   * @return The requested range as UTF-8.  The range is clamped to the
   *         stored code points, so a @p begin at or past the end yields an
   *         empty string and an over-long @p len is truncated.
   */
  string extract(const std::size_t begin = 0,
                 const std::size_t len   = 0) const
  {
    string utf8result;

    const std::size_t total = length();
    if (begin >= total)
      return utf8result;

    // The end iterator used to be begin + length() when len == 0, which
    // runs past the vector for any begin > 0.  Bound the count by what is
    // actually left after `begin`.
    const std::size_t available = total - begin;
    const std::size_t count =
      (len == 0 || len > available) ? available : len;

    utf8::utf32to8(utf32chars.begin() + begin,
                   utf32chars.begin() + begin + count,
                   std::back_inserter(utf8result));
    return utf8result;
  }
};
#endif // SUPPORT_UNICODE
class report_t;
/**

View file

@ -68,7 +68,8 @@ report_t::report_t(session_t& _session) : session(_session)
" %12(amount)%(comment | \"\")\n%/\n");
HANDLER(balance_format_).on(
"%20(strip(display_total)) %(depth_spacer)%-(partial_account)\n");
"%20(print_balance(strip(display_total), 20))"
" %(depth_spacer)%-(partial_account)\n");
HANDLER(equity_format_).on("\n%D %Y%C%P\n%/ %-34W %12t\n");
@ -191,7 +192,7 @@ value_t report_t::fn_print_balance(call_scope_t& args)
std::ostringstream out;
args[0].strip_annotations(what_to_keep())
.print(out, *first_width, *latter_width,
.print(out, *first_width, latter_width ? *latter_width : -1,
HANDLED(date_format_) ?
HANDLER(date_format_).str() : optional<string>());

View file

@ -138,10 +138,7 @@ typedef std::ostream::pos_type ostream_pos_type;
#include <gmp.h>
#include <mpfr.h>
#include "sha1.h"
#define SUPPORT_UNICODE 1
#if defined(SUPPORT_UNICODE)
#include "utf8.h"
#endif
#ifdef HAVE_LIBEDIT
#include <editline/readline.h>

View file

@ -546,50 +546,52 @@ inline char * next_element(char * buf, bool variable = false) {
/**
 * Consume leading whitespace from @p in and return the next character
 * without extracting it.
 *
 * @param in  Stream to read from.
 * @return The result of the final peek() converted to char.  When the
 *         stream is exhausted or not good() this is whatever peek()
 *         reported (the EOF value narrowed to char), so callers should
 *         check the stream state.
 */
inline char peek_next_nonws(std::istream& in) {
  char c = static_cast<char>(in.peek());
  // std::isspace requires a value representable as unsigned char; a plain
  // (possibly signed) char holding a UTF-8 byte >= 0x80 would be negative.
  while (in.good() && ! in.eof() &&
         std::isspace(static_cast<unsigned char>(c))) {
    in.get(c);
    c = static_cast<char>(in.peek());
  }
  return c;
}
// READ_INTO(str, targ, size, var, cond)
//
// Copy characters from stream `str` into the char buffer `targ` and
// NUL-terminate it.  Reading stops at the first of: stream no longer
// good/at EOF, a newline, `cond` evaluating false for the peeked character,
// or `size` characters stored.  `targ` must therefore have room for
// size + 1 chars.
//
//   var   char lvalue; holds the peeked/extracted character and may be
//         referenced by `cond`.
//   cond  expression re-evaluated before each character is consumed.
//
// A backslash is dropped and the character after it is stored verbatim.
// The EOF check in that branch uses `str` (it previously named a variable
// `in`, which only worked when the caller's stream had that name).
#define READ_INTO(str, targ, size, var, cond) {                         \
    char * _p = targ;                                                   \
    var = str.peek();                                                   \
    while (str.good() && ! str.eof() && var != '\n' &&                  \
           (cond) && _p - targ < size) {                                \
      str.get(var);                                                     \
      if (str.eof())                                                    \
        break;                                                          \
      if (var == '\\') {                                                \
        str.get(var);                                                   \
        if (str.eof())                                                  \
          break;                                                        \
      }                                                                 \
      *_p++ = var;                                                      \
      var = str.peek();                                                 \
    }                                                                   \
    *_p = '\0';                                                         \
  }
// READ_INTO_(str, targ, size, var, idx, cond)
//
// Same as READ_INTO, but additionally increments `idx` once for every
// character extracted from `str` -- including a backslash that is dropped
// from the output -- so `idx` tracks how far the stream has advanced.
//
//   targ  char buffer with room for size + 1 chars (a NUL is appended).
//   var   char lvalue; holds the peeked/extracted character and may be
//         referenced by `cond`.
//   idx   integer lvalue, incremented per extracted character.
//   cond  expression re-evaluated before each character is consumed.
//
// The EOF check in the escape branch uses `str` (it previously named a
// variable `in`, which only worked when the caller's stream had that name).
#define READ_INTO_(str, targ, size, var, idx, cond) {                   \
    char * _p = targ;                                                   \
    var = str.peek();                                                   \
    while (str.good() && ! str.eof() && var != '\n' &&                  \
           (cond) && _p - targ < size) {                                \
      str.get(var);                                                     \
      if (str.eof())                                                    \
        break;                                                          \
      idx++;                                                            \
      if (var == '\\') {                                                \
        str.get(var);                                                   \
        if (str.eof())                                                  \
          break;                                                        \
        idx++;                                                          \
      }                                                                 \
      *_p++ = var;                                                      \
      var = str.peek();                                                 \
    }                                                                   \
    *_p = '\0';                                                         \
  }
} // namespace ledger

View file

@ -1271,7 +1271,7 @@ void value_t::print(std::ostream& out,
break;
case AMOUNT:
out << as_amount();
as_amount().right_justify(out, first_width);
break;
case STRING: