Properly handle UTF-8 characters in commodity strings.
This commit is contained in:
parent
9c9320bc58
commit
6f2e3b8864
11 changed files with 136 additions and 123 deletions
|
|
@ -84,6 +84,7 @@ pkginclude_HEADERS = \
|
|||
src/mask.h \
|
||||
src/stream.h \
|
||||
src/pstream.h \
|
||||
src/unistring.h \
|
||||
\
|
||||
src/amount.h \
|
||||
src/commodity.h \
|
||||
|
|
|
|||
|
|
@ -1,42 +1,42 @@
|
|||
N $
|
||||
|
||||
= account =~ /^Expenses:Books/
|
||||
(Liabilities:Taxes) -0.10
|
||||
(Liabilities:Taxes) -0.10
|
||||
|
||||
~ Monthly
|
||||
Assets:Bank:Checking $500.00
|
||||
Income:Salary
|
||||
Assets:Bank:Checking $500.00
|
||||
Income:Salary
|
||||
|
||||
2004/05/01 * Checking balance
|
||||
Assets:Bank:Checking $1,000.00
|
||||
Equity:Opening Balances
|
||||
Assets:Bank:Checking $1,000.00
|
||||
Equity:Opening Balances
|
||||
|
||||
2004/05/03=2004/05/01 * Investment balance
|
||||
Assets:Brokerage 50 AAPL @ $30.00
|
||||
Equity:Opening Balances
|
||||
Assets:Brokerage 50 AAPL @ $30.00
|
||||
Equity:Opening Balances
|
||||
|
||||
2004/05/14 * Páy dày
|
||||
Assets:Bank:Checking $500.00
|
||||
Income:Salary
|
||||
Assets:Bank:Checking 500.00€
|
||||
Income:Salary
|
||||
|
||||
2004/05/14 * Another dày in which there is Páying
|
||||
Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
|
||||
Income:Salary
|
||||
Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
|
||||
Income:Salary
|
||||
|
||||
2004/05/14 * Another dày in which there is Páying
|
||||
Русский язык:Русский язык:Русский язык:Русский язык $1000.00
|
||||
Income:Salary
|
||||
Русский язык:Русский язык:Русский язык:Русский язык $1000.00
|
||||
Income:Salary
|
||||
|
||||
2004/05/27 Book Store
|
||||
Expenses:Books $20.00
|
||||
Liabilities:MasterCard
|
||||
Expenses:Books $20.00
|
||||
Liabilities:MasterCard
|
||||
|
||||
2004/05/27 (100) Credit card company
|
||||
; This is an entry note!
|
||||
; Sample: Value
|
||||
Liabilities:MasterCard $20.00
|
||||
; This is a transaction note!
|
||||
; Sample: Another Value
|
||||
; :MyTag:
|
||||
Assets:Bank:Checking
|
||||
; :AnotherTag:
|
||||
; This is an entry note!
|
||||
; Sample: Value
|
||||
Liabilities:MasterCard $20.00
|
||||
; This is a transaction note!
|
||||
; Sample: Another Value
|
||||
; :MyTag:
|
||||
Assets:Bank:Checking
|
||||
; :AnotherTag:
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "amount.h"
|
||||
#include "commodity.h"
|
||||
#include "unistring.h"
|
||||
|
||||
namespace ledger {
|
||||
|
||||
|
|
@ -1001,6 +1002,20 @@ void amount_t::print(std::ostream& _out) const
|
|||
_out << out.str();
|
||||
}
|
||||
|
||||
void amount_t::right_justify(std::ostream& out, int width) const
|
||||
{
|
||||
std::ostringstream buf;
|
||||
buf << *this;
|
||||
|
||||
unistring temp(buf.str());
|
||||
|
||||
int spacing = width - int(temp.length());
|
||||
while (spacing-- > 0)
|
||||
out << ' ';
|
||||
|
||||
out << temp.extract();
|
||||
}
|
||||
|
||||
bool amount_t::valid() const
|
||||
{
|
||||
if (quantity) {
|
||||
|
|
|
|||
|
|
@ -640,6 +640,7 @@ public:
|
|||
of its commodity's display precision.
|
||||
*/
|
||||
void print(std::ostream& out) const;
|
||||
void right_justify(std::ostream& out, int width) const;
|
||||
|
||||
/*@}*/
|
||||
|
||||
|
|
|
|||
|
|
@ -240,17 +240,11 @@ void balance_t::print(std::ostream& out,
|
|||
first = false;
|
||||
width = first_width;
|
||||
}
|
||||
|
||||
out.width(width);
|
||||
out.fill(' ');
|
||||
out << std::right << *amount;
|
||||
amount->right_justify(out, width);
|
||||
}
|
||||
|
||||
if (first) {
|
||||
out.width(first_width);
|
||||
out.fill(' ');
|
||||
out << std::right << "0";
|
||||
}
|
||||
if (first)
|
||||
amount_t(0L).right_justify(out, first_width);
|
||||
}
|
||||
|
||||
} // namespace ledger
|
||||
|
|
|
|||
|
|
@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol)
|
|||
else
|
||||
throw_(amount_error, "Quoted commodity symbol lacks closing quote");
|
||||
} else {
|
||||
READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]);
|
||||
char * _p = buf;
|
||||
c = in.peek();
|
||||
while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') {
|
||||
int bytes = 0;
|
||||
int size = _p - buf;
|
||||
|
||||
unsigned char d = c;
|
||||
|
||||
// Check for the start of a UTF-8 multi-byte encoded string
|
||||
if (d >= 192 && d <= 223 && size < 254)
|
||||
bytes = 2;
|
||||
else if (d >= 224 && d <= 239 && size < 253)
|
||||
bytes = 3;
|
||||
else if (d >= 240 && d <= 247 && size < 252)
|
||||
bytes = 4;
|
||||
else if (d >= 248 && d <= 251 && size < 251)
|
||||
bytes = 5;
|
||||
else if (d >= 252 && d <= 253 && size < 250)
|
||||
bytes = 6;
|
||||
else if (d >= 254) // UTF-8 encoding error
|
||||
break;
|
||||
|
||||
if (bytes > 0) { // we're looking at a UTF-8 encoding
|
||||
for (int i = 0; i < bytes; i++) {
|
||||
in.get(c);
|
||||
if (in.bad() || in.eof())
|
||||
break;
|
||||
*_p++ = c;
|
||||
}
|
||||
}
|
||||
else if (invalid_chars[static_cast<unsigned char>(c)]) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
in.get(c);
|
||||
if (in.eof())
|
||||
break;
|
||||
if (c == '\\') {
|
||||
in.get(c);
|
||||
if (in.eof())
|
||||
break;
|
||||
}
|
||||
*_p++ = c;
|
||||
}
|
||||
|
||||
c = in.peek();
|
||||
}
|
||||
*_p = '\0';
|
||||
|
||||
if (is_reserved_token(buf))
|
||||
buf[0] = '\0';
|
||||
}
|
||||
|
|
|
|||
48
src/format.h
48
src/format.h
|
|
@ -47,58 +47,12 @@
|
|||
#define _FORMAT_H
|
||||
|
||||
#include "expr.h"
|
||||
#include "unistring.h"
|
||||
|
||||
namespace ledger {
|
||||
|
||||
DECLARE_EXCEPTION(format_error, std::runtime_error);
|
||||
|
||||
#if defined(SUPPORT_UNICODE)
|
||||
|
||||
/**
|
||||
* @class unistring
|
||||
*
|
||||
* @brief Abstract working with UTF-32 encoded Unicode strings
|
||||
*
|
||||
* The input to the string is a UTF8 encoded ledger::string, which can
|
||||
* then have its true length be taken, or characters extracted.
|
||||
*/
|
||||
class unistring
|
||||
{
|
||||
std::vector<uint32_t> utf32chars;
|
||||
|
||||
public:
|
||||
unistring(const string& input)
|
||||
{
|
||||
TRACE_CTOR(unistring, "");
|
||||
|
||||
const char * p = input.c_str();
|
||||
std::size_t len = input.length();
|
||||
|
||||
VERIFY(utf8::is_valid(p, p + len));
|
||||
|
||||
utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
|
||||
}
|
||||
~unistring() {
|
||||
TRACE_DTOR(unistring);
|
||||
}
|
||||
|
||||
std::size_t length() const {
|
||||
return utf32chars.size();
|
||||
}
|
||||
|
||||
string extract(const std::size_t begin = 0,
|
||||
const std::size_t len = 0) const
|
||||
{
|
||||
string utf8result;
|
||||
utf8::utf32to8(utf32chars.begin() + begin,
|
||||
utf32chars.begin() + begin + (len ? len : length()),
|
||||
std::back_inserter(utf8result));
|
||||
return utf8result;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // SUPPORT_UNICODE
|
||||
|
||||
class report_t;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -68,7 +68,8 @@ report_t::report_t(session_t& _session) : session(_session)
|
|||
" %12(amount)%(comment | \"\")\n%/\n");
|
||||
|
||||
HANDLER(balance_format_).on(
|
||||
"%20(strip(display_total)) %(depth_spacer)%-(partial_account)\n");
|
||||
"%20(print_balance(strip(display_total), 20))"
|
||||
" %(depth_spacer)%-(partial_account)\n");
|
||||
|
||||
HANDLER(equity_format_).on("\n%D %Y%C%P\n%/ %-34W %12t\n");
|
||||
|
||||
|
|
@ -191,7 +192,7 @@ value_t report_t::fn_print_balance(call_scope_t& args)
|
|||
|
||||
std::ostringstream out;
|
||||
args[0].strip_annotations(what_to_keep())
|
||||
.print(out, *first_width, *latter_width,
|
||||
.print(out, *first_width, latter_width ? *latter_width : -1,
|
||||
HANDLED(date_format_) ?
|
||||
HANDLER(date_format_).str() : optional<string>());
|
||||
|
||||
|
|
|
|||
|
|
@ -138,10 +138,7 @@ typedef std::ostream::pos_type ostream_pos_type;
|
|||
#include <gmp.h>
|
||||
#include <mpfr.h>
|
||||
#include "sha1.h"
|
||||
#define SUPPORT_UNICODE 1
|
||||
#if defined(SUPPORT_UNICODE)
|
||||
#include "utf8.h"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBEDIT
|
||||
#include <editline/readline.h>
|
||||
|
|
|
|||
76
src/utils.h
76
src/utils.h
|
|
@ -546,50 +546,52 @@ inline char * next_element(char * buf, bool variable = false) {
|
|||
|
||||
/**
 * Skip leading whitespace on `in` and return the next character
 * without consuming it.
 *
 * Whitespace characters are consumed from the stream; the first
 * non-whitespace character is only peeked at.  When the stream runs out
 * (or is otherwise no longer good) the last peek() result, converted to
 * char, is returned.
 */
inline char peek_next_nonws(std::istream& in) {
  char c = static_cast<char>(in.peek());
  // std::isspace is only defined for values representable as unsigned
  // char (or EOF); a plain char holding a UTF-8 byte may be negative.
  while (in.good() && ! in.eof() &&
	 std::isspace(static_cast<unsigned char>(c))) {
    in.get(c);
    c = static_cast<char>(in.peek());
  }
  return c;
}
|
||||
|
||||
/*
 * READ_INTO(str, targ, size, var, cond)
 *
 * Copy characters from the stream `str` into the char buffer `targ`,
 * then NUL-terminate it.  Reading stops at the first of: the stream no
 * longer being good, end of input, a newline, `cond` evaluating false
 * for the peeked character (held in `var`), or `size` characters having
 * been stored.  A backslash is dropped and the character after it is
 * stored in its place.
 *
 * `targ` must have room for `size` + 1 chars.  All stream checks go
 * through the `str` argument, so the macro does not depend on the
 * caller's stream being named `in`.
 */
#define READ_INTO(str, targ, size, var, cond) {				\
    char * _p = targ;							\
    var = str.peek();							\
    while (str.good() && ! str.eof() && var != '\n' &&			\
	   (cond) && _p - targ < size) {				\
      str.get(var);							\
      if (str.eof())							\
	break;								\
      if (var == '\\') {						\
	str.get(var);							\
	if (str.eof())							\
	  break;							\
      }									\
      *_p++ = var;							\
      var = str.peek();							\
    }									\
    *_p = '\0';								\
  }
|
||||
|
||||
/*
 * READ_INTO_(str, targ, size, var, idx, cond)
 *
 * Same as READ_INTO, but additionally increments `idx` once for every
 * character consumed from the stream, including a backslash that is
 * dropped from the output.
 *
 * Copy characters from the stream `str` into the char buffer `targ`,
 * then NUL-terminate it.  Reading stops at the first of: the stream no
 * longer being good, end of input, a newline, `cond` evaluating false
 * for the peeked character (held in `var`), or `size` characters having
 * been stored.  `targ` must have room for `size` + 1 chars.  All stream
 * checks go through the `str` argument, so the macro does not depend on
 * the caller's stream being named `in`.
 */
#define READ_INTO_(str, targ, size, var, idx, cond) {			\
    char * _p = targ;							\
    var = str.peek();							\
    while (str.good() && ! str.eof() && var != '\n' &&			\
	   (cond) && _p - targ < size) {				\
      str.get(var);							\
      if (str.eof())							\
	break;								\
      idx++;								\
      if (var == '\\') {						\
	str.get(var);							\
	if (str.eof())							\
	  break;							\
	idx++;								\
      }									\
      *_p++ = var;							\
      var = str.peek();							\
    }									\
    *_p = '\0';								\
  }
|
||||
|
||||
} // namespace ledger
|
||||
|
||||
|
|
|
|||
|
|
@ -1271,7 +1271,7 @@ void value_t::print(std::ostream& out,
|
|||
break;
|
||||
|
||||
case AMOUNT:
|
||||
out << as_amount();
|
||||
as_amount().right_justify(out, first_width);
|
||||
break;
|
||||
|
||||
case STRING:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue