Properly handle UTF-8 characters in commodity strings.
This commit is contained in:
parent
9c9320bc58
commit
6f2e3b8864
11 changed files with 136 additions and 123 deletions
|
|
@ -84,6 +84,7 @@ pkginclude_HEADERS = \
|
||||||
src/mask.h \
|
src/mask.h \
|
||||||
src/stream.h \
|
src/stream.h \
|
||||||
src/pstream.h \
|
src/pstream.h \
|
||||||
|
src/unistring.h \
|
||||||
\
|
\
|
||||||
src/amount.h \
|
src/amount.h \
|
||||||
src/commodity.h \
|
src/commodity.h \
|
||||||
|
|
|
||||||
|
|
@ -1,42 +1,42 @@
|
||||||
N $
|
N $
|
||||||
|
|
||||||
= account =~ /^Expenses:Books/
|
= account =~ /^Expenses:Books/
|
||||||
(Liabilities:Taxes) -0.10
|
(Liabilities:Taxes) -0.10
|
||||||
|
|
||||||
~ Monthly
|
~ Monthly
|
||||||
Assets:Bank:Checking $500.00
|
Assets:Bank:Checking $500.00
|
||||||
Income:Salary
|
Income:Salary
|
||||||
|
|
||||||
2004/05/01 * Checking balance
|
2004/05/01 * Checking balance
|
||||||
Assets:Bank:Checking $1,000.00
|
Assets:Bank:Checking $1,000.00
|
||||||
Equity:Opening Balances
|
Equity:Opening Balances
|
||||||
|
|
||||||
2004/05/03=2004/05/01 * Investment balance
|
2004/05/03=2004/05/01 * Investment balance
|
||||||
Assets:Brokerage 50 AAPL @ $30.00
|
Assets:Brokerage 50 AAPL @ $30.00
|
||||||
Equity:Opening Balances
|
Equity:Opening Balances
|
||||||
|
|
||||||
2004/05/14 * Páy dày
|
2004/05/14 * Páy dày
|
||||||
Assets:Bank:Checking $500.00
|
Assets:Bank:Checking 500.00€
|
||||||
Income:Salary
|
Income:Salary
|
||||||
|
|
||||||
2004/05/14 * Another dày in which there is Páying
|
2004/05/14 * Another dày in which there is Páying
|
||||||
Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
|
Asséts:Bánk:Chécking:Asséts:Bánk:Chécking $500.00
|
||||||
Income:Salary
|
Income:Salary
|
||||||
|
|
||||||
2004/05/14 * Another dày in which there is Páying
|
2004/05/14 * Another dày in which there is Páying
|
||||||
Русский язык:Русский язык:Русский язык:Русский язык $1000.00
|
Русский язык:Русский язык:Русский язык:Русский язык $1000.00
|
||||||
Income:Salary
|
Income:Salary
|
||||||
|
|
||||||
2004/05/27 Book Store
|
2004/05/27 Book Store
|
||||||
Expenses:Books $20.00
|
Expenses:Books $20.00
|
||||||
Liabilities:MasterCard
|
Liabilities:MasterCard
|
||||||
|
|
||||||
2004/05/27 (100) Credit card company
|
2004/05/27 (100) Credit card company
|
||||||
; This is an entry note!
|
; This is an entry note!
|
||||||
; Sample: Value
|
; Sample: Value
|
||||||
Liabilities:MasterCard $20.00
|
Liabilities:MasterCard $20.00
|
||||||
; This is a transaction note!
|
; This is a transaction note!
|
||||||
; Sample: Another Value
|
; Sample: Another Value
|
||||||
; :MyTag:
|
; :MyTag:
|
||||||
Assets:Bank:Checking
|
Assets:Bank:Checking
|
||||||
; :AnotherTag:
|
; :AnotherTag:
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@
|
||||||
|
|
||||||
#include "amount.h"
|
#include "amount.h"
|
||||||
#include "commodity.h"
|
#include "commodity.h"
|
||||||
|
#include "unistring.h"
|
||||||
|
|
||||||
namespace ledger {
|
namespace ledger {
|
||||||
|
|
||||||
|
|
@ -1001,6 +1002,20 @@ void amount_t::print(std::ostream& _out) const
|
||||||
_out << out.str();
|
_out << out.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void amount_t::right_justify(std::ostream& out, int width) const
|
||||||
|
{
|
||||||
|
std::ostringstream buf;
|
||||||
|
buf << *this;
|
||||||
|
|
||||||
|
unistring temp(buf.str());
|
||||||
|
|
||||||
|
int spacing = width - int(temp.length());
|
||||||
|
while (spacing-- > 0)
|
||||||
|
out << ' ';
|
||||||
|
|
||||||
|
out << temp.extract();
|
||||||
|
}
|
||||||
|
|
||||||
bool amount_t::valid() const
|
bool amount_t::valid() const
|
||||||
{
|
{
|
||||||
if (quantity) {
|
if (quantity) {
|
||||||
|
|
|
||||||
|
|
@ -640,6 +640,7 @@ public:
|
||||||
of its commodity's display precision.
|
of its commodity's display precision.
|
||||||
*/
|
*/
|
||||||
void print(std::ostream& out) const;
|
void print(std::ostream& out) const;
|
||||||
|
void right_justify(std::ostream& out, int width) const;
|
||||||
|
|
||||||
/*@}*/
|
/*@}*/
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -240,17 +240,11 @@ void balance_t::print(std::ostream& out,
|
||||||
first = false;
|
first = false;
|
||||||
width = first_width;
|
width = first_width;
|
||||||
}
|
}
|
||||||
|
amount->right_justify(out, width);
|
||||||
out.width(width);
|
|
||||||
out.fill(' ');
|
|
||||||
out << std::right << *amount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (first) {
|
if (first)
|
||||||
out.width(first_width);
|
amount_t(0L).right_justify(out, first_width);
|
||||||
out.fill(' ');
|
|
||||||
out << std::right << "0";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ledger
|
} // namespace ledger
|
||||||
|
|
|
||||||
|
|
@ -571,7 +571,55 @@ void commodity_t::parse_symbol(std::istream& in, string& symbol)
|
||||||
else
|
else
|
||||||
throw_(amount_error, "Quoted commodity symbol lacks closing quote");
|
throw_(amount_error, "Quoted commodity symbol lacks closing quote");
|
||||||
} else {
|
} else {
|
||||||
READ_INTO(in, buf, 255, c, ! invalid_chars[static_cast<unsigned char>(c)]);
|
char * _p = buf;
|
||||||
|
c = in.peek();
|
||||||
|
while (_p - buf < 255 && in.good() && ! in.eof() && c != '\n') {
|
||||||
|
int bytes = 0;
|
||||||
|
int size = _p - buf;
|
||||||
|
|
||||||
|
unsigned char d = c;
|
||||||
|
|
||||||
|
// Check for the start of a UTF-8 multi-byte encoded string
|
||||||
|
if (d >= 192 && d <= 223 && size < 254)
|
||||||
|
bytes = 2;
|
||||||
|
else if (d >= 224 && d <= 239 && size < 253)
|
||||||
|
bytes = 3;
|
||||||
|
else if (d >= 240 && d <= 247 && size < 252)
|
||||||
|
bytes = 4;
|
||||||
|
else if (d >= 248 && d <= 251 && size < 251)
|
||||||
|
bytes = 5;
|
||||||
|
else if (d >= 252 && d <= 253 && size < 250)
|
||||||
|
bytes = 6;
|
||||||
|
else if (d >= 254) // UTF-8 encoding error
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (bytes > 0) { // we're looking at a UTF-8 encoding
|
||||||
|
for (int i = 0; i < bytes; i++) {
|
||||||
|
in.get(c);
|
||||||
|
if (in.bad() || in.eof())
|
||||||
|
break;
|
||||||
|
*_p++ = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (invalid_chars[static_cast<unsigned char>(c)]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
in.get(c);
|
||||||
|
if (in.eof())
|
||||||
|
break;
|
||||||
|
if (c == '\\') {
|
||||||
|
in.get(c);
|
||||||
|
if (in.eof())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*_p++ = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
c = in.peek();
|
||||||
|
}
|
||||||
|
*_p = '\0';
|
||||||
|
|
||||||
if (is_reserved_token(buf))
|
if (is_reserved_token(buf))
|
||||||
buf[0] = '\0';
|
buf[0] = '\0';
|
||||||
}
|
}
|
||||||
|
|
|
||||||
48
src/format.h
48
src/format.h
|
|
@ -47,58 +47,12 @@
|
||||||
#define _FORMAT_H
|
#define _FORMAT_H
|
||||||
|
|
||||||
#include "expr.h"
|
#include "expr.h"
|
||||||
|
#include "unistring.h"
|
||||||
|
|
||||||
namespace ledger {
|
namespace ledger {
|
||||||
|
|
||||||
DECLARE_EXCEPTION(format_error, std::runtime_error);
|
DECLARE_EXCEPTION(format_error, std::runtime_error);
|
||||||
|
|
||||||
#if defined(SUPPORT_UNICODE)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @class unistring
|
|
||||||
*
|
|
||||||
* @brief Abstract working with UTF-32 encoded Unicode strings
|
|
||||||
*
|
|
||||||
* The input to the string is a UTF8 encoded ledger::string, which can
|
|
||||||
* then have its true length be taken, or characters extracted.
|
|
||||||
*/
|
|
||||||
class unistring
|
|
||||||
{
|
|
||||||
std::vector<uint32_t> utf32chars;
|
|
||||||
|
|
||||||
public:
|
|
||||||
unistring(const string& input)
|
|
||||||
{
|
|
||||||
TRACE_CTOR(unistring, "");
|
|
||||||
|
|
||||||
const char * p = input.c_str();
|
|
||||||
std::size_t len = input.length();
|
|
||||||
|
|
||||||
VERIFY(utf8::is_valid(p, p + len));
|
|
||||||
|
|
||||||
utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
|
|
||||||
}
|
|
||||||
~unistring() {
|
|
||||||
TRACE_DTOR(unistring);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::size_t length() const {
|
|
||||||
return utf32chars.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
string extract(const std::size_t begin = 0,
|
|
||||||
const std::size_t len = 0) const
|
|
||||||
{
|
|
||||||
string utf8result;
|
|
||||||
utf8::utf32to8(utf32chars.begin() + begin,
|
|
||||||
utf32chars.begin() + begin + (len ? len : length()),
|
|
||||||
std::back_inserter(utf8result));
|
|
||||||
return utf8result;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif // SUPPORT_UNICODE
|
|
||||||
|
|
||||||
class report_t;
|
class report_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,8 @@ report_t::report_t(session_t& _session) : session(_session)
|
||||||
" %12(amount)%(comment | \"\")\n%/\n");
|
" %12(amount)%(comment | \"\")\n%/\n");
|
||||||
|
|
||||||
HANDLER(balance_format_).on(
|
HANDLER(balance_format_).on(
|
||||||
"%20(strip(display_total)) %(depth_spacer)%-(partial_account)\n");
|
"%20(print_balance(strip(display_total), 20))"
|
||||||
|
" %(depth_spacer)%-(partial_account)\n");
|
||||||
|
|
||||||
HANDLER(equity_format_).on("\n%D %Y%C%P\n%/ %-34W %12t\n");
|
HANDLER(equity_format_).on("\n%D %Y%C%P\n%/ %-34W %12t\n");
|
||||||
|
|
||||||
|
|
@ -191,7 +192,7 @@ value_t report_t::fn_print_balance(call_scope_t& args)
|
||||||
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
args[0].strip_annotations(what_to_keep())
|
args[0].strip_annotations(what_to_keep())
|
||||||
.print(out, *first_width, *latter_width,
|
.print(out, *first_width, latter_width ? *latter_width : -1,
|
||||||
HANDLED(date_format_) ?
|
HANDLED(date_format_) ?
|
||||||
HANDLER(date_format_).str() : optional<string>());
|
HANDLER(date_format_).str() : optional<string>());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -138,10 +138,7 @@ typedef std::ostream::pos_type ostream_pos_type;
|
||||||
#include <gmp.h>
|
#include <gmp.h>
|
||||||
#include <mpfr.h>
|
#include <mpfr.h>
|
||||||
#include "sha1.h"
|
#include "sha1.h"
|
||||||
#define SUPPORT_UNICODE 1
|
|
||||||
#if defined(SUPPORT_UNICODE)
|
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_LIBEDIT
|
#ifdef HAVE_LIBEDIT
|
||||||
#include <editline/readline.h>
|
#include <editline/readline.h>
|
||||||
|
|
|
||||||
76
src/utils.h
76
src/utils.h
|
|
@ -546,50 +546,52 @@ inline char * next_element(char * buf, bool variable = false) {
|
||||||
|
|
||||||
inline char peek_next_nonws(std::istream& in) {
|
inline char peek_next_nonws(std::istream& in) {
|
||||||
char c = in.peek();
|
char c = in.peek();
|
||||||
while (! in.eof() && std::isspace(c)) {
|
while (in.good() && ! in.eof() && std::isspace(c)) {
|
||||||
in.get(c);
|
in.get(c);
|
||||||
c = in.peek();
|
c = in.peek();
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define READ_INTO(str, targ, size, var, cond) { \
|
#define READ_INTO(str, targ, size, var, cond) { \
|
||||||
char * _p = targ; \
|
char * _p = targ; \
|
||||||
var = str.peek(); \
|
var = str.peek(); \
|
||||||
while (! str.eof() && var != '\n' && (cond) && _p - targ < size) { \
|
while (str.good() && ! str.eof() && var != '\n' && \
|
||||||
str.get(var); \
|
(cond) && _p - targ < size) { \
|
||||||
if (str.eof()) \
|
str.get(var); \
|
||||||
break; \
|
if (str.eof()) \
|
||||||
if (var == '\\') { \
|
break; \
|
||||||
str.get(var); \
|
if (var == '\\') { \
|
||||||
if (in.eof()) \
|
str.get(var); \
|
||||||
break; \
|
if (in.eof()) \
|
||||||
} \
|
break; \
|
||||||
*_p++ = var; \
|
} \
|
||||||
var = str.peek(); \
|
*_p++ = var; \
|
||||||
} \
|
var = str.peek(); \
|
||||||
*_p = '\0'; \
|
} \
|
||||||
}
|
*_p = '\0'; \
|
||||||
|
}
|
||||||
|
|
||||||
#define READ_INTO_(str, targ, size, var, idx, cond) { \
|
#define READ_INTO_(str, targ, size, var, idx, cond) { \
|
||||||
char * _p = targ; \
|
char * _p = targ; \
|
||||||
var = str.peek(); \
|
var = str.peek(); \
|
||||||
while (! str.eof() && var != '\n' && (cond) && _p - targ < size) { \
|
while (str.good() && ! str.eof() && var != '\n' && \
|
||||||
str.get(var); \
|
(cond) && _p - targ < size) { \
|
||||||
if (str.eof()) \
|
str.get(var); \
|
||||||
break; \
|
if (str.eof()) \
|
||||||
idx++; \
|
break; \
|
||||||
if (var == '\\') { \
|
idx++; \
|
||||||
str.get(var); \
|
if (var == '\\') { \
|
||||||
if (in.eof()) \
|
str.get(var); \
|
||||||
break; \
|
if (in.eof()) \
|
||||||
idx++; \
|
break; \
|
||||||
} \
|
idx++; \
|
||||||
*_p++ = var; \
|
} \
|
||||||
var = str.peek(); \
|
*_p++ = var; \
|
||||||
} \
|
var = str.peek(); \
|
||||||
*_p = '\0'; \
|
} \
|
||||||
}
|
*_p = '\0'; \
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ledger
|
} // namespace ledger
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1271,7 +1271,7 @@ void value_t::print(std::ostream& out,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case AMOUNT:
|
case AMOUNT:
|
||||||
out << as_amount();
|
as_amount().right_justify(out, first_width);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case STRING:
|
case STRING:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue