The CSV reader now auto-correlates fields by regex
This commit is contained in:
parent
89992ad4b8
commit
e070cdfc8d
2 changed files with 215 additions and 57 deletions
225
src/csv.cc
225
src/csv.cc
|
|
@ -40,7 +40,7 @@
|
|||
|
||||
namespace ledger {
|
||||
|
||||
string csv_reader::read_field()
|
||||
string csv_reader::read_field(std::istream& in)
|
||||
{
|
||||
string field;
|
||||
|
||||
|
|
@ -53,6 +53,9 @@ string csv_reader::read_field()
|
|||
if (x == '\\') {
|
||||
in.get(x);
|
||||
}
|
||||
else if (x == '"' && in.peek() == '"') {
|
||||
in.get(x);
|
||||
}
|
||||
else if (x == c) {
|
||||
if (x == '|')
|
||||
in.unget();
|
||||
|
|
@ -60,28 +63,86 @@ string csv_reader::read_field()
|
|||
in.get(c);
|
||||
break;
|
||||
}
|
||||
field += x;
|
||||
if (x != '\0')
|
||||
field += x;
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
while (in.good() && ! in.eof()) {
|
||||
in.get(c);
|
||||
if (c == ',')
|
||||
break;
|
||||
if (c != '\0')
|
||||
field += c;
|
||||
}
|
||||
}
|
||||
trim(field);
|
||||
return field;
|
||||
}
|
||||
|
||||
xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
||||
char * csv_reader::next_line(std::istream& in)
|
||||
{
|
||||
static char linebuf[MAX_LINE + 1];
|
||||
|
||||
if (! in.good() || in.eof())
|
||||
return NULL;
|
||||
|
||||
std::auto_ptr<xact_t> xact;
|
||||
|
||||
while (in.good() && ! in.eof() && in.peek() == '#')
|
||||
in.getline(linebuf, MAX_LINE);
|
||||
|
||||
xact.reset(new xact_t);
|
||||
if (! in.good() || in.eof())
|
||||
return NULL;
|
||||
|
||||
in.getline(linebuf, MAX_LINE);
|
||||
|
||||
return linebuf;
|
||||
}
|
||||
|
||||
void csv_reader::read_index(std::istream& in)
|
||||
{
|
||||
char * line = next_line(in);
|
||||
if (! line)
|
||||
return;
|
||||
|
||||
std::istringstream instr(line);
|
||||
|
||||
while (instr.good() && ! instr.eof()) {
|
||||
string field = read_field(instr);
|
||||
names.push_back(field);
|
||||
|
||||
if (date_mask.match(field))
|
||||
index.push_back(FIELD_DATE);
|
||||
else if (date_eff_mask.match(field))
|
||||
index.push_back(FIELD_DATE_EFF);
|
||||
else if (code_mask.match(field))
|
||||
index.push_back(FIELD_CODE);
|
||||
else if (payee_mask.match(field))
|
||||
index.push_back(FIELD_PAYEE);
|
||||
else if (amount_mask.match(field))
|
||||
index.push_back(FIELD_AMOUNT);
|
||||
else if (cost_mask.match(field))
|
||||
index.push_back(FIELD_COST);
|
||||
else if (total_mask.match(field))
|
||||
index.push_back(FIELD_TOTAL);
|
||||
else if (note_mask.match(field))
|
||||
index.push_back(FIELD_NOTE);
|
||||
else
|
||||
index.push_back(FIELD_UNKNOWN);
|
||||
|
||||
DEBUG("csv.parse", "Header field: " << field);
|
||||
}
|
||||
}
|
||||
|
||||
xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
||||
{
|
||||
restart:
|
||||
char * line = next_line(in);
|
||||
if (! line || index.empty())
|
||||
return NULL;
|
||||
|
||||
std::istringstream instr(line);
|
||||
|
||||
std::auto_ptr<xact_t> xact(new xact_t);
|
||||
std::auto_ptr<post_t> post(new post_t);
|
||||
|
||||
xact->set_state(item_t::CLEARED);
|
||||
|
||||
xact->pos = position_t();
|
||||
xact->pos->pathname = "jww (2010-03-05): unknown";
|
||||
|
|
@ -89,36 +150,6 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
xact->pos->beg_line = 0;
|
||||
xact->pos->sequence = 0;
|
||||
|
||||
string date = read_field(); trim(date);
|
||||
string code = read_field(); trim(code);
|
||||
string payee = read_field(); trim(payee);
|
||||
|
||||
if (date.empty())
|
||||
return NULL;
|
||||
|
||||
xact->set_state(item_t::CLEARED);
|
||||
xact->_date = parse_date(date);
|
||||
if (! code.empty())
|
||||
xact->code = code;
|
||||
|
||||
bool found = false;
|
||||
foreach (payee_mapping_t& value, journal.payee_mappings) {
|
||||
DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
|
||||
if (value.first.match(payee)) {
|
||||
xact->payee = value.second;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (! found)
|
||||
xact->payee = payee;
|
||||
|
||||
string amount = read_field(); trim(amount);
|
||||
string total = read_field(); trim(total);
|
||||
in.getline(linebuf, MAX_LINE); // skip to the next line
|
||||
|
||||
std::auto_ptr<post_t> post(new post_t);
|
||||
|
||||
post->xact = xact.get();
|
||||
|
||||
#if 0
|
||||
|
|
@ -130,7 +161,96 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
#endif
|
||||
|
||||
post->set_state(item_t::CLEARED);
|
||||
post->account = journal.master->find_account(_("Expenses:Unknown"));
|
||||
post->account = NULL;
|
||||
|
||||
int n = 0;
|
||||
amount_t amt;
|
||||
string total;
|
||||
|
||||
while (instr.good() && ! instr.eof()) {
|
||||
string field = read_field(instr);
|
||||
|
||||
switch (index[n]) {
|
||||
case FIELD_DATE:
|
||||
if (field.empty())
|
||||
goto restart;
|
||||
try {
|
||||
xact->_date = parse_date(field);
|
||||
}
|
||||
catch (date_error&) {
|
||||
goto restart;
|
||||
}
|
||||
break;
|
||||
|
||||
case FIELD_DATE_EFF:
|
||||
xact->_date_eff = parse_date(field);
|
||||
break;
|
||||
|
||||
case FIELD_CODE:
|
||||
if (! field.empty())
|
||||
xact->code = field;
|
||||
break;
|
||||
|
||||
case FIELD_PAYEE: {
|
||||
bool found = false;
|
||||
foreach (payee_mapping_t& value, journal.payee_mappings) {
|
||||
DEBUG("csv.mappings", "Looking for payee mapping: " << value.first);
|
||||
if (value.first.match(field)) {
|
||||
xact->payee = value.second;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (! found)
|
||||
xact->payee = field;
|
||||
break;
|
||||
}
|
||||
|
||||
case FIELD_AMOUNT: {
|
||||
std::istringstream amount_str(field);
|
||||
amt.parse(amount_str, PARSE_NO_REDUCE);
|
||||
if (! amt.has_commodity() &&
|
||||
commodity_pool_t::current_pool->default_commodity)
|
||||
amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
|
||||
post->amount = amt;
|
||||
break;
|
||||
}
|
||||
|
||||
case FIELD_COST: {
|
||||
std::istringstream amount_str(field);
|
||||
amt.parse(amount_str, PARSE_NO_REDUCE);
|
||||
if (! amt.has_commodity() &&
|
||||
commodity_pool_t::current_pool->default_commodity)
|
||||
amt.set_commodity
|
||||
(*commodity_pool_t::current_pool->default_commodity);
|
||||
post->cost = amt;
|
||||
break;
|
||||
}
|
||||
|
||||
case FIELD_TOTAL:
|
||||
total = field;
|
||||
break;
|
||||
|
||||
case FIELD_NOTE:
|
||||
xact->note = field;
|
||||
break;
|
||||
|
||||
case FIELD_UNKNOWN:
|
||||
if (! names[n].empty() && ! field.empty())
|
||||
xact->set_tag(names[n], field);
|
||||
break;
|
||||
}
|
||||
n++;
|
||||
}
|
||||
|
||||
#if 0
|
||||
xact->set_tag(_("Imported"),
|
||||
string(format_date(CURRENT_DATE(), FMT_WRITTEN)));
|
||||
xact->set_tag(_("Original"), string(line));
|
||||
xact->set_tag(_("SHA1"), string(sha1sum(line)));
|
||||
#endif
|
||||
|
||||
// Translate the account name, if we have enough information to do so
|
||||
|
||||
foreach (account_mapping_t& value, journal.account_mappings) {
|
||||
if (value.first.match(xact->payee)) {
|
||||
|
|
@ -139,17 +259,10 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
}
|
||||
}
|
||||
|
||||
std::istringstream amount_str(amount);
|
||||
amount_t amt;
|
||||
amt.parse(amount_str, PARSE_NO_REDUCE);
|
||||
if (! amt.has_commodity() &&
|
||||
commodity_pool_t::current_pool->default_commodity)
|
||||
amt.set_commodity
|
||||
(*commodity_pool_t::current_pool->default_commodity);
|
||||
post->amount = amt;
|
||||
|
||||
xact->add_post(post.release());
|
||||
|
||||
// Create the "balancing post", which refers to the account for this data
|
||||
|
||||
post.reset(new post_t);
|
||||
|
||||
post->xact = xact.get();
|
||||
|
|
@ -164,13 +277,17 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
|
||||
post->set_state(item_t::CLEARED);
|
||||
post->account = bucket;
|
||||
post->amount = - amt;
|
||||
|
||||
if (! amt.is_null())
|
||||
post->amount = - amt;
|
||||
|
||||
if (! total.empty()) {
|
||||
std::istringstream assigned_amount_str(total);
|
||||
amount_t assigned_amount;
|
||||
assigned_amount.parse(assigned_amount_str, PARSE_NO_REDUCE);
|
||||
post->assigned_amount = assigned_amount;
|
||||
amt.parse(assigned_amount_str, PARSE_NO_REDUCE);
|
||||
if (! amt.has_commodity() &&
|
||||
commodity_pool_t::current_pool->default_commodity)
|
||||
amt.set_commodity(*commodity_pool_t::current_pool->default_commodity);
|
||||
post->assigned_amount = amt;
|
||||
}
|
||||
|
||||
xact->add_post(post.release());
|
||||
|
|
|
|||
47
src/csv.h
47
src/csv.h
|
|
@ -56,10 +56,51 @@ class csv_reader
|
|||
|
||||
std::istream& in;
|
||||
|
||||
public:
|
||||
csv_reader(std::istream& _in) : in(_in) {}
|
||||
/**
 * The kind of data held by one CSV column.  read_index() assigns one of
 * these to every header column, and read_xact() switches on it to decide
 * where each field of a data row goes.
 */
enum headers_t {
  FIELD_DATE     = 0,  ///< Parsed into the transaction's date.
  FIELD_DATE_EFF = 1,  ///< Parsed into the transaction's effective date.
  FIELD_CODE     = 2,  ///< Stored as the transaction code when non-empty.
  FIELD_PAYEE    = 3,  ///< Payee, after applying the journal's payee mappings.
  FIELD_AMOUNT   = 4,  ///< Parsed into the posting's amount.
  FIELD_COST     = 5,  ///< Parsed into the posting's cost.
  FIELD_TOTAL    = 6,  ///< Kept as text; later parsed as an assigned amount.
  FIELD_NOTE     = 7,  ///< Stored as the transaction note.

  FIELD_UNKNOWN  = 8   ///< Unrecognized column; becomes a tag named after it.
};
|
||||
|
||||
mask_t date_mask;
|
||||
mask_t date_eff_mask;
|
||||
mask_t code_mask;
|
||||
mask_t payee_mask;
|
||||
mask_t amount_mask;
|
||||
mask_t cost_mask;
|
||||
mask_t total_mask;
|
||||
mask_t note_mask;
|
||||
|
||||
std::vector<int> index;
|
||||
std::vector<string> names;
|
||||
std::vector<string> fields;
|
||||
|
||||
typedef std::map<string, string> string_map;
|
||||
|
||||
public:
|
||||
csv_reader(std::istream& _in)
|
||||
: in(_in),
|
||||
date_mask("date"),
|
||||
date_eff_mask("posted( ?date)?"),
|
||||
code_mask("code"),
|
||||
payee_mask("(payee|desc(ription)?|title)"),
|
||||
amount_mask("amount"),
|
||||
cost_mask("cost"),
|
||||
total_mask("total"),
|
||||
note_mask("note") {
|
||||
read_index(in);
|
||||
}
|
||||
|
||||
string read_field(std::istream& in);
|
||||
char * next_line(std::istream& in);
|
||||
void read_index(std::istream& in);
|
||||
|
||||
xact_t * read_xact(journal_t& journal, account_t * bucket);
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue