Added --rich-data for 'convert', and SHA1 checksum checking

This commit is contained in:
John Wiegley 2012-02-26 15:45:15 -06:00
parent 4cf95497f9
commit aa9b07d79b
12 changed files with 172 additions and 163 deletions

View file

@ -397,6 +397,7 @@ appeared in the original journal file.
.It Fl \-revalued
.It Fl \-revalued-only
.It Fl \-revalued-total Ar EXPR
.It Fl \-rich-data
.It Fl \-seed Ar INT
.It Fl \-script
.It Fl \-sort Ar EXPR Pq Fl S

View file

@ -56,72 +56,41 @@ value_t convert_command(call_scope_t& args)
account_t * bucket = journal.master->find_account(bucket_name);
account_t * unknown = journal.master->find_account(_("Expenses:Unknown"));
// Make an amounts mapping for the account under consideration
typedef std::map<value_t, std::list<post_t *> > post_map_t;
post_map_t post_map;
xacts_iterator journal_iter(journal);
while (xact_t * xact = *journal_iter++) {
post_t * post = NULL;
xact_posts_iterator xact_iter(*xact);
while ((post = *xact_iter++) != NULL) {
if (post->account == bucket)
break;
}
if (post) {
post_map_t::iterator i = post_map.find(post->amount);
if (i == post_map.end()) {
std::list<post_t *> post_list;
post_list.push_back(post);
post_map.insert(post_map_t::value_type(post->amount, post_list));
} else {
(*i).second.push_back(post);
}
}
}
// Create a flat list
xacts_list current_xacts(journal.xacts_begin(), journal.xacts_end());
// Read in the series of transactions from the CSV file
print_xacts formatter(report);
ifstream data(path(args.get<string>(0)));
csv_reader reader(data);
path csv_file_path(args.get<string>(0));
ifstream data(csv_file_path);
csv_reader reader(data, csv_file_path);
while (xact_t * xact = reader.read_xact(journal, bucket)) {
try {
while (xact_t * xact = reader.read_xact(journal, bucket,
report.HANDLED(rich_data))) {
if (report.HANDLED(invert)) {
foreach (post_t * post, xact->posts)
post->amount.in_place_negate();
}
bool matched = false;
if (! xact->posts.front()->amount.is_null()) {
post_map_t::iterator i = post_map.find(- xact->posts.front()->amount);
if (i != post_map.end()) {
std::list<post_t *>& post_list((*i).second);
foreach (post_t * post, post_list) {
if (xact->code && post->xact->code &&
*xact->code == *post->xact->code) {
matched = true;
break;
}
else if (xact->actual_date() == post->actual_date()) {
matched = true;
break;
}
}
}
string ref = (xact->has_tag(_("SHA1")) ?
xact->get_tag(_("SHA1"))->to_string() :
sha1sum(reader.get_last_line()));
checksum_map_t::const_iterator entry = journal.checksum_map.find(ref);
if (entry != journal.checksum_map.end()) {
INFO(file_context(reader.get_pathname(),
reader.get_linenum())
<< "Ignoring known SHA1 " << ref);
checked_delete(xact); // ignore it
continue;
}
if (matched) {
DEBUG("convert.csv", "Ignored xact with code: " << *xact->code);
checked_delete(xact); // ignore it
}
else {
if (report.HANDLED(rich_data) && ! xact->has_tag(_("SHA1")))
xact->set_tag(_("SHA1"), string_value(ref));
if (xact->posts.front()->account == NULL) {
// jww (2010-03-07): Bind this logic to an option: --auto-match
if (account_t * acct =
(report.HANDLED(auto_match) ?
lookup_probable_account(xact->payee, current_xacts.rbegin(),
@ -143,8 +112,16 @@ value_t convert_command(call_scope_t& args)
formatter(*post);
}
}
}
formatter.flush();
}
catch (const std::exception&) {
add_error_context(_("While parsing file %1")
<< file_context(reader.get_pathname(),
reader.get_linenum()));
add_error_context(_("While parsing CSV line:"));
add_error_context(line_context(reader.get_last_line()));
throw;
}
// If not, transform the payee according to regexps

View file

@ -70,20 +70,20 @@ string csv_reader::read_field(std::istream& sin)
else {
while (sin.good() && ! sin.eof()) {
sin.get(c);
if (sin.good()) {
if (c == ',')
break;
if (c != '\0')
field += c;
}
}
}
trim(field);
return field;
}
char * csv_reader::next_line(std::istream& sin)
{
static char linebuf[MAX_LINE + 1];
while (sin.good() && ! sin.eof() && sin.peek() == '#')
sin.getline(linebuf, MAX_LINE);
@ -130,11 +130,13 @@ void csv_reader::read_index(std::istream& sin)
}
}
xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket,
bool rich_data)
{
char * line = next_line(in);
if (! line || index.empty())
return NULL;
linenum++;
std::istringstream instr(line);
@ -144,20 +146,18 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
xact->set_state(item_t::CLEARED);
xact->pos = position_t();
xact->pos->pathname = "jww (2010-03-05): unknown";
xact->pos->pathname = pathname;
xact->pos->beg_pos = in.tellg();
xact->pos->beg_line = 0;
xact->pos->sequence = 0;
xact->pos->beg_line = linenum;
xact->pos->sequence = sequence++;
post->xact = xact.get();
#if 0
post->pos = position_t();
post->pos->pathname = pathname;
post->pos->beg_pos = line_beg_pos;
post->pos->beg_pos = in.tellg();
post->pos->beg_line = linenum;
post->pos->sequence = context.sequence++;
#endif
post->pos->sequence = sequence++;
post->set_state(item_t::CLEARED);
post->account = NULL;
@ -167,7 +167,6 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
string total;
string field;
try {
while (instr.good() && ! instr.eof()) {
field = read_field(instr);
@ -236,19 +235,12 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
}
n++;
}
}
catch (const std::exception&) {
add_error_context(_("While parsing CSV field:"));
add_error_context(line_context(field));
throw;
}
#if 0
if (rich_data) {
xact->set_tag(_("Imported"),
string(format_date(CURRENT_DATE(), FMT_WRITTEN)));
xact->set_tag(_("Original"), string(line));
xact->set_tag(_("SHA1"), string(sha1sum(line)));
#endif
string_value(format_date(CURRENT_DATE(), FMT_WRITTEN)));
xact->set_tag(_("CSV"), string_value(line));
}
// Translate the account name, if we have enough information to do so
@ -267,13 +259,11 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
post->xact = xact.get();
#if 0
post->pos = position_t();
post->pos->pathname = pathname;
post->pos->beg_pos = line_beg_pos;
post->pos->beg_pos = in.tellg();
post->pos->beg_line = linenum;
post->pos->sequence = context.sequence++;
#endif
post->pos->sequence = sequence++;
post->set_state(item_t::CLEARED);
post->account = bucket;

View file

@ -52,9 +52,13 @@ class account_t;
class csv_reader
{
static const std::size_t MAX_LINE = 1024;
static const std::size_t MAX_LINE = 4096;
std::istream& in;
path pathname;
char linebuf[MAX_LINE];
std::size_t linenum;
std::size_t sequence;
enum headers_t {
FIELD_DATE = 0,
@ -80,13 +84,11 @@ class csv_reader
std::vector<int> index;
std::vector<string> names;
std::vector<string> fields;
typedef std::map<string, string> string_map;
public:
csv_reader(std::istream& _in)
: in(_in),
csv_reader(std::istream& _in, const path& _pathname)
: in(_in), pathname(_pathname),
linenum(0), sequence(0),
date_mask("date"),
date_eff_mask("posted( ?date)?"),
code_mask("code"),
@ -98,11 +100,30 @@ public:
read_index(in);
}
void read_index(std::istream& in);
string read_field(std::istream& in);
char * next_line(std::istream& in);
void read_index(std::istream& in);
xact_t * read_xact(journal_t& journal, account_t * bucket);
xact_t * read_xact(journal_t& journal, account_t * bucket, bool rich_data);
// Returns a pointer to this reader's internal line buffer (the `linebuf`
// member array).  The storage belongs to the reader, so the pointer is only
// usable while the reader object is alive.
// NOTE(review): presumably this holds the raw text of the most recently read
// CSV line and is overwritten by the next read -- confirm against next_line().
const char * get_last_line() const {
return linebuf;
}
// Returns, by value, the pathname stored in this reader.  The constructor
// initialises it from its `_pathname` argument, and reset() clears it.
path get_pathname() const {
return pathname;
}
// Returns the reader's current line counter.  The counter starts at 0 and
// read_xact() increments it once for each line it successfully fetches.
// NOTE(review): whether the header line consumed by read_index() is counted
// is not visible here -- confirm before treating this as a 1-based file line.
std::size_t get_linenum() const {
return linenum;
}
// Puts the reader back into an unconfigured state: clears the stored
// pathname and the `index` and `names` tables, and zeroes the line and
// sequence counters.  The input stream reference, the field masks and
// `fields` are not touched.
// While `index` is empty, read_xact() returns NULL, so the column tables
// must be rebuilt (presumably via read_index() -- confirm) before reuse.
void reset() {
pathname.clear();
index.clear();
names.clear();
linenum = 0;
sequence = 0;
}
};
} // namespace ledger

View file

@ -107,6 +107,17 @@ account_t * journal_t::find_account_re(const string& regexp)
bool journal_t::add_xact(xact_t * xact)
{
if (optional<value_t> ref = xact->get_tag(_("SHA1"))) {
std::pair<checksum_map_t::iterator, bool> result
= checksum_map.insert(checksum_map_t::value_type(ref->to_string(), xact));
if (! result.second) {
throw_(std::runtime_error,
_("Found duplicated transaction with SHA1: ")
<< ref->to_string());
return false;
}
}
xact->journal = this;
if (! xact->finalize()) {

View file

@ -63,6 +63,7 @@ typedef std::pair<mask_t, string> payee_mapping_t;
typedef std::list<payee_mapping_t> payee_mappings_t;
typedef std::pair<mask_t, account_t *> account_mapping_t;
typedef std::list<account_mapping_t> account_mappings_t;
typedef std::map<string, xact_t *> checksum_map_t;
class journal_t : public noncopyable
{
@ -117,6 +118,7 @@ public:
std::list<fileinfo_t> sources;
payee_mappings_t payee_mappings;
account_mappings_t account_mappings;
checksum_map_t checksum_map;
bool was_loaded;
journal_t();
@ -198,6 +200,7 @@ private:
ar & sources;
ar & payee_mappings;
ar & account_mappings;
ar & checksum_map;
}
#endif // HAVE_BOOST_SERIALIZATION
};

View file

@ -1094,6 +1094,7 @@ option_t<report_t> * report_t::lookup_option(const char * p)
else OPT(revalued);
else OPT(revalued_only);
else OPT(revalued_total_);
else OPT(rich_data);
break;
case 's':
OPT(sort_);

View file

@ -313,6 +313,7 @@ public:
HANDLER(revalued).report(out);
HANDLER(revalued_only).report(out);
HANDLER(revalued_total_).report(out);
HANDLER(rich_data).report(out);
HANDLER(seed_).report(out);
HANDLER(sort_).report(out);
HANDLER(sort_all_).report(out);
@ -893,6 +894,8 @@ public:
set_expr(args.get<string>(0), args.get<string>(1));
});
OPTION(report_t, rich_data);
OPTION(report_t, seed_);
OPTION_(report_t, sort_, DO_(args) { // -S

View file

@ -1150,8 +1150,9 @@ post_t * instance_t::parse_post(char * line,
if (context.strict && ! post->account->has_flags(ACCOUNT_KNOWN)) {
if (post->_state == item_t::UNCLEARED)
warning_(_("\"%1\", line %2: Unknown account '%3'")
<< pathname.string() << linenum << post->account->fullname());
warning_(_("%1Unknown account '%2'")
<< file_context(pathname, linenum)
<< post->account->fullname());
post->account->add_flags(ACCOUNT_KNOWN);
}
@ -1181,8 +1182,9 @@ post_t * instance_t::parse_post(char * line,
if (context.strict &&
! post->amount.commodity().has_flags(COMMODITY_KNOWN)) {
if (post->_state == item_t::UNCLEARED)
warning_(_("\"%1\", line %2: Unknown commodity '%3'")
<< pathname.string() << linenum << post->amount.commodity());
warning_(_("%1Unknown commodity '%2'")
<< file_context(pathname, linenum)
<< post->amount.commodity());
post->amount.commodity().add_flags(COMMODITY_KNOWN);
}

View file

@ -50,8 +50,8 @@ void debug_assert(const string& reason,
std::size_t line)
{
std::ostringstream buf;
buf << "Assertion failed in \"" << file << "\", line " << line
<< ": " << func << ": " << reason;
buf << "Assertion failed in " << file_context(file, line)
<< func << ": " << reason;
throw assertion_failed(buf.str());
}

View file