Added --rich-data for 'convert', and SHA1 checksum checking
This commit is contained in:
parent
4cf95497f9
commit
aa9b07d79b
12 changed files with 172 additions and 163 deletions
|
|
@ -397,6 +397,7 @@ appeared in the original journal file.
|
|||
.It Fl \-revalued
|
||||
.It Fl \-revalued-only
|
||||
.It Fl \-revalued-total Ar EXPR
|
||||
.It Fl \-rich-data
|
||||
.It Fl \-seed Ar INT
|
||||
.It Fl \-script
|
||||
.It Fl \-sort Ar EXPR Pq Fl S
|
||||
|
|
|
|||
|
|
@ -56,72 +56,41 @@ value_t convert_command(call_scope_t& args)
|
|||
account_t * bucket = journal.master->find_account(bucket_name);
|
||||
account_t * unknown = journal.master->find_account(_("Expenses:Unknown"));
|
||||
|
||||
// Make an amounts mapping for the account under consideration
|
||||
|
||||
typedef std::map<value_t, std::list<post_t *> > post_map_t;
|
||||
post_map_t post_map;
|
||||
|
||||
xacts_iterator journal_iter(journal);
|
||||
while (xact_t * xact = *journal_iter++) {
|
||||
post_t * post = NULL;
|
||||
xact_posts_iterator xact_iter(*xact);
|
||||
while ((post = *xact_iter++) != NULL) {
|
||||
if (post->account == bucket)
|
||||
break;
|
||||
}
|
||||
if (post) {
|
||||
post_map_t::iterator i = post_map.find(post->amount);
|
||||
if (i == post_map.end()) {
|
||||
std::list<post_t *> post_list;
|
||||
post_list.push_back(post);
|
||||
post_map.insert(post_map_t::value_type(post->amount, post_list));
|
||||
} else {
|
||||
(*i).second.push_back(post);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a flat list
|
||||
xacts_list current_xacts(journal.xacts_begin(), journal.xacts_end());
|
||||
|
||||
// Read in the series of transactions from the CSV file
|
||||
|
||||
print_xacts formatter(report);
|
||||
ifstream data(path(args.get<string>(0)));
|
||||
csv_reader reader(data);
|
||||
path csv_file_path(args.get<string>(0));
|
||||
ifstream data(csv_file_path);
|
||||
csv_reader reader(data, csv_file_path);
|
||||
|
||||
while (xact_t * xact = reader.read_xact(journal, bucket)) {
|
||||
try {
|
||||
while (xact_t * xact = reader.read_xact(journal, bucket,
|
||||
report.HANDLED(rich_data))) {
|
||||
if (report.HANDLED(invert)) {
|
||||
foreach (post_t * post, xact->posts)
|
||||
post->amount.in_place_negate();
|
||||
}
|
||||
|
||||
bool matched = false;
|
||||
if (! xact->posts.front()->amount.is_null()) {
|
||||
post_map_t::iterator i = post_map.find(- xact->posts.front()->amount);
|
||||
if (i != post_map.end()) {
|
||||
std::list<post_t *>& post_list((*i).second);
|
||||
foreach (post_t * post, post_list) {
|
||||
if (xact->code && post->xact->code &&
|
||||
*xact->code == *post->xact->code) {
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
else if (xact->actual_date() == post->actual_date()) {
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
string ref = (xact->has_tag(_("SHA1")) ?
|
||||
xact->get_tag(_("SHA1"))->to_string() :
|
||||
sha1sum(reader.get_last_line()));
|
||||
|
||||
checksum_map_t::const_iterator entry = journal.checksum_map.find(ref);
|
||||
if (entry != journal.checksum_map.end()) {
|
||||
INFO(file_context(reader.get_pathname(),
|
||||
reader.get_linenum())
|
||||
<< "Ignoring known SHA1 " << ref);
|
||||
checked_delete(xact); // ignore it
|
||||
continue;
|
||||
}
|
||||
|
||||
if (matched) {
|
||||
DEBUG("convert.csv", "Ignored xact with code: " << *xact->code);
|
||||
checked_delete(xact); // ignore it
|
||||
}
|
||||
else {
|
||||
if (report.HANDLED(rich_data) && ! xact->has_tag(_("SHA1")))
|
||||
xact->set_tag(_("SHA1"), string_value(ref));
|
||||
|
||||
if (xact->posts.front()->account == NULL) {
|
||||
// jww (2010-03-07): Bind this logic to an option: --auto-match
|
||||
if (account_t * acct =
|
||||
(report.HANDLED(auto_match) ?
|
||||
lookup_probable_account(xact->payee, current_xacts.rbegin(),
|
||||
|
|
@ -143,8 +112,16 @@ value_t convert_command(call_scope_t& args)
|
|||
formatter(*post);
|
||||
}
|
||||
}
|
||||
}
|
||||
formatter.flush();
|
||||
}
|
||||
catch (const std::exception&) {
|
||||
add_error_context(_("While parsing file %1")
|
||||
<< file_context(reader.get_pathname(),
|
||||
reader.get_linenum()));
|
||||
add_error_context(_("While parsing CSV line:"));
|
||||
add_error_context(line_context(reader.get_last_line()));
|
||||
throw;
|
||||
}
|
||||
|
||||
// If not, transform the payee according to regexps
|
||||
|
||||
|
|
|
|||
42
src/csv.cc
42
src/csv.cc
|
|
@ -70,20 +70,20 @@ string csv_reader::read_field(std::istream& sin)
|
|||
else {
|
||||
while (sin.good() && ! sin.eof()) {
|
||||
sin.get(c);
|
||||
if (sin.good()) {
|
||||
if (c == ',')
|
||||
break;
|
||||
if (c != '\0')
|
||||
field += c;
|
||||
}
|
||||
}
|
||||
}
|
||||
trim(field);
|
||||
return field;
|
||||
}
|
||||
|
||||
char * csv_reader::next_line(std::istream& sin)
|
||||
{
|
||||
static char linebuf[MAX_LINE + 1];
|
||||
|
||||
while (sin.good() && ! sin.eof() && sin.peek() == '#')
|
||||
sin.getline(linebuf, MAX_LINE);
|
||||
|
||||
|
|
@ -130,11 +130,13 @@ void csv_reader::read_index(std::istream& sin)
|
|||
}
|
||||
}
|
||||
|
||||
xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
||||
xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket,
|
||||
bool rich_data)
|
||||
{
|
||||
char * line = next_line(in);
|
||||
if (! line || index.empty())
|
||||
return NULL;
|
||||
linenum++;
|
||||
|
||||
std::istringstream instr(line);
|
||||
|
||||
|
|
@ -144,20 +146,18 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
xact->set_state(item_t::CLEARED);
|
||||
|
||||
xact->pos = position_t();
|
||||
xact->pos->pathname = "jww (2010-03-05): unknown";
|
||||
xact->pos->pathname = pathname;
|
||||
xact->pos->beg_pos = in.tellg();
|
||||
xact->pos->beg_line = 0;
|
||||
xact->pos->sequence = 0;
|
||||
xact->pos->beg_line = linenum;
|
||||
xact->pos->sequence = sequence++;
|
||||
|
||||
post->xact = xact.get();
|
||||
|
||||
#if 0
|
||||
post->pos = position_t();
|
||||
post->pos->pathname = pathname;
|
||||
post->pos->beg_pos = line_beg_pos;
|
||||
post->pos->beg_pos = in.tellg();
|
||||
post->pos->beg_line = linenum;
|
||||
post->pos->sequence = context.sequence++;
|
||||
#endif
|
||||
post->pos->sequence = sequence++;
|
||||
|
||||
post->set_state(item_t::CLEARED);
|
||||
post->account = NULL;
|
||||
|
|
@ -167,7 +167,6 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
string total;
|
||||
string field;
|
||||
|
||||
try {
|
||||
while (instr.good() && ! instr.eof()) {
|
||||
field = read_field(instr);
|
||||
|
||||
|
|
@ -236,19 +235,12 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
}
|
||||
n++;
|
||||
}
|
||||
}
|
||||
catch (const std::exception&) {
|
||||
add_error_context(_("While parsing CSV field:"));
|
||||
add_error_context(line_context(field));
|
||||
throw;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (rich_data) {
|
||||
xact->set_tag(_("Imported"),
|
||||
string(format_date(CURRENT_DATE(), FMT_WRITTEN)));
|
||||
xact->set_tag(_("Original"), string(line));
|
||||
xact->set_tag(_("SHA1"), string(sha1sum(line)));
|
||||
#endif
|
||||
string_value(format_date(CURRENT_DATE(), FMT_WRITTEN)));
|
||||
xact->set_tag(_("CSV"), string_value(line));
|
||||
}
|
||||
|
||||
// Translate the account name, if we have enough information to do so
|
||||
|
||||
|
|
@ -267,13 +259,11 @@ xact_t * csv_reader::read_xact(journal_t& journal, account_t * bucket)
|
|||
|
||||
post->xact = xact.get();
|
||||
|
||||
#if 0
|
||||
post->pos = position_t();
|
||||
post->pos->pathname = pathname;
|
||||
post->pos->beg_pos = line_beg_pos;
|
||||
post->pos->beg_pos = in.tellg();
|
||||
post->pos->beg_line = linenum;
|
||||
post->pos->sequence = context.sequence++;
|
||||
#endif
|
||||
post->pos->sequence = sequence++;
|
||||
|
||||
post->set_state(item_t::CLEARED);
|
||||
post->account = bucket;
|
||||
|
|
|
|||
37
src/csv.h
37
src/csv.h
|
|
@ -52,9 +52,13 @@ class account_t;
|
|||
|
||||
class csv_reader
|
||||
{
|
||||
static const std::size_t MAX_LINE = 1024;
|
||||
static const std::size_t MAX_LINE = 4096;
|
||||
|
||||
std::istream& in;
|
||||
path pathname;
|
||||
char linebuf[MAX_LINE];
|
||||
std::size_t linenum;
|
||||
std::size_t sequence;
|
||||
|
||||
enum headers_t {
|
||||
FIELD_DATE = 0,
|
||||
|
|
@ -80,13 +84,11 @@ class csv_reader
|
|||
|
||||
std::vector<int> index;
|
||||
std::vector<string> names;
|
||||
std::vector<string> fields;
|
||||
|
||||
typedef std::map<string, string> string_map;
|
||||
|
||||
public:
|
||||
csv_reader(std::istream& _in)
|
||||
: in(_in),
|
||||
csv_reader(std::istream& _in, const path& _pathname)
|
||||
: in(_in), pathname(_pathname),
|
||||
linenum(0), sequence(0),
|
||||
date_mask("date"),
|
||||
date_eff_mask("posted( ?date)?"),
|
||||
code_mask("code"),
|
||||
|
|
@ -98,11 +100,30 @@ public:
|
|||
read_index(in);
|
||||
}
|
||||
|
||||
void read_index(std::istream& in);
|
||||
string read_field(std::istream& in);
|
||||
char * next_line(std::istream& in);
|
||||
void read_index(std::istream& in);
|
||||
|
||||
xact_t * read_xact(journal_t& journal, account_t * bucket);
|
||||
xact_t * read_xact(journal_t& journal, account_t * bucket, bool rich_data);
|
||||
|
||||
// Returns a pointer to this reader's `linebuf` member; no copy is made, so
// the pointer aliases the reader's internal storage.
// NOTE(review): the csv.cc hunk for next_line() shows a function-local
// `static char linebuf[MAX_LINE + 1]` being filled by getline(). If that
// local declaration is still present, it hides this member and the buffer
// returned here may never be written -- confirm against the full csv.cc.
const char * get_last_line() const {
|
||||
return linebuf;
|
||||
}
|
||||
|
||||
// Returns, by value, the `pathname` member. The constructor initializes it
// from its `_pathname` argument and reset() clears it.
path get_pathname() const {
|
||||
return pathname;
|
||||
}
|
||||
// Returns the current value of the `linenum` counter. The constructor and
// reset() set it to 0; read_xact() increments it after obtaining a line.
// NOTE(review): whether the header line consumed by read_index() is counted
// is not visible here -- confirm before relying on it as a 1-based file line.
std::size_t get_linenum() const {
|
||||
return linenum;
|
||||
}
|
||||
|
||||
// Returns the reader's bookkeeping to an empty state: clears the stored
// pathname and the `index` / `names` vectors, and zeroes the `linenum` and
// `sequence` counters. The input stream reference `in` and the `linebuf`
// contents are not touched by this function.
void reset() {
|
||||
pathname.clear();
|
||||
index.clear();
|
||||
names.clear();
|
||||
linenum = 0;
|
||||
sequence = 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace ledger
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ bool item_t::has_tag(const mask_t& tag_mask,
|
|||
return false;
|
||||
}
|
||||
|
||||
optional<value_t> item_t::get_tag(const string& tag, bool) const
|
||||
optional<value_t> item_t::get_tag(const string& tag, bool) const
|
||||
{
|
||||
DEBUG("item.meta", "Getting item tag: " << tag);
|
||||
if (metadata) {
|
||||
|
|
|
|||
|
|
@ -107,6 +107,17 @@ account_t * journal_t::find_account_re(const string& regexp)
|
|||
|
||||
bool journal_t::add_xact(xact_t * xact)
|
||||
{
|
||||
if (optional<value_t> ref = xact->get_tag(_("SHA1"))) {
|
||||
std::pair<checksum_map_t::iterator, bool> result
|
||||
= checksum_map.insert(checksum_map_t::value_type(ref->to_string(), xact));
|
||||
if (! result.second) {
|
||||
throw_(std::runtime_error,
|
||||
_("Found duplicated transaction with SHA1: ")
|
||||
<< ref->to_string());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
xact->journal = this;
|
||||
|
||||
if (! xact->finalize()) {
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ typedef std::pair<mask_t, string> payee_mapping_t;
|
|||
typedef std::list<payee_mapping_t> payee_mappings_t;
|
||||
typedef std::pair<mask_t, account_t *> account_mapping_t;
|
||||
typedef std::list<account_mapping_t> account_mappings_t;
|
||||
typedef std::map<string, xact_t *> checksum_map_t;
|
||||
|
||||
class journal_t : public noncopyable
|
||||
{
|
||||
|
|
@ -117,6 +118,7 @@ public:
|
|||
std::list<fileinfo_t> sources;
|
||||
payee_mappings_t payee_mappings;
|
||||
account_mappings_t account_mappings;
|
||||
checksum_map_t checksum_map;
|
||||
bool was_loaded;
|
||||
|
||||
journal_t();
|
||||
|
|
@ -198,6 +200,7 @@ private:
|
|||
ar & sources;
|
||||
ar & payee_mappings;
|
||||
ar & account_mappings;
|
||||
ar & checksum_map;
|
||||
}
|
||||
#endif // HAVE_BOOST_SERIALIZATION
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1094,6 +1094,7 @@ option_t<report_t> * report_t::lookup_option(const char * p)
|
|||
else OPT(revalued);
|
||||
else OPT(revalued_only);
|
||||
else OPT(revalued_total_);
|
||||
else OPT(rich_data);
|
||||
break;
|
||||
case 's':
|
||||
OPT(sort_);
|
||||
|
|
|
|||
|
|
@ -313,6 +313,7 @@ public:
|
|||
HANDLER(revalued).report(out);
|
||||
HANDLER(revalued_only).report(out);
|
||||
HANDLER(revalued_total_).report(out);
|
||||
HANDLER(rich_data).report(out);
|
||||
HANDLER(seed_).report(out);
|
||||
HANDLER(sort_).report(out);
|
||||
HANDLER(sort_all_).report(out);
|
||||
|
|
@ -893,6 +894,8 @@ public:
|
|||
set_expr(args.get<string>(0), args.get<string>(1));
|
||||
});
|
||||
|
||||
OPTION(report_t, rich_data);
|
||||
|
||||
OPTION(report_t, seed_);
|
||||
|
||||
OPTION_(report_t, sort_, DO_(args) { // -S
|
||||
|
|
|
|||
|
|
@ -1150,8 +1150,9 @@ post_t * instance_t::parse_post(char * line,
|
|||
|
||||
if (context.strict && ! post->account->has_flags(ACCOUNT_KNOWN)) {
|
||||
if (post->_state == item_t::UNCLEARED)
|
||||
warning_(_("\"%1\", line %2: Unknown account '%3'")
|
||||
<< pathname.string() << linenum << post->account->fullname());
|
||||
warning_(_("%1Unknown account '%2'")
|
||||
<< file_context(pathname, linenum)
|
||||
<< post->account->fullname());
|
||||
post->account->add_flags(ACCOUNT_KNOWN);
|
||||
}
|
||||
|
||||
|
|
@ -1181,8 +1182,9 @@ post_t * instance_t::parse_post(char * line,
|
|||
if (context.strict &&
|
||||
! post->amount.commodity().has_flags(COMMODITY_KNOWN)) {
|
||||
if (post->_state == item_t::UNCLEARED)
|
||||
warning_(_("\"%1\", line %2: Unknown commodity '%3'")
|
||||
<< pathname.string() << linenum << post->amount.commodity());
|
||||
warning_(_("%1Unknown commodity '%2'")
|
||||
<< file_context(pathname, linenum)
|
||||
<< post->amount.commodity());
|
||||
post->amount.commodity().add_flags(COMMODITY_KNOWN);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,8 +50,8 @@ void debug_assert(const string& reason,
|
|||
std::size_t line)
|
||||
{
|
||||
std::ostringstream buf;
|
||||
buf << "Assertion failed in \"" << file << "\", line " << line
|
||||
<< ": " << func << ": " << reason;
|
||||
buf << "Assertion failed in " << file_context(file, line)
|
||||
<< func << ": " << reason;
|
||||
throw assertion_failed(buf.str());
|
||||
}
|
||||
|
||||
|
|
|
|||
0
test/baseline/opt-rich-data.test
Normal file
0
test/baseline/opt-rich-data.test
Normal file
Loading…
Add table
Reference in a new issue