ledger/contrib/non-profit-audit-reports/csv2ods.py

#!/usr/bin/python
# csv2ods.py
# Convert example csv file to ods
#
# Copyright (c) 2012       Tom Marble
# Copyright (c) 2012, 2013 Bradley M. Kuhn
#
# This program gives you software freedom; you can copy, modify, convey,
# and/or redistribute it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program in a file called 'GPLv3'.  If not, write to the:
#    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor
#                                    Boston, MA 02110-1301, USA.

import sys, os, os.path, optparse
import csv
import ooolib2
import shutil
import string
from Crypto.Hash import SHA256

def err(msg):
    """Report a fatal error and terminate the program.

    Writes ``error: <msg>`` followed by a newline to standard error and
    then exits with status 1.

    msg -- text describing what went wrong.
    """
    # Diagnostics go to stderr so they never get mixed into regular output
    # that a caller may be capturing from stdout.
    sys.stderr.write('error: %s\n' % msg)
    sys.exit(1)

def ReadChecksums(inputFile):
    """Load a known-checksum list and return it as a dict.

    Every non-blank line of inputFile is expected to have the form
    ``fileName : checksum``.  All spaces are removed from the file name
    (including any inside it) and all whitespace is removed from the
    checksum.

    inputFile -- path of the checksum list to read.

    Returns a dict mapping checksum -> fileName.  When a checksum occurs on
    several lines, the last one wins.

    Raises ValueError if a non-blank line contains no ':' separator.
    """
    checksums = {}
    with open(inputFile, "r") as inputFH:
        for lineNumber, line in enumerate(inputFH, 1):
            if not line.strip():
                # Tolerate blank lines, e.g. an empty line at the end of file.
                continue
            # Split on the LAST colon so that a file name which itself
            # contains ':' is still parsed; the checksum never contains one.
            fileName, separator, checksum = line.rpartition(":")
            if not separator:
                raise ValueError("%s:%d: expected 'fileName : checksum', got %r"
                                 % (inputFile, lineNumber, line))
            fileName = fileName.replace(' ', "")
            # split()/join drops spaces, tabs and any '\r' or '\n' line ending.
            checksum = "".join(checksum.split())
            checksums[checksum] = fileName
    return checksums

def ChecksumFile(filename):
    """Return the SHA256 digest of a file's contents as a hex string.

    filename -- path of the file to hash; it is opened in binary mode.
    """
    blockSize = 8192  # hash the file piecewise rather than reading it whole
    digest = SHA256.new()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns an
        # empty string, which marks end of file.
        for piece in iter(lambda: stream.read(blockSize), b''):
            digest.update(piece)
    return digest.hexdigest()

def main():
    """Print the checksum of the file named by the first command-line argument.

    NOTE: a second function called ``main`` is defined further down in this
    file and replaces this one when the module is loaded, so this is not the
    code that runs from the command line.
    """
    # ChecksumFile() is the checksum routine defined in this file; no
    # function named get_file_checksum exists here.
    print(ChecksumFile(sys.argv[1]))

def _uniqueManifestName(shortName, source, directory, saved):
    """Choose a file name in directory that does not clash with another source.

    shortName -- preferred (already shortened) file name.
    source    -- link target about to be stored under that name.
    directory -- single-file directory name; manifest keys carry it as prefix.
    saved     -- dict of manifest path -> link target recorded so far.

    Returns shortName if it is unused or already holds source; otherwise
    shortName prefixed with the first ASCII letter or digit that gives an
    unused name.  Raises Exception when every prefix is already taken.
    """
    key = os.path.join(directory, shortName)
    if key not in saved or saved[key] == source:
        return shortName
    # ascii_letters is used (rather than the locale-dependent string.letters)
    # so generated names are plain ASCII and the same on every machine.
    for prefix in string.ascii_letters + string.digits:
        candidate = prefix + shortName
        if os.path.join(directory, candidate) not in saved:
            return candidate
    raise Exception("too many similar file names for linkage; giving up")


def csv2ods(csvname, odsname, encoding='', singleFileDirectory=None, knownChecksums=None, verbose=False):
    """Convert the CSV file csvname into the ODS spreadsheet odsname.

    Cell conventions recognised in the CSV:
      * a value starting with '$' becomes a 'currency' cell (the '$' is
        dropped);
      * a value starting with 'link:' becomes a 'link' cell pointing at the
        file named after the prefix;
      * the value 'pagebreak' applies the page-break row style to its row;
      * anything else becomes a 'string' cell.
    A CSV row without fields produces an empty string cell in column 1.

    csvname             -- path of the CSV file to read.
    odsname             -- path of the spreadsheet to write.
    encoding            -- any value other than '' causes non-link cells to
                           be decoded as UTF-8.
    singleFileDirectory -- if set, linked files are copied into this
                           directory under shortened names and the links
                           point at the copies.
    knownChecksums      -- optional dict of checksum -> file name.  When
                           non-empty, a linked file whose checksum is listed
                           is linked under the listed name instead of being
                           copied, and the dict is updated in place with the
                           name chosen for every linked file.
    verbose             -- print progress information.

    Names of all linked files are appended to a file called MANIFEST in the
    current directory.

    Raises Exception if, in single-file-directory mode, a linked file does
    not exist or no unused short name can be found for it.
    """
    if knownChecksums is None:
        knownChecksums = {}
    filesSavedinManifest = {}   # manifest path -> original link target
    checksumCache = {}          # link target -> checksum, to hash each file once

    if verbose:
        print('converting from %s to %s' % (csvname, odsname))

    if singleFileDirectory:
        # NOTE(review): the directory is created relative to the current
        # directory, while files are copied below relative to the CSV file's
        # directory -- confirm these are meant to be the same place.
        if not os.path.isdir(os.path.join(os.getcwd(), singleFileDirectory)):
            os.mkdir(singleFileDirectory)

    doc = ooolib2.Calc()
    #  add a pagebreak style
    style = 'pagebreak'
    style_pagebreak = doc.styles.get_next_style('row')
    style_data = tuple([style, ('style:row-height', doc.styles.property_row_height)])
    doc.styles.style_config[style_data] = style_pagebreak
    #  add a currency style
    style = 'currency'
    style_currency = doc.styles.get_next_style('cell')
    style_data = tuple([style])
    doc.styles.style_config[style_data] = style_currency

    csvdir = os.path.dirname(csvname) or '.'
    # The with-block guarantees the CSV file is closed even if a row fails.
    with open(csvname, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row, fields in enumerate(reader, 1):
            if not fields:
                # enter an empty string for blank lines
                doc.set_cell_value(1, row, 'string', '')
                continue

            for col, val in enumerate(fields, 1):
                isLink = val.startswith("link:")
                # Only utf8 decode if it's not a filename.
                # NOTE(review): every encoding other than '' (including the
                # None that main() passes when --encoding is absent) enables
                # decoding, and the codec is always 'utf8' -- confirm whether
                # the encoding argument was meant to be used here.
                if encoding != '' and not isLink:
                    val = unicode(val, 'utf8')

                if val.startswith('$'):
                    doc.set_cell_value(col, row, 'currency', val[1:])
                elif val == "pagebreak":
                    doc.sheets[doc.sheet_index].set_sheet_config(('row', row), style_pagebreak)
                elif not isLink:
                    doc.set_cell_value(col, row, 'string', val)
                else:
                    target = val[5:]
                    linkname = os.path.basename(target)  # name is just the last component
                    linkpath = csvdir + '/' + target
                    newFile = None
                    checksum = None

                    if not singleFileDirectory:
                        newFile = target

                    if knownChecksums:
                        checksum = checksumCache.get(target)
                        if checksum is None:
                            checksum = ChecksumFile(target)
                            checksumCache[target] = checksum
                        if checksum in knownChecksums:
                            newFile = knownChecksums[checksum]
                            print("FOUND new file in known: " + newFile)

                    if not newFile:
                        if not os.path.exists(linkpath):
                            raise Exception("File " + linkpath + " does not exist in single file directory mode; giving up")

                        fileName, fileExtension = os.path.splitext(os.path.basename(target))
                        # 15 is an arbitrary choice.
                        shortName = fileName[:15] + fileExtension
                        # If that short name was already handed out for a
                        # different file, pick a prefixed variant instead so
                        # the earlier copy is not overwritten.
                        shortName = _uniqueManifestName(shortName, target,
                                                        singleFileDirectory,
                                                        filesSavedinManifest)

                        src = os.path.join(csvdir, target)
                        dest = os.path.join(csvdir, singleFileDirectory, shortName)
                        shutil.copyfile(src, dest)
                        shutil.copystat(src, dest)
                        shutil.copymode(src, dest)

                        newFile = os.path.join(singleFileDirectory, shortName)

                    if knownChecksums:
                        # Remember the chosen name so later links to a file
                        # with the same content reuse it.
                        knownChecksums[checksum] = newFile

                    linkrel = '../' + newFile  # ../ means remove the name of the *.ods
                    doc.set_cell_value(col, row, 'link', (linkrel, linkname))

                    if target not in filesSavedinManifest:
                        filesSavedinManifest[newFile] = target

                    if not os.path.exists(linkpath):
                        print("WARNING: link %s DOES NOT EXIST at %s" % (target, linkpath))
                    elif verbose:
                        print('relative link %s EXISTS at %s' % (target, linkpath))

    # save manifest file
    if filesSavedinManifest:
        with open("MANIFEST", "a") as manifestFH:
            manifestFH.write("# Files from %s\n" % odsname)
            for savedName in filesSavedinManifest:
                manifestFH.write("%s\n" % savedName)

    # Save spreadsheet file.
    doc.save(odsname)

def main():
    """Command-line entry point: parse the options and run csv2ods().

    Exits through the option parser (status 2) when --csv is missing or
    stray positional arguments are given, and through err() (status 1) when
    an input file cannot be used.
    """
    program = os.path.basename(sys.argv[0])
    version = '0.1'
    parser = optparse.OptionParser(usage='%prog [--help] [--verbose]',
                                   version='%prog ' + version)
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose',
                      help='provide extra information while processing')
    parser.add_option('-c', '--csv', action='store',
                      help='csv file to process')
    parser.add_option('-o', '--ods', action='store',
                      help='ods output filename')
    parser.add_option('-e', '--encoding', action='store',
                      help='unicode character encoding type')
    parser.add_option('-d', '--single-file-directory', action='store',
                      help='directory name to move all files into')
    parser.add_option('-s', '--known-checksum-list', action='store',
                      help='file of "fileName : checksum" lines for already known files')
    (options, args) = parser.parse_args()

    if args:
        parser.error("not expecting extra args")
    if not options.csv:
        # Without this check a missing --csv reaches os.path.exists(None)
        # and ends in a traceback instead of a usage message.
        parser.error("--csv is required")
    if not os.path.exists(options.csv):
        err('csv does not exist: %s' % options.csv)
    if not options.ods:
        # Default output name: the csv name with its extension replaced.
        (root, ext) = os.path.splitext(options.csv)
        options.ods = root + '.ods'
    if options.verbose:
        print('%s: verbose mode on' % program)
        print('csv: %s' % options.csv)
        print('ods: %s' % options.ods)
        print('encoding: %s' % options.encoding)
    if options.known_checksum_list and not options.single_file_directory:
        err(program + ": --known-checksum-list option is completely useless without --single-file-directory")
    knownChecksums = {}
    if options.known_checksum_list:
        if not os.access(options.known_checksum_list, os.R_OK):
            err(program + ": unable to read file: " + options.known_checksum_list)
        knownChecksums = ReadChecksums(options.known_checksum_list)
    csv2ods(options.csv, options.ods, options.encoding, options.single_file_directory, knownChecksums, options.verbose)

# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()