Source code for excelkit.core.parse

"""
    parse excel file
"""
import sys
import json
import datetime
from collections import OrderedDict

import click
import openpyxl

from simple_loggers import SimpleLogger
from excelkit.util.reformat import Formatter


[docs]class ExcelParser(object):
    logger = SimpleLogger('ExcelParser')
    def __init__(self):
        pass

[docs]    def parse(self, filename, data_only=False, read_only=False, sheet_idx=None, choose_one=False, skip=None, limit=None, **kwargs):
        """
            data_only=True: get the value instead of formula when data_type is 'f'
            read_only=True: to deal with large file, some attributes might lost
        """
        wb = openpyxl.load_workbook(filename, data_only=data_only, read_only=read_only)

        sheets = wb.worksheets
        if len(wb.sheetnames) > 1:
            if sheet_idx is not None:
                sheets = [wb.worksheets[sheet_idx]]
            elif choose_one:
                sheets = self.choose_sheet(wb)

        return self.get_data(sheets, skip=skip, limit=limit)

[docs]    def choose_sheet(self, workbook):
        context = dict(enumerate(workbook.sheetnames))
        click.secho('{}'.format(json.dumps(context, indent=2, ensure_ascii=False)), err=True, fg='bright_green')

        while True:
            idxes = click.prompt('please choose one or more sheets, separate by comma', err=True)
            for idx in idxes.split(','):
                if int(idx) not in context:
                    self.logger.warning('bad choice, choose from: {}'.format(list(context.keys())))
                    continue
            sheets = [workbook.worksheets[int(idx)] for idx in idxes.split(',')]
            return sheets

[docs]    def get_data(self, worksheets, skip=None, limit=None, fillna=''):
        data = OrderedDict()
        for ws in worksheets:
            data[ws.title] = []
            for n, row in enumerate(ws.rows):
                if skip and n < skip:
                    continue
                if limit and len(data[ws.title]) > limit:
                    break

                line = [
                    cell.value.strftime('%Y-%m-%d')
                    if cell.data_type == 'd'
                    else fillna if cell.value is None else cell.value
                    for cell in row
                ]
                data[ws.title].append(line)

        return data

[docs]    def export(self, data, outfile=None, fmt='table', indent=None, sep='\t', header=True, index=True, color=None, pager=False):
        """"export data

        parameters
            data: data return by get_data method
            outfile: output file, default stdout
            fmt: 'table', 'html', 'tsv' or 'json'
            indent: for json fmt export
            sep: for tsv fmt export
        """
        out = open(outfile, 'w') if outfile else sys.stdout
        with out:
            for sheet, rows in data.items():
                click.secho('>>> {}'.format(sheet), err=True, fg='yellow')
                fd = Formatter(rows, header=header)
                if fmt == 'table':
                    res = fd.to_table(index=index).get_string()
                elif fmt == 'html':
                    res = fd.to_table(index=index).get_html_string()
                elif fmt == 'json':
                    res = fd.to_json(indent=indent)
                elif fmt == 'tsv':
                    res = fd.to_tsv(sep=sep)
                else:
                    exit('bad format, choose from table, html, json, tsv')

                if color:
                    res = click.style(res, fg=color)

                if pager:
                    click.echo_via_pager(res, color=False)
                else:
                    out.write(res + '\n')


[docs]def parse_text(file_hdl, sep='\t', comment=None):

    with file_hdl:
        for line in file_hdl:
            if comment and line.startswith(comment):
                continue
            linelist = line.strip().split(sep)
            yield linelist
Source code for excelkit.core.parse

excelkit

Navigation

Related Topics