Skip to content

csv support #3

@Jaymon

Description

@Jaymon

I had to do some CSV manipulations, so I wrote this little script, csvtool.py, to do it. It would be cool to have built-in support for this so I could manipulate CSV files on the command line.

# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, print_function, absolute_import
import csv
import glob
import os
from collections import Counter

from captain import arg, exit, echo


# it would be cool to support xlsx, it can be converted using this:
# https://github.com/dilshod/xlsx2csv


class CSVRow(dict):
    """A dict whose keys can also be read as attributes (``row.name``)."""

    def __init__(self, row):
        """Populate the mapping from ``row`` (anything ``dict`` accepts)."""
        super(CSVRow, self).__init__(row)

    def __getattr__(self, k):
        """Return ``self[k]``; raise AttributeError when ``k`` is not a key.

        Only consulted when normal attribute lookup fails, so regular dict
        methods are still found first.
        """
        if k in self:
            return self[k]
        raise AttributeError(k)


class CSV(object):
    """Iterate over the rows of one or more CSV files.

    Each positional path may be a CSV file or a directory. For a directory,
    every ``*.csv`` file directly inside it is read. Rows are produced by
    ``reader_class`` and wrapped in ``row_class`` before being yielded.
    """
    reader_class = csv.DictReader
    row_class = CSVRow

    def __init__(self, *paths, **kwargs):
        """Collect the CSV files to read.

        :param *paths: file or directory paths; ``~`` is expanded and each
            path is made absolute
        :param **kwargs: optional ``row_class`` and/or ``reader_class`` to
            override the class-level defaults for this instance
        :raises ValueError: if a path is neither a directory nor a file
        """
        self.csv_paths = []
        for p in paths:
            p = os.path.abspath(os.path.expanduser(p))
            if os.path.isdir(p):
                # Search inside the given directory (not the current working
                # directory); sort so files are read in a stable order.
                pattern = os.path.join(p, '*.csv')
                self.csv_paths.extend(sorted(glob.glob(pattern)))

            elif os.path.isfile(p):
                self.csv_paths.append(p)

            else:
                raise ValueError("{} is not a valid file".format(p))

        if 'row_class' in kwargs:
            self.row_class = kwargs['row_class']

        if 'reader_class' in kwargs:
            self.reader_class = kwargs['reader_class']

    @staticmethod
    def _open(path):
        """Open ``path`` for the csv module on both Python 2 and Python 3."""
        try:
            # Python 3: the csv module wants newline='' so it can handle
            # line endings itself; the 'U' mode flag was removed in 3.11.
            return open(path, newline='')
        except TypeError:
            # Python 2's builtin open() has no ``newline`` keyword; fall back
            # to universal-newline mode there.
            return open(path, 'rU')

    def __iter__(self):
        """Yield one ``row_class`` instance per row, file by file."""
        for p in self.csv_paths:
            with self._open(p) as f:
                for row in self.reader_class(f):
                    yield self.row_class(row)





@arg("--column", "--c", help="the columns to print unique values of")
@arg("paths", nargs='+')
def main_unique(column, paths):
    """Report the distinct values found in one column across CSV files.

    Passes a table to ``echo.table`` with one entry per distinct value:
    (row number where the value first appeared, the value, occurrence count),
    then a summary line to ``echo.out``.

    :param column: name of the column whose values are counted
    :param paths: CSV files and/or directories, handed to ``CSV``
    """
    first_seen = {}
    counts = Counter()
    # Start at zero so the summary line still works when there are no rows.
    total = 0
    for total, row in enumerate(CSV(*paths), 1):
        value = row[column]
        if value not in counts:
            first_seen[value] = total
        counts[value] += 1

    echo.table(
        ((first_seen[value], value, count) for value, count in counts.items())
    )

    echo.out("Checked {} rows and found {} unique values in {}", total, len(counts), column)





# ``exit`` here is the one imported from captain above, not the builtin.
# NOTE(review): presumably this runs the command-line interface when the
# module is executed as a script -- confirm against captain's documentation.
exit(__name__)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions