-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
I had to do some CSV manipulations, so I wrote this little script, csvtool.py, to do it. It would be kind of cool to have built-in support for this so I could manipulate CSV files on the command line:
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, print_function, absolute_import
import csv
import glob
import os
import sys
from collections import Counter

from captain import arg, exit, echo
# it would be cool to support xlsx; it can be converted using this:
# https://github.com/dilshod/xlsx2csv
class CSVRow(dict):
    """A dict for one CSV row whose keys can also be read as attributes.

    ``row.name`` returns the same value as ``row["name"]``; a name that is
    not a key raises AttributeError rather than KeyError.
    """

    def __init__(self, row):
        # Build the dict from the given mapping (or iterable of pairs).
        dict.__init__(self, row)

    def __getattr__(self, k):
        # Only reached when normal attribute lookup fails, so real dict
        # attributes and methods are never shadowed by row keys.
        if k in self:
            return self[k]
        raise AttributeError(k)
class CSV(object):
    """Iterate over the rows of one or more CSV files as a single stream.

    Each positional argument is a path to a CSV file or to a directory; for a
    directory, every ``*.csv`` file directly inside it is included. Iterating
    the instance reads the files in order and yields one ``row_class``
    instance per row produced by ``reader_class``.

    :param paths: file or directory paths (``~`` is expanded)
    :param row_class: optional keyword, overrides the class used to wrap rows
    :param reader_class: optional keyword, overrides the csv reader class
    :raises ValueError: if a path is neither an existing file nor a directory
    """
    reader_class = csv.DictReader
    row_class = CSVRow

    def __init__(self, *paths, **kwargs):
        self.csv_paths = []
        for p in paths:
            p = os.path.abspath(os.path.expanduser(p))
            if os.path.isdir(p):
                # Glob inside the directory itself (not the current working
                # directory) so the collected paths are absolute; sort them
                # because glob returns matches in arbitrary order.
                self.csv_paths.extend(sorted(glob.glob(os.path.join(p, '*.csv'))))
            elif os.path.isfile(p):
                self.csv_paths.append(p)
            else:
                raise ValueError("{} is not a valid file".format(p))

        if 'row_class' in kwargs:
            self.row_class = kwargs['row_class']
        if 'reader_class' in kwargs:
            self.reader_class = kwargs['reader_class']

    @staticmethod
    def _open(path):
        """Open ``path`` for reading in the mode the csv module expects."""
        if sys.version_info[0] >= 3:
            # The 'U' mode flag was removed in Python 3.11; the csv docs
            # recommend newline='' so the reader handles line endings itself.
            return open(path, newline='')
        # Python 2: the csv module reads byte strings with universal newlines.
        return open(path, 'rU')

    def __iter__(self):
        for p in self.csv_paths:
            with self._open(p) as f:
                for row in self.reader_class(f):
                    yield self.row_class(row)
@arg("--column", "--c", help="the columns to print unique values of")
@arg("paths", nargs='+')
def main_unique(column, paths):
    """Report the distinct values of ``column`` across the given CSV files.

    Emits a table with one line per distinct value -- the 1-based number of
    the row where the value first appeared, the value itself, and how many
    times it occurred -- followed by a summary line.

    :param column: name of the CSV column to inspect
    :param paths: CSV file or directory paths, passed through to ``CSV``
    """
    first_seen = {}
    counts = Counter()
    # Start at 0 so the summary still works when the inputs contain no rows
    # (the loop variable would otherwise never be bound).
    total = 0
    for total, row in enumerate(CSV(*paths), 1):
        value = row[column]
        if value not in counts:
            first_seen[value] = total
        counts[value] += 1

    echo.table(((first_seen[value], value, count) for value, count in counts.items()))
    echo.out("Checked {} rows and found {} unique values in {}", total, len(counts), column)
# Hand this module's __name__ to captain's exit().
# NOTE(review): presumably captain only dispatches to the main_* command when
# the module is run as a script -- confirm against captain's documentation.
exit(__name__)
Metadata
Metadata
Assignees
Labels
No labels