-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaverage.py
More file actions
72 lines (59 loc) · 1.82 KB
/
average.py
File metadata and controls
72 lines (59 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/python3
# This script reads two or more identically formatted .tsv files into pandas
# DataFrames and writes a new .tsv in which each cell contains the average of
# the corresponding cells across all the input files
import sys
import pandas as pd
# Default name of the file the averaged table is written to.
OUTPUT_FILE = 'RENAME_ME.tsv'


class _InputError(Exception):
    """Raised when the input files cannot be averaged; carries the message."""


def _load_frames(fnames):
    """Read every tab-separated file in *fnames* into a DataFrame.

    Raises _InputError if a file does not exist or contains no data.
    """
    frames = []
    for fname in fnames:
        try:
            frames.append(pd.read_csv(fname, sep='\t'))
        except FileNotFoundError:
            raise _InputError("{0} was not found".format(fname)) from None
        except pd.errors.EmptyDataError:
            raise _InputError("{0} contains no data".format(fname)) from None
    return frames


def _check_same_structure(frames):
    """Raise _InputError unless all frames share one shape and column order."""
    # Explicit comparisons instead of `assert`: assertions are stripped when
    # Python runs with -O, which would silently disable the validation.
    for previous, current in zip(frames, frames[1:]):
        if current.shape != previous.shape:
            raise _InputError("Dataframes are not all of the same shape")
        if list(current.columns) != list(previous.columns):
            raise _InputError("Dataframe columns do not match")


def _average_frames(frames):
    """Return a DataFrame holding the cell-wise mean of *frames*.

    Rows are matched by their row index. Non-numeric columns cannot be
    averaged and are left out of the result, except for a 'Name' column,
    which is copied back from the first frame as the leading column.
    """
    stacked = pd.concat(frames)
    # numeric_only=True is required on pandas >= 2.0, where the default
    # raises TypeError as soon as a string column is present.
    means = stacked.groupby(stacked.index).mean(numeric_only=True)
    # Restore 'Name' only if it was dropped as non-numeric and the inputs
    # actually have one (otherwise there is nothing to restore).
    if 'Name' not in means.columns and 'Name' in frames[0].columns:
        means.insert(0, 'Name',
                     pd.Series(frames[0]['Name'].values, index=means.index))
    return means


def main(argv=None, *, output=OUTPUT_FILE):
    """Average the .tsv files named in *argv* and write the result.

    argv:   list of input file paths; defaults to sys.argv[1:].
    output: path of the .tsv file to write.

    Returns the process exit status: 0 on success, 1 on any error. Error
    messages are written to stderr.
    """
    fnames = sys.argv[1:] if argv is None else list(argv)
    if not fnames:
        print("Usage: python3 average.py <tsv1> <tsv2> ...", file=sys.stderr)
        return 1

    try:
        if len(fnames) < 2:
            raise _InputError("Please specify at least two files")
        frames = _load_frames(fnames)
        _check_same_structure(frames)
    except _InputError as err:
        print(err, file=sys.stderr)
        return 1

    # At this point we have a list of identically structured dataframes
    means = _average_frames(frames)

    try:
        means.to_csv(output, sep='\t', index=False)
    except OSError:
        print("Unable to output results to file", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())