Skip to content

Commit 143a0fa

Browse files
committed
Adding script to convert tables to csv
1 parent 2a86675 commit 143a0fa

File tree

3 files changed

+178
-0
lines changed

3 files changed

+178
-0
lines changed

Libraries_Tests/Untitled.ipynb

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import jinja2"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 2,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"from jinja2 import Template"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 3,
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"t = Template(\"Hello {{ something }}!\")"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 4,
33+
"metadata": {},
34+
"outputs": [
35+
{
36+
"data": {
37+
"text/plain": [
38+
"u'Hello World!'"
39+
]
40+
},
41+
"execution_count": 4,
42+
"metadata": {},
43+
"output_type": "execute_result"
44+
}
45+
],
46+
"source": [
47+
"t.render(something=\"World\")"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": 5,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"t = Template(\"My favorite numbers: {% for n in range(1,10) %}{{n}} \" \"{% endfor %}\")"
57+
]
58+
},
59+
{
60+
"cell_type": "code",
61+
"execution_count": 6,
62+
"metadata": {},
63+
"outputs": [
64+
{
65+
"data": {
66+
"text/plain": [
67+
"u'My favorite numbers: 1 2 3 4 5 6 7 8 9 '"
68+
]
69+
},
70+
"execution_count": 6,
71+
"metadata": {},
72+
"output_type": "execute_result"
73+
}
74+
],
75+
"source": [
76+
"t.render()"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {},
83+
"outputs": [],
84+
"source": []
85+
}
86+
],
87+
"metadata": {
88+
"kernelspec": {
89+
"display_name": "Python 3",
90+
"language": "python",
91+
"name": "python3"
92+
},
93+
"language_info": {
94+
"codemirror_mode": {
95+
"name": "ipython",
96+
"version": 2
97+
},
98+
"file_extension": ".py",
99+
"mimetype": "text/x-python",
100+
"name": "python",
101+
"nbconvert_exporter": "python",
102+
"pygments_lexer": "ipython2",
103+
"version": "2.7.12"
104+
}
105+
},
106+
"nbformat": 4,
107+
"nbformat_minor": 2
108+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import requests
2+
import pandas as pd
3+
import numpy as np
4+
import matplotlib
5+
import bs4
6+
from bs4 import BeautifulSoup
7+
import csv
8+
import re
9+
10+
11+
# Module-level input path. Despite the name, the value is a relative
# filesystem path to an HTML file rather than a web URL.
url = "./1 APR to 15 APR.html"
12+
13+
def html_tables_to_csv(input_url, output_file="output_file8.csv", table_index=3):
    """Write one table of a local HTML file to a CSV file.

    The header row of the CSV is built from the table's ``<th>`` cells and
    each data row from the ``<td>`` cells of one ``<tr>``. Targets Python 3.

    Args:
        input_url: Path of the HTML file to read. It is opened with
            ``open``, so it must be a filesystem path, not a web URL.
        output_file: Path of the CSV file to create or overwrite. Defaults
            to the file name that was previously hard-coded.
        table_index: Zero-based position of the ``<table>`` to export among
            all tables in the document. Defaults to 3, the index that was
            previously hard-coded.

    Raises:
        IndexError: If the document has no table at ``table_index``.
    """
    # Hand BeautifulSoup the raw bytes so it can work out the document's
    # encoding itself; the context manager guarantees the handle is closed.
    with open(input_url, "rb") as html_file:
        soup = BeautifulSoup(html_file, "html.parser")

    tables = soup.find_all("table")
    try:
        table = tables[table_index]
    except IndexError:
        raise IndexError(
            "{!r} contains {} table(s); there is no table at index {}".format(
                input_url, len(tables), table_index
            )
        ) from None

    headers = [header.text for header in table.find_all("th")]

    # Remove double quotes, '=', commas and spaces from every cell in a
    # single pass (same characters the chained replace() calls removed).
    unwanted = str.maketrans("", "", '"=, ')
    rows = [
        [cell.text.translate(unwanted) for cell in row.find_all("td")]
        for row in table.find_all("tr")
    ]

    # newline="" lets the csv module control line endings, which avoids
    # blank lines between rows on Windows.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        # A <tr> without any <td> (such as the header row) yields an empty
        # list; skip those so the CSV has no blank records.
        writer.writerows(row for row in rows if row)
29+
30+
31+
def main(url, output_file="output_file8.csv"):
    """Convert the HTML file at ``url`` into a CSV file.

    Args:
        url: Path of the HTML file to convert; forwarded unchanged to
            ``html_tables_to_csv``.
        output_file: Path of the CSV file to write; forwarded unchanged to
            ``html_tables_to_csv``. Optional so that callers supplying only
            the input path (as the script's entry point does) still work;
            the default is the output name the converter has always used.
    """
    html_tables_to_csv(url, output_file)
33+
34+
if __name__ == "__main__":
    # sys is not among the module's imports, so it is imported here; the
    # bare sys.argv lookup would otherwise raise NameError.
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: {} INPUT_HTML [OUTPUT_CSV]".format(sys.argv[0]))

    # main() takes an input path and an output path. The output path is an
    # optional second command-line argument and falls back to the file name
    # the converter has always written.
    main(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else "output_file8.csv")
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import requests
2+
import pandas as pd
3+
import numpy as np
4+
import matplotlib
5+
import bs4
6+
from bs4 import BeautifulSoup
7+
import csv
8+
import re
9+
10+
11+
# Module-level input path. Despite the name, the value is a relative
# filesystem path to an HTML file rather than a web URL.
url = "./1 APR to 15 APR.html"
12+
13+
def html_tables_to_csv(input_url, output_file="output_file8.csv", table_index=3):
    """Write one table of a local HTML file to a CSV file.

    The header row of the CSV is built from the table's ``<th>`` cells and
    each data row from the ``<td>`` cells of one ``<tr>``. Targets Python 3.

    Args:
        input_url: Path of the HTML file to read. It is opened with
            ``open``, so it must be a filesystem path, not a web URL.
        output_file: Path of the CSV file to create or overwrite. Defaults
            to the file name that was previously hard-coded.
        table_index: Zero-based position of the ``<table>`` to export among
            all tables in the document. Defaults to 3, the index that was
            previously hard-coded.

    Raises:
        IndexError: If the document has no table at ``table_index``.
    """
    # Hand BeautifulSoup the raw bytes so it can work out the document's
    # encoding itself; the context manager guarantees the handle is closed.
    with open(input_url, "rb") as html_file:
        soup = BeautifulSoup(html_file, "html.parser")

    tables = soup.find_all("table")
    try:
        table = tables[table_index]
    except IndexError:
        raise IndexError(
            "{!r} contains {} table(s); there is no table at index {}".format(
                input_url, len(tables), table_index
            )
        ) from None

    headers = [header.text for header in table.find_all("th")]

    # Remove double quotes, '=', commas and spaces from every cell in a
    # single pass (same characters the chained replace() calls removed).
    unwanted = str.maketrans("", "", '"=, ')
    rows = [
        [cell.text.translate(unwanted) for cell in row.find_all("td")]
        for row in table.find_all("tr")
    ]

    # newline="" lets the csv module control line endings, which avoids
    # blank lines between rows on Windows.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        # A <tr> without any <td> (such as the header row) yields an empty
        # list; skip those so the CSV has no blank records.
        writer.writerows(row for row in rows if row)
29+
30+
31+
def main(url, output_file="output_file8.csv"):
    """Convert the HTML file at ``url`` into a CSV file.

    Args:
        url: Path of the HTML file to convert; forwarded unchanged to
            ``html_tables_to_csv``.
        output_file: Path of the CSV file to write; forwarded unchanged to
            ``html_tables_to_csv``. Optional so that callers supplying only
            the input path (as the script's entry point does) still work;
            the default is the output name the converter has always used.
    """
    html_tables_to_csv(url, output_file)
33+
34+
if __name__ == "__main__":
    # sys is not among the module's imports, so it is imported here; the
    # bare sys.argv lookup would otherwise raise NameError.
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: {} INPUT_HTML [OUTPUT_CSV]".format(sys.argv[0]))

    # main() takes an input path and an output path. The output path is an
    # optional second command-line argument and falls back to the file name
    # the converter has always written.
    main(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else "output_file8.csv")

0 commit comments

Comments
 (0)