Skip to content

Commit 58c75ba

Browse files
JanardhanamJanardhanam
authored and committed
Initial Commit
0 parents  commit 58c75ba

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

HTML-Beauti.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from bs4 import BeautifulSoup
2+
3+
4+
5+
6+
7+
8+
9+
10+
11+
12+
def file_replace_rext(filename, toreplace, replacement):
    """Replace every occurrence of a substring in a text file, in place.

    The file is read as UTF-8, each occurrence of ``toreplace`` is
    substituted with ``replacement``, and the result is written back to the
    same path as UTF-8.

    Args:
        filename: Path of the file to rewrite.
        toreplace: Substring to search for.
        replacement: Text substituted for each occurrence.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        filedata = file.read()

    # Replace the text in file
    filedata = filedata.replace(toreplace, replacement)

    # Write the file out again, using the same encoding it was read with.
    # Falling back to the platform default here can raise UnicodeEncodeError
    # or corrupt non-ASCII text on systems whose default is not UTF-8.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(filedata)
20+
21+
# Script: normalise 'todo.html' in place (smart quotes -> straight quotes),
# strip styling-related markup with BeautifulSoup, and write the cleaned
# document to 'done.html'.

# Read in the file
with open('todo.html', 'r', encoding='utf-8') as file:
    filedata = file.read()

# Replace the “smart quotes” with plain ASCII double quotes
filedata = filedata.replace('“', '"').replace('”', '"')

# Write the file out again. The encoding must match the one used for
# reading; otherwise any remaining non-ASCII character can fail to encode
# (or be mangled) on platforms whose default encoding is not UTF-8.
with open('todo.html', 'w', encoding='utf-8') as file:
    file.write(filedata)

# TODO: verify if removing entire style tag is good or no

with open("todo.html", encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Print the document title when there is one; a document without a <title>
# element would otherwise raise AttributeError on None.
title = soup.title.get_text() if soup.title is not None else None
if title is not None:
    print(title)

# Remove the (first) style tag from the header, if the document has one
style_tag = soup.find('style')
if style_tag is not None:
    style_tag.extract()

# Strip presentation/identity attributes from every tag in the document
for tag in soup():
    for attribute in ['class', 'id', 'name', 'style']:
        del tag[attribute]

# do string processing after here
soup_string = str(soup)

# Collapse empty paragraphs (<p><br/></p>) into a bare <br/>
soup_string = soup_string.replace('<p><br/></p>', '<br/>')

# Wrap the body content in a single <div>. Only a bare '<body>' tag is
# matched (class/id/name/style were stripped above, but other attributes
# would prevent the match), so the closing </div> is added only when the
# opening one was, keeping the markup balanced.
if '<body>' in soup_string:
    soup_string = soup_string.replace('<body>', '<body>\n<div>')
    soup_string = soup_string.replace('</body>', '</div>\n</body>')

# Export the cleaned document as UTF-8, matching the input encoding
with open('done.html', 'w', encoding='utf-8') as file:
    file.write(soup_string)

0 commit comments

Comments
 (0)