Skip to content

Commit 728a8b9

Browse files
Janardhanam authored and Janardhanam committed
fixed utf-8 jank
1 parent e70e67e commit 728a8b9

File tree

1 file changed

+14
-23
lines changed

1 file changed

+14
-23
lines changed

HTML-Beauti.py

Lines changed: 14 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -50,13 +50,6 @@ def main():
5050

5151
print(filename, end=" ")
5252

53-
#removing smart quotes
54-
file_replace_text(filepath, '“', '"')
55-
file_replace_text(filepath, '”', '"')
56-
file_replace_text(filepath, '’', '')
57-
#file_replace_text(filepath, '', '"')
58-
59-
print(".", end=" ")
6053

6154
#start html cleaning with beautifulsoup
6255
with open(filepath, encoding='utf-8') as fp:
@@ -68,6 +61,8 @@ def main():
6861
creditMe = soup.new_tag('meta', content='Converted to HTML by Vinay Janardhanam')
6962
soup.head.append(creditMe)
7063

64+
print(".", end=" ")
65+
7166
#remove style tag from header
7267
for s in soup('style'):
7368
s.extract()
@@ -80,6 +75,8 @@ def main():
8075
for h2 in soup('h2'):
8176
h2.name = 'h4'
8277

78+
print(".", end=" ")
79+
8380
#remove unnecessary attributes
8481
for tag in soup():
8582
for attribute in ['class', 'id', 'name', 'style']:
@@ -94,31 +91,25 @@ def main():
9491
for image in soup.findAll('img'):
9592
image['src'] = image['src'].replace(filename, "/images/group87/"+KBID)
9693

97-
#self explanitory :)
98-
soup.prettify()
99-
100-
#do string processing after here
101-
soup_string = str(soup.encode("utf-8"))
102-
10394
print(".", end=" ")
10495

105-
#removes all <p><br/></p> tags
106-
soup_string = soup_string.replace('<p><br/></p>', '')
107-
108-
#encapsulates <p> tags in <body> with a <div>
109-
soup_string = soup_string.replace('<body>', '<body>\n<div>')
110-
soup_string = soup_string.replace('</body>', '</div>\n</body>')
96+
#exports file with .new at end
97+
with open(filepath+'.new', 'w', encoding="utf-8") as file:
98+
file.write(str(soup))
11199

112100
print(".", end=" ")
113101

114-
#exports file with .new at end
115-
with open(filepath+'.new', 'w') as file:
116-
file.write(str(soup_string))
117-
118102
#preserves original html file (minus the dumb smart quotes) with a .old extension
119103
os.rename(filepath,filepath+'.old')
120104
os.rename(filepath+'.new', filepath)
121105

106+
print(".", end=" ")
107+
108+
file_replace_text(filepath, '<p><br/></p>', '')
109+
file_replace_text(filepath, '<body>', '<body>\n<div>')
110+
file_replace_text(filepath, '</body>', '</div>\n</body>')
111+
file_replace_text(filepath, '', '->')
112+
122113
print('done!')
123114

124115

0 commit comments

Comments
 (0)