Skip to content

Commit a972031

Browse files
Add files via upload
1 parent 34e58cc commit a972031

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# use urllib to read the HTML from the data files below, and parse the data,
2+
# extract the numbers, and compute the sum of the numbers in the file.
3+
# http://py4e-data.dr-chuck.net/comments_42.html is where we should get data from
4+
# This file is written so I'm able to refer back to remind myself
5+
6+
import urllib.request, urllib.parse, urllib.error
7+
from bs4 import BeautifulSoup
8+
import ssl
9+
10+
# Page fetched when the user just presses Enter at the prompt.
DEFAULT_URL = 'http://py4e-data.dr-chuck.net/comments_42.html'


def sum_span_numbers(texts):
    """Return the sum of the integers spelled out in *texts*.

    Args:
        texts: iterable of strings, one per <span> tag on the page.

    Returns:
        The integer total; 0 when *texts* is empty.

    Raises:
        ValueError: if a non-blank entry is not a valid integer.

    Blank or whitespace-only entries are skipped so that an empty
    <span></span> on the page does not abort the whole run.
    """
    # int() already tolerates surrounding whitespace, so only the
    # "nothing there at all" case has to be filtered out.
    return sum(int(text) for text in texts if text.strip())


def main():
    """Prompt for a URL, download the page and print the sum of its <span> numbers."""
    # NOTE(security): certificate verification is deliberately switched off
    # so that https pages with broken certificates can still be read. That
    # also removes protection against man-in-the-middle attacks -- do not
    # reuse this context for anything sensitive.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # An empty (or whitespace-only) answer falls back to the sample page.
    url = input("Enter URL: ").strip() or DEFAULT_URL

    # urlopen returns a file-like handle; read() pulls the entire document
    # into one bytes object. The `with` block closes the connection even if
    # the read fails.
    with urllib.request.urlopen(url, context=ctx) as response:
        html = response.read()

    # BeautifulSoup parses the raw HTML into a navigable tree; calling the
    # tree with a tag name returns a list of every matching tag, including
    # the content inside it.
    html_object = BeautifulSoup(html, 'html.parser')
    html_tags = html_object('span')

    # get_text() returns the tag's text even when the span is empty or wraps
    # its number in another tag, where contents[0] would fail.
    print(sum_span_numbers(tag.get_text() for tag in html_tags))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)