Skip to content

Commit df4780b

Browse files
committed
adicionando exemplos e arrumando dirs
1 parent 7cd428e commit df4780b

File tree

6 files changed

+81
-9
lines changed

6 files changed

+81
-9
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"python.pythonPath": "/home/jonas/0programacao-git/python/webScrapingComPython-livro/scrapingEnv/bin/python3.9"
3+
}
Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from urllib.request import urlopen
2-
from bs4 import BeautifulSoup
2+
33
print()
44
# no site há um texto em latim; isto trará esse texto para imprimir
55
html = urlopen('http://pythonscraping.com/pages/page1.html')
6-
bs = BeautifulSoup(html.read(), 'html.parser')
7-
8-
print(bs.h1)
6+
print(html.read())
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from urllib.request import urlopen
2+
from bs4 import BeautifulSoup
3+
print()
4+
html = urlopen('http://pythonscraping.com/pages/page1.html')
5+
bs = BeautifulSoup(html.read(), 'html.parser')
6+
7+
print(bs.title)
8+
# imprime o título da página (tag "h1")
9+
print(bs.h1)
10+
11+
print('div : ')
12+
print(bs.div)
13+
14+
"""
15+
qualquer uma das expressões a seguir produziria o mesmo resultado:
16+
bs.html.body.h1
17+
bs.body.h1
18+
bs.html.h1
19+
"""
20+
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from urllib.request import urlopen
2+
from bs4 import BeautifulSoup
3+
print()
4+
5+
# Fetch a page, report network failures, and print a few parsed elements.
#
# HTTPError and URLError live in urllib.error. They are imported here because
# without the import the except clauses themselves raise NameError as soon as
# urlopen fails, so the failure is never actually handled.
from urllib.error import HTTPError, URLError

try:
    html = urlopen('http://pythonscraping.com/pages/page1.html')
except HTTPError as e:
    # The server answered, but with an error status.
    print(e)
except URLError as e:
    # No usable response at all. HTTPError is a subclass of URLError, so this
    # clause has to come after the HTTPError one to keep the two cases apart.
    print(e)
    print('the server could not be found')
    print('o servidor não poode ser encontrado')
else:
    print('it worked')
    print('funcionou')

    # Parse only on success: html is unbound when urlopen raised.
    bs = BeautifulSoup(html.read(), 'html.parser')

    print(bs.title)
    print(bs.h1)

    print('div : ')
    print(bs.div)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from urllib.request import urlopen
2+
from bs4 import BeautifulSoup
3+
from urllib.error import HTTPError
4+
5+
# html = urlopen('http://pythonscraping.com/pages/page1.html')
6+
def getTitle(url):
    """Return the first <h1> inside <body> of the page at url, or None.

    Args:
        url: Address of the page to download.

    Returns:
        The BeautifulSoup tag for the heading, or None when the request
        fails with an HTTPError or the heading cannot be located.
    """
    print()

    try:
        html = urlopen(url)
    except HTTPError:
        return None

    try:
        bs = BeautifulSoup(html.read(), 'html.parser')
        # bs.body is None for a document without <body>; reading .h1 on it
        # raises AttributeError, which is treated as "no title".
        title = bs.body.h1
    except AttributeError:
        return None
    return title


title = getTitle('http://www.pythonscraping.com/pages/page1.html')
if title is None:
    print('title could not be found')
    print('titulo não poode ser encontrado')
else:
    print(title)

1-capitulo/02-executandoBeautifulSoup/scrapetest.py

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)