AD3: COVID-19 API connection and analysis with Pandas
This is directed activity 3, which consists of a literate-programming exercise that reuses the code we wrote in the Python programming course, where we did web scraping. The source code follows.
pip install requests bs4 pandas termcolor
Requirement already satisfied: requests in c:\users\ania alonzo\anaconda3\lib\site-packages (2.27.1)
Requirement already satisfied: bs4 in c:\users\ania alonzo\anaconda3\lib\site-packages (0.0.1)
Requirement already satisfied: pandas in c:\users\ania alonzo\anaconda3\lib\site-packages (1.4.2)
Requirement already satisfied: termcolor in c:\users\ania alonzo\anaconda3\lib\site-packages (1.1.0)
Requirement already satisfied: idna<4,>=2.5 in c:\users\ania alonzo\anaconda3\lib\site-packages (from requests) (3.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\ania alonzo\anaconda3\lib\site-packages (from requests) (2021.10.8)
Requirement already satisfied: charset-normalizer~=2.0.0 in c:\users\ania alonzo\anaconda3\lib\site-packages (from requests) (2.0.4)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\users\ania alonzo\anaconda3\lib\site-packages (from requests) (1.26.9)
Requirement already satisfied: beautifulsoup4 in c:\users\ania alonzo\anaconda3\lib\site-packages (from bs4) (4.11.1)
Requirement already satisfied: pytz>=2020.1 in c:\users\ania alonzo\anaconda3\lib\site-packages (from pandas) (2021.3)
Requirement already satisfied: numpy>=1.18.5 in c:\users\ania alonzo\anaconda3\lib\site-packages (from pandas) (1.21.5)
Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\ania alonzo\anaconda3\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: six>=1.5 in c:\users\ania alonzo\anaconda3\lib\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
Requirement already satisfied: soupsieve>1.2 in c:\users\ania alonzo\anaconda3\lib\site-packages (from beautifulsoup4->bs4) (2.3.1)
Note: you may need to restart the kernel to use updated packages.
Here I import the following libraries:
import requests
import time
import csv
import re
from bs4 import BeautifulSoup
import os
import pandas as pd
from termcolor import colored
resultados = []

# Front pages of the El País sections whose <h2> headlines we collect.
urls = [
    "https://resultados.elpais.com",
    "https://elpais.com/internacional",
    "https://elpais.com/opinion",
    "https://elpais.com/espana/",
    "https://elpais.com/economia/",
    "https://elpais.com/sociedad/",
    "https://elpais.com/educacion/",
    "https://elpais.com/clima-y-medio-ambiente/",
    "https://elpais.com/ciencia/",
    "https://elpais.com/cultura/",
    "https://elpais.com/babelia/",
    "https://elpais.com/deportes/",
    "https://elpais.com/tecnologia/",
    "https://elpais.com/gente/",
    "https://elpais.com/television/",
    "https://elpais.com/eps/",
]

for url in urls:
    req = requests.get(url)
    # Abort if the page did not answer with 200 OK.
    if req.status_code != 200:
        raise Exception("Web scraping is not possible at " + url)
    soup = BeautifulSoup(req.text, "html.parser")
    # Print every <h2> headline and keep it for the keyword search below.
    for h2 in soup.find_all("h2"):
        print(h2.text)
        resultados.append(h2.text)
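The csv module is imported at the top but never used. Below is a minimal sketch of how the collected headlines could be written to disk with it; the filename titulares.csv is a hypothetical choice, not part of the original code.

# Hypothetical output file for the scraped headlines.
with open("titulares.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["titular"])  # single-column header
    for titular in resultados:
        writer.writerow([titular])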
os.system("clear")
print(colored("A continuación se muestran los titulares de las páginas principales del diario El País que contienen las siguientes palabras:", 'blue', attrs=['bold'])) print(colored("Feminismo", 'green', attrs=['bold']))
str_match = [s for s in resultados if "feminismo" in s] print("\n".join(str_match))
print(colored("Igualdad", 'green', attrs=['bold']))
str_match = [s for s in resultados if "igualdad" in s] print("\n".join(str_match))
print(colored("Mujeres", 'green', attrs=['bold']))
str_match = [s for s in resultados if "mujeres" in s] print("\n".join(str_match))
print(colored("Mujer", 'green', attrs=['bold']))
str_match = [s for s in resultados if "mujer" in s] print("\n".join(str_match))
print(colored("Brecha salarial", 'green', attrs=['bold']))
str_match = [s for s in resultados if "brecha salarial" in s] print("\n".join(str_match))
print(colored("Machismo", 'green', attrs=['bold']))
str_match = [s for s in resultados if "machismo" in s] print("\n".join(str_match))
print(colored("Violencia", 'green', attrs=['bold']))
str_match = [s for s in resultados if "violencia" in s] print("\n".join(str_match))
print(colored("Maltrato", 'green', attrs=['bold']))
str_match = [s for s in resultados if "maltrato" in s] print("\n".join(str_match))
print(colored("Homicidio", 'green', attrs=['bold']))
str_match = [s for s in resultados if "homicidio" in s] print("\n".join(str_match))
print(colored("Género", 'green', attrs=['bold']))
str_match = [s for s in resultados if "género" in s] print("\n".join(str_match))
print(colored("Asesinato", 'green', attrs=['bold']))
str_match = [s for s in resultados if "asesinato" in s] print("\n".join(str_match))
print(colored("Sexo", 'green', attrs=['bold']))
str_match = [s for s in resultados if "sexo" in s] print("\n".join(str_match))
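Pandas is imported at the top but never used, even though the title promises an analysis with it. A minimal sketch of what that analysis could look like, counting how many scraped headlines mention each keyword; it only reuses resultados and keywords from above.

# Count, per keyword, how many collected headlines contain it.
conteo = pd.DataFrame({
    "keyword": keywords,
    "titulares": [sum(kw in s.lower() for s in resultados) for kw in keywords],
})
# Show the most frequent topics first.
print(conteo.sort_values("titulares", ascending=False))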
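The title also mentions a COVID-19 API connection, which the code above never performs. The sketch below shows one way such a connection could look; the disease.sh endpoint and its field names (country, cases, deaths) are assumptions, not part of the original activity.

# Assumed public endpoint returning per-country COVID-19 statistics as JSON.
resp = requests.get("https://disease.sh/v3/covid-19/countries")
if resp.status_code != 200:
    raise Exception("Could not reach the COVID-19 API")
covid = pd.DataFrame(resp.json())
# Ten countries with the most accumulated cases.
print(covid[["country", "cases", "deaths"]]
      .sort_values("cases", ascending=False)
      .head(10))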