Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
robsalgado authored Apr 24, 2019
1 parent 6d8135a commit 3147259
Showing 1 changed file with 101 additions and 0 deletions.
101 changes: 101 additions & 0 deletions automate_reporting/news_api_to_sheet_full_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@

# Import required libraries
import pandas as pd
import numpy as np
import requests
from datetime import datetime, timedelta
from apiclient import discovery
from google.oauth2 import service_account
from googleapiclient.discovery import build
import httplib2

# Work out yesterday's date (local clock) as a 'YYYY-MM-DD' string
today = datetime.today()
yesterday = (today - timedelta(days=1)).strftime('%Y-%m-%d')

# Define the url with the desired endpoint
base_url = 'https://newsapi.org/v2/everything'

# Query string parameters: English-language articles matching the search
# phrase, with both ends of the date window set to yesterday
params = {
    'q': 'artificial intelligence',
    'to': yesterday,
    'from': yesterday,
    'apiKey': '<YOUR API KEY>',
    'language': 'en',
    'pageSize': 100,
}

# Make the GET request. requests applies no timeout by default, so set one
# explicitly to keep the script from hanging on a stalled connection.
response = requests.get(base_url, params=params, timeout=30)

# Stop here with an HTTPError on a 4xx/5xx reply rather than failing later
# with a KeyError when the error payload has no 'articles' entry
response.raise_for_status()

# Decode the JSON body of the response
total_results = response.json()

# Build the DataFrame from the API response
# Pull each field out of the returned articles into its own list
articles = total_results['articles']

title = [article['title'] for article in articles]
# Use NaN as the placeholder when an article has no 'content' key
content = [article['content'] if 'content' in article else np.nan
           for article in articles]
url = [article['url'] for article in articles]
name = [article['source']['name'] for article in articles]
# Every article is tagged with the same date string
date = [yesterday] * len(articles)

# Each list forms one row of the intermediate frame; the transpose turns
# the lists into columns
df = pd.DataFrame([title, content, url, name, date]).T

# Label the columns
df.columns = ['title', 'content', 'url', 'site', 'date']

# Set up access to the Google Sheets API
# ID of the spreadsheet the data is written to
spreadsheet_id = '19uEbvGK1RxrI0BHHpgE8nMuGqrwqrowmo0pGngf25-c'

# Scopes requested for the service-account credentials
scopes = ['https://www.googleapis.com/auth/spreadsheets']

# Build the credentials from the service-account JSON key file
credentials_path = '<PATH TO CREDENTIALS JSON>'
credentials = service_account.Credentials.from_service_account_file(
    credentials_path, scopes=scopes)

# Create the client for version 4 of the Sheets API
service = discovery.build('sheets', 'v4', credentials=credentials)

# Name of the tab inside the spreadsheet that receives the data.
# It was previously used below without ever being defined (NameError);
# replace the placeholder with your own tab name.
sheet_name = '<YOUR SHEET NAME>'

# This will create a df with the headers as the first row
with_headers = pd.DataFrame(np.vstack([df.columns, df]))

# You only need to include the headers the first time
# After that you can just do:
# values = [df[each_col].tolist() for each_col in df]

# Then put each column into a list
values = [with_headers[each_col].tolist() for each_col in with_headers]

# Define the range for the data
range_ = sheet_name + '!A2:E'

# How the input data should be interpreted
value_input_option = 'RAW'  # Store values as they are

# How the input data should be inserted
insert_data_option = 'INSERT_ROWS'  # Rows are inserted as opposed to overwriting

# Define the data fields and set major dimension to columns
# The default is rows which will transpose each column as a row which you don't want
data = {
    'values': values,
    'majorDimension': 'COLUMNS',
}

# Build the request and execute the api call
# NOTE: because .execute() is chained on, `request` holds the API's reply
request = service.spreadsheets().values().append(
    spreadsheetId=spreadsheet_id,
    range=range_,
    valueInputOption=value_input_option,
    insertDataOption=insert_data_option,
    body=data,
).execute()

# Print out the number of rows to verify
print('Number of rows inserted {}'.format(request['updates']['updatedRows']))

0 comments on commit 3147259

Please sign in to comment.