-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdata-collection.py
70 lines (57 loc) · 2.27 KB
/
data-collection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import requests
from datetime import datetime, timedelta
from cassandra.cluster import Cluster
# Alpha Vantage request settings. `api_key` is None when the environment
# variable is not set.
api_key = os.getenv('ALPHA_VANTAGE_API_KEY')
symbol = 'AAPL'
interval = '5min'

# Open a session against the Cassandra node at 127.0.0.1; `session` is the
# module-level handle used for every statement below.
cluster = Cluster(['127.0.0.1'])
session = cluster.connect()

# Schema bootstrap: keyspace first, then the table that lives inside it.
# Both statements use IF NOT EXISTS, so they can be executed on every run.
_SCHEMA_STATEMENTS = (
    """
CREATE KEYSPACE IF NOT EXISTS stock_data
WITH replication = {'class':'SimpleStrategy', 'replication_factor':1};
""",
    """
CREATE TABLE IF NOT EXISTS stock_data.time_series (
symbol text,
timestamp timestamp,
open double,
high double,
low double,
close double,
volume int,
PRIMARY KEY (symbol, timestamp)
);
""",
)
for _statement in _SCHEMA_STATEMENTS:
    session.execute(_statement)
# Define function to collect and store stock data
def collect_stock_data():
    """Fetch intraday bars for ``symbol`` and store them in Cassandra.

    Requests the Alpha Vantage ``TIME_SERIES_INTRADAY`` series for the
    module-level ``symbol`` and ``interval`` and writes one row per returned
    bar into ``stock_data.time_series`` through the module-level ``session``.
    A summary line is printed once all rows have been written.

    Raises:
        requests.RequestException: If the HTTP request fails, times out, or
            the API answers with an HTTP error status.
        KeyError: If the JSON payload has no ``Time Series (<interval>)``
            entry (for example when the API returns an error or rate-limit
            message instead of data), or if a bar lacks an expected field.
    """
    # Requested window: the 24 hours that end five minutes before now.
    end_time = datetime.now() - timedelta(minutes=5)
    start_time = end_time - timedelta(days=1)
    start_str = start_time.strftime('%Y-%m-%d %H:%M:%S')
    end_str = end_time.strftime('%Y-%m-%d %H:%M:%S')

    # Let requests build the query string so that values containing spaces
    # (the timestamps) are URL-encoded correctly.
    # NOTE(review): `from`/`to` are carried over from the original request;
    # confirm that this endpoint actually honours them.
    params = {
        'function': 'TIME_SERIES_INTRADAY',
        'symbol': symbol,
        'interval': interval,
        'apikey': api_key,
        'outputsize': 'compact',
        'datatype': 'json',
        'from': start_str,
        'to': end_str,
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(
        'https://www.alphavantage.co/query', params=params, timeout=30
    )
    response.raise_for_status()

    payload = response.json()
    # The series key embeds the interval, so derive it instead of hard-coding.
    series_key = f'Time Series ({interval})'
    if series_key not in payload:
        # Surface whatever the API sent back instead of a bare KeyError.
        raise KeyError(
            f'{series_key!r} missing from Alpha Vantage response: {payload!r}'
        )

    # Values are bound by the driver rather than interpolated into the CQL
    # text, so quoting and escaping are handled in one place.
    insert_cql = (
        'INSERT INTO stock_data.time_series '
        '(symbol, timestamp, open, high, low, close, volume) '
        'VALUES (%s, %s, %s, %s, %s, %s, %s)'
    )
    for timestamp, values in payload[series_key].items():
        session.execute(
            insert_cql,
            (
                symbol,
                # Kept as the API's string form, exactly as the original
                # statement passed it to Cassandra.
                timestamp,
                float(values['1. open']),
                float(values['2. high']),
                float(values['3. low']),
                float(values['4. close']),
                int(values['5. volume']),
            ),
        )

    print(f"Stock data collected and stored for {start_str} to {end_str}")
# Collect and store stock data only when this file is executed as a script,
# so that importing the module does not trigger the network and database work.
if __name__ == "__main__":
    collect_stock_data()