
Commit

New release. Lots of stuff.
quapsale committed May 18, 2023
1 parent b8b4e7f commit b754805
Showing 22 changed files with 222 additions and 180 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ This repository is organized as it follows:
 <li> Implement the Toda-Yamamoto procedure to test for Granger-causality between correlated cryptocoins.</li>
 <li> Train and test SOTA machine learning models to forecast cryptocoin price series (namely GRU, LSTM, CatBoost, LightGBM and XGBoost).</li>
 </ul>
-<li><b>Data:</b> pre-built datasets adopted in the above-mentioned analyses, spanning 27 months from 13/03/2020 to 22/06/2022. </li>
+<li><b>Data:</b> pre-built datasets adopted in the above-mentioned analyses, spanning 33 months from 20-02-2020 to 26-02-2023. </li>
 
 ## Data
 The data sources used to gather information about cryptocurrency trends are [CoinMarketCap](https://www.coinmarketcap.com/) and [Binance](https://www.binance.com/).
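
Editor's note: the README points at CoinMarketCap and Binance as data sources. For orientation, daily candles of the kind these datasets contain can be pulled from Binance's public REST API. The sketch below is illustrative only, not the repository's collection code; it assumes the requests library, and the symbol, interval, and limit values are arbitrary.

# Illustrative sketch: fetch daily BTC/USDT candles from Binance's public API.
import requests

resp = requests.get(
    'https://api.binance.com/api/v3/klines',
    params={'symbol': 'BTCUSDT', 'interval': '1d', 'limit': 500},
    timeout=30,
)
resp.raise_for_status()
# Each row: [open_time, open, high, low, close, volume, close_time, ...]
candles = resp.json()
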
Binary file added analysis/causality/data/binance_notest.zip
Binary file not shown.
37 changes: 17 additions & 20 deletions analysis/causality/toda_yamamoto.R
@@ -1,6 +1,6 @@
 # File: toda_yamamoto.R
 # Description: Implementation of the Toda-Yamamoto (1995) procedure to test for Granger causality.
-# File Created: 01/07/2022
+# File Created: 01/02/2023
 # R Version: 3.6
 
 
@@ -14,26 +14,30 @@ library(zoo)
 library(tseries)
 
 # File properties
-data_path <- './data/datasets/binance.csv'
+wd <- getwd()
+data_path <- file.path(wd, 'data/binance_notest.csv')
 
 # Set seed
-set.seed(123)
+set.seed(1234)
 
 # Load data
-data <- read_delim(data_path, delim = '\t')
-coins <- names(data[, 2:16])
+data <- read_delim(data_path, delim = ',')
+coins <- names(data[, 2:17])
 
 # Transform into daily observations
 daily_freq <- as.Date(cut(data$Date, 'day'))
-new_data <- aggregate(ADA ~ daily_freq, data, mean)
-colnames(new_data) <- c('Date', 'ADA')
+new_data <- aggregate(DOGE ~ daily_freq, data, mean)
+colnames(new_data) <- c('Date', 'XLM')
 
-for (i in coins[2:15])
+for (i in coins[2:16])
 { coin <- aggregate(data[[i]] ~ daily_freq, data, mean)
 colnames(coin) <- c('Date', i)
 new_data <- cbind(new_data, coin[2])
 }
 
+# Write output
+sink('results/output.txt')
+
 # ADF test
 for (i in coins)
 { print(i)
@@ -65,7 +69,7 @@ for (i in coins)
 # 1st order differentiation eliminates the unit root, then the maximum order of integration is 1.
 
 # Set up VAR select using all the possible coin pairs to find optimal lag (lower AIC)
-coins_subset <- coins[-c(3, 6)]
+coins_subset <- coins[-c(3, 5)]
 btc_lags <- c()
 eth_lags <- c()
 
@@ -95,11 +99,6 @@ V <- VAR(new_data[c('BTC', i)], p=lag+1, type='both')
 waldtest1 <- wald.test(b=coef(V$varresult[[1]]), Sigma=vcov(V$varresult[[1]]), Terms=c(seq(2, by=2, length=lag)))
 cat(i, 'does not Granger-cause BTC', '\n')
 print(waldtest1$result)
-
-# Wald-test 2 (H0: BTC does not Granger-cause alt-coin)
-waldtest2 <- wald.test(b=coef(V$varresult[[2]]), Sigma=vcov(V$varresult[[2]]), Terms=c(seq(1, by=2, length=lag)))
-cat('BTC does not Granger-cause', i, '\n')
-print(waldtest2$result)
 }
 
 # ETH
@@ -113,10 +112,8 @@ V <- VAR(new_data[c('ETH', i)], p=lag+1, type='both')
 # Wald-test 1 (H0: alt-coin does not Granger-cause ETH)
 waldtest1 <- wald.test(b=coef(V$varresult[[1]]), Sigma=vcov(V$varresult[[1]]), Terms=c(seq(2, by=2, length=lag)))
 cat(i, 'does not Granger-cause ETH', '\n')
-print(waldtest1$result)
-
-# Wald-test 2 (H0: ETH does not Granger-cause alt-coin)
-waldtest2 <- wald.test(b=coef(V$varresult[[2]]), Sigma=vcov(V$varresult[[2]]), Terms=c(seq(1, by=2, length=lag)))
-cat('ETH does not Granger-cause', i, '\n')
-print(waldtest2$result)
+print(waldtest1$result)
 }
+
+# Stop writing output
+sink()
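
Editor's note: the script follows the standard Toda-Yamamoto recipe: ADF tests establish the maximum order of integration d_max (here 1), VARselect picks the lag order p by AIC per coin pair, a VAR is fit in levels with p + d_max lags (p=lag+1 above), and a Wald test restricted to the first p lag coefficients of the candidate cause decides Granger causality. Below is a minimal single-equation sketch of that test in Python, for readers without an R setup; function and variable names are assumptions, and the authoritative implementation is the R script above.

# Hedged sketch of a single-equation Toda-Yamamoto Wald test (assumed names).
import numpy as np
from scipy import stats

def ty_wald(y, x, p, d_max):
    """Test H0: x does not Granger-cause y, regressing y_t in levels on a
    constant plus lags 1..(p + d_max) of y and of x; only the first p lags
    of x are restricted, as in the wald.test calls in the R script."""
    y, x = np.asarray(y, float), np.asarray(x, float)
    k = p + d_max                                # augmented lag order
    T = len(y) - k                               # usable observations
    ylags = np.column_stack([y[k - j:len(y) - j] for j in range(1, k + 1)])
    xlags = np.column_stack([x[k - j:len(x) - j] for j in range(1, k + 1)])
    X = np.column_stack([np.ones(T), ylags, xlags])
    beta, *_ = np.linalg.lstsq(X, y[k:], rcond=None)
    resid = y[k:] - X @ beta
    sigma2 = resid @ resid / (T - X.shape[1])    # residual variance
    V = sigma2 * np.linalg.inv(X.T @ X)          # coefficient covariance
    idx = np.arange(1 + k, 1 + k + p)            # first p lags of x
    W = beta[idx] @ np.linalg.solve(V[np.ix_(idx, idx)], beta[idx])
    return W, stats.chi2.sf(W, df=p)             # Wald statistic, p-value

The extra d_max lags are deliberately left unrestricted; that is what keeps the Wald statistic asymptotically chi-squared even when the series are integrated.
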
16 changes: 9 additions & 7 deletions analysis/correlation/ohlc/correlations_daily.py
@@ -1,25 +1,27 @@
 """
 File: correlations_daily.py
-Description: Correlation Analysis with daily granularity (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with daily granularity (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Volume'], axis=1, inplace=True)
 data['Avg OHLC Price'] = data[['Open', 'High', 'Low', 'Close']].mean(axis=1)
@@ -86,5 +88,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'daily_btc_OHLC.csv')
 file_name_eth = os.path.join(corr_data_path, 'daily_eth_OHLC.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
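
Editor's note: the path change above (repeated in the five sibling correlation scripts that follow) replaces hard-coded relative paths with paths anchored at sys.path[1], which resolves differently depending on how the interpreter is launched. A more invocation-independent sketch, under the assumption that data/ lives at the repository root three levels above the script:

# Hypothetical alternative: anchor paths at the script's own location instead
# of sys.path, which varies with the working directory and run configuration.
import os

script_dir = os.path.dirname(os.path.abspath(__file__))   # .../analysis/correlation/ohlc
root_dir = os.path.abspath(os.path.join(script_dir, '..', '..', '..'))
data_path = os.path.join(root_dir, 'data', 'datasets', 'coinmarketcap.csv')
corr_data_path = os.path.join(root_dir, 'analysis', 'correlation', 'correlogram_data')
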
16 changes: 9 additions & 7 deletions analysis/correlation/ohlc/correlations_monthly_sw.py
@@ -1,26 +1,28 @@
 """
 File: correlations_monthly_sw.py
-Description: Correlation Analysis with monthly granularity and sliding window (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with monthly granularity and sliding window (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 window_size = '30D'
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Volume'], axis=1, inplace=True)
 data['Avg OHLC Price'] = data[['Open', 'High', 'Low', 'Close']].mean(axis=1)
@@ -87,5 +89,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'monthly_sw_btc_OHLC.csv')
 file_name_eth = os.path.join(corr_data_path, 'monthly_sw_eth_OHLC.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
16 changes: 9 additions & 7 deletions analysis/correlation/ohlc/correlations_monthly_tw.py
@@ -1,26 +1,28 @@
 """
 File: correlations_monthly_tw.py
-Description: Correlation Analysis with monthly granularity and thumbing window (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with monthly granularity and thumbing window (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 time_frame = 'M'
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Volume'], axis=1, inplace=True)
 data['Avg OHLC Price'] = data[['Open', 'High', 'Low', 'Close']].mean(axis=1)
@@ -87,5 +89,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'monthly_tw_btc_OHLC.csv')
 file_name_eth = os.path.join(corr_data_path, 'monthly_tw_eth_OHLC.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
16 changes: 9 additions & 7 deletions analysis/correlation/ohlc/correlations_weekly_sw.py
@@ -1,26 +1,28 @@
 """
 File: correlations_weekly_sw.py
-Description: Correlation Analysis with weekly granularity and sliding window (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with weekly granularity and sliding window (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 window_size = '7D'
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Volume'], axis=1, inplace=True)
 data['Avg OHLC Price'] = data[['Open', 'High', 'Low', 'Close']].mean(axis=1)
@@ -87,5 +89,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'weekly_sw_btc_OHLC.csv')
 file_name_eth = os.path.join(corr_data_path, 'weekly_sw_eth_OHLC.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
16 changes: 9 additions & 7 deletions analysis/correlation/ohlc/correlations_weekly_tw.py
@@ -1,26 +1,28 @@
 """
 File: correlations_weekly_tw.py
-Description: Correlation Analysis with weekly granularity and thumbing window (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with weekly granularity and thumbing window (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 time_frame = 'W'
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Volume'], axis=1, inplace=True)
 data['Avg OHLC Price'] = data[['Open', 'High', 'Low', 'Close']].mean(axis=1)
@@ -87,5 +89,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'weekly_tw_btc_OHLC.csv')
 file_name_eth = os.path.join(corr_data_path, 'weekly_tw_eth_OHLC.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
16 changes: 9 additions & 7 deletions analysis/correlation/volume/correlations_daily.py
@@ -1,25 +1,27 @@
 """
 File: correlations_daily.py
-Description: Correlation Analysis with daily granularity (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with daily granularity (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Open', 'High', 'Low', 'Close'], axis=1, inplace=True)
 group = data.groupby('Coin')
@@ -84,5 +86,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'daily_btc_VOL.csv')
 file_name_eth = os.path.join(corr_data_path, 'daily_eth_VOL.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
16 changes: 9 additions & 7 deletions analysis/correlation/volume/correlations_monthly_sw.py
@@ -1,26 +1,28 @@
 """
 File: correlations_monthly_sw.py
-Description: Correlation Analysis with monthly granularity and sliding window (source: coinmarketcap dataset).
-File Created: 06/01/2022
+Description: Correlation Analysis with monthly granularity and sliding window (source: raw.csv dataset).
+File Created: 01/01/2023
 Python Version: 3.9
 """
 
 # Imports
+import sys
 import os
 import numpy as np
 import pandas as pd
 
 # File properties
-data_path = './data/datasets/coinmarketcap.csv'
-corr_data_path = './analysis/correlation/correlogram_data'
+root_dir = sys.path[1]
+data_path = os.path.join(root_dir, 'data/datasets/coinmarketcap.csv')
+corr_data_path = os.path.join(root_dir, 'correlation/correlogram_data')
 window_size = '30D'
 column_names = ['coin', 'x', 'y', 'radius', 'arc_begin', 'arc_end', 'color']
 arc_begin = 0
 arc_end = 360
 color = 1
 
 # Extract btc and eth benchmarks
-data = pd.read_csv(data_path, sep='\t')
+data = pd.read_csv(data_path, sep=',')
 data['Date'] = pd.to_datetime(data['Date'])
 data.drop(['Market Cap', 'Open', 'High', 'Low', 'Close'], axis=1, inplace=True)
 group = data.groupby('Coin')
@@ -85,5 +87,5 @@
 corr_data_eth = pd.concat(list_data_eth)
 file_name_btc = os.path.join(corr_data_path, 'monthly_sw_btc_VOL.csv')
 file_name_eth = os.path.join(corr_data_path, 'monthly_sw_eth_VOL.csv')
-corr_data_btc.to_csv(file_name_btc, sep='\t', encoding='utf-8', index=False)
-corr_data_eth.to_csv(file_name_eth, sep='\t', encoding='utf-8', index=False)
+corr_data_btc.to_csv(file_name_btc, sep=',', encoding='utf-8', index=False)
+corr_data_eth.to_csv(file_name_eth, sep=',', encoding='utf-8', index=False)
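
Editor's note: the _sw scripts compute correlations over a sliding 30-day or 7-day window, while the _tw scripts use non-overlapping monthly or weekly frames. With a DatetimeIndex, pandas expresses the sliding version directly; the snippet below is a sketch with assumed column names and layout, not the scripts' exact logic.

# Sketch: sliding-window correlation of a coin against BTC (assumed layout:
# one 'Avg OHLC Price' column per coin, DataFrame indexed by sorted Date).
import pandas as pd

def rolling_corr_vs_btc(prices: pd.DataFrame, coin: str, window: str = '30D') -> pd.Series:
    # Time-based windows require a sorted DatetimeIndex.
    return prices[coin].rolling(window).corr(prices['BTC'])
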
13 more changed files not shown.
