From 428f6b26f21bd82f7f01f14128fe44b9a0e2aa16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannick=20K=C3=A4lber?=
<104454936+YannickKae@users.noreply.github.com>
Date: Tue, 14 May 2024 19:39:24 +0200
Subject: [PATCH] Create Cleaned Up Code.R
In progress
---
Cleaned Up Code.R | 400 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 400 insertions(+)
create mode 100644 Cleaned Up Code.R
diff --git a/Cleaned Up Code.R b/Cleaned Up Code.R
new file mode 100644
index 0000000..a8958ec
--- /dev/null
+++ b/Cleaned Up Code.R
@@ -0,0 +1,400 @@
+# Required Libraries
+library(shiny)
+library(shinythemes)
+library(shinyMatrix)
+library(quantmod)
+library(RiskPortfolios)
+library(factoextra)
+library(dendextend)
+library(cluster)
+library(RColorBrewer)
+library(shinybusy)
+
+# User Interface
+ui <- fluidPage(
+ theme = shinytheme("darkly"),
+
+ titlePanel("Statistical Learning based Portfolio Optimization"),
+
+ sidebarLayout(
+ sidebarPanel(
+ matrixInput("input_matrix", "Portfolio",
+ value = matrix(data = c("VTSMX", "VGTSX", "VBMFX", "GLD", "GSG"), nrow = 5, ncol = 1, dimnames = list(NULL, c("Ticker"))),
+ rows = list(extend = TRUE, names = FALSE),
+ cols = list(names = TRUE)),
+ checkboxInput("specify_clusters", "Specify number of clusters?", FALSE),
+ conditionalPanel(
+ condition = "input.specify_clusters == true",
+ numericInput("min_clusters", "Minimum number of clusters", value = 2, min = 2),
+ numericInput("max_clusters", "Maximum number of clusters", value = 10, min = 2)
+ ),
+ selectInput("rebalancing_frequency", "Rebalancing / Sampling Frequency",
+ choices = c("Daily" = "daily", "Weekly" = "weekly", "Monthly" = "monthly", "Quarterly" = "quarterly", "Semi-annually" = "semiannually", "Yearly" = "annual"),
+ selected = "weekly"),
+ selectInput("inter_cluster_weighting", "Inter-Cluster Weighting",
+ choices = c("Risk Weighted" = "risk", "Equally Weighted" = "equally")),
+ selectInput("intra_cluster_weighting", "Intra-Cluster Weighting",
+ choices = c("Risk Weighted" = "risk", "Equally Weighted" = "equally")),
+ actionButton("submit", "Optimize"),
+ tags$style(type='text/css', "#info-text { margin-top: 40px; }"),
+ HTML("
Recommendation: It is recommended to use this application on a desktop for the best user experience.
+This web application utilizes the Hierarchical Equal Risk Contribution (HERC) approach, a modern portfolio optimization method developed by Raffinot (2018). It combines the unique strengths of the pioneering Hierarchical Risk Parity (HRP) method by López de Prado (2016) and Hierarchical Clustering-Based Asset Allocation (HCAA) method by Raffinot (2017).
+
+Traditional portfolio optimization suffers from significant instability, primarily due to treating the vector space of return series as a fully connected graph, where each node can potentially substitute for another. This complicated structure magnifies minute estimation errors, leading to unstable solutions.
+Hierarchical clustering-based tree structures address this issue by eliminating irrelevant links.
+
+Your Input:
+
+1) Use the Ticker table above to enter the securities you wish to invest in. The data is sourced from Yahoo Finance.
+2) Specify the range of clusters if you wish to narrow this parameter. If not specified, the Silhouette method will determine the optimal number.
+3) Choose the method to weight the clusters in relation to one another and the assets within each cluster. Risk is defined here as volatility.
+
+Output:
+
+On the right panel, you will find:
+1) A dendrogram illustrating the hierarchical structure of the securities.
+2) A labelled pie chart representing the optimized portfolio.
+3) A table listing each security's portfolio weight and cluster membership.
+4) A graph depicting the cumulative returns of the securities and the optimized portfolio based on the used data.
+5) A bar chart comparing the Sharpe ratios of the securities and the optimized portfolio.
+
+The application automatically identifies the optimal linkage method.
+For more information on this and my other projects, please visit my GitHub.
")
+ ),
+
+ mainPanel(
+ fluidRow(wellPanel(plotOutput("dendrogram_plot"))),
+ fluidRow(wellPanel(plotOutput("portfolio_pie"))),
+ fluidRow(wellPanel(tableOutput("optimized_portfolio"))),
+ fluidRow(wellPanel(plotOutput("cumulative_return_chart"))),
+ fluidRow(wellPanel(plotOutput("sharpe_ratio_barplot"))),
+ )
+ )
+)
+
+
+# Server logic
+server <- function(input, output) {
+
+ observeEvent(input$submit, {
+
+ req(input$input_matrix)
+ show_modal_spinner(color = "white") # show the modal window
+
+ tickers <- clean_tickers(input$input_matrix)
+ if (length(tickers) < 2) {
+ show_error_modal("You need at least two assets for optimization.")
+ return()
+ }
+
+ assets <- get_asset_data(tickers)
+ if (is.null(assets)) return()
+
+ return_matrix <- compute_returns_matrix(assets, input$rebalancing_frequency)
+ transposed_xts <- t(return_matrix)
+
+ best_method <- find_best_linkage_method(transposed_xts)
+ optimal_k <- determine_optimal_clusters(transposed_xts, best_method, input$specify_clusters, input$min_clusters, input$max_clusters)
+
+ final_clust <- perform_clustering(transposed_xts, best_method, optimal_k)
+ dend <- prepare_dendrogram(final_clust, optimal_k)
+
+ risk_contrib <- get_risk_contrib(return_matrix, input$inter_cluster_weighting)
+ cluster_weights, cluster_members <- calculate_cluster_weights(final_clust, risk_contrib, optimal_k)
+
+ risk_contrib <- get_risk_contrib(return_matrix, input$intra_cluster_weighting)
+ final_weights, asset_clusters <- calculate_final_weights(cluster_members, risk_contrib, cluster_weights)
+
+ output$dendrogram_plot <- render_dendrogram(dend)
+ output$optimized_portfolio <- render_optimized_portfolio(final_weights, asset_clusters)
+ output$portfolio_pie <- render_portfolio_pie(final_weights)
+ output$cumulative_return_chart <- render_cumulative_return_chart(return_matrix, final_weights)
+ output$sharpe_ratio_barplot <- render_sharpe_ratio_barplot(return_matrix, final_weights)
+
+ remove_modal_spinner() # remove it when done
+
+ })
+}
+
+shinyApp(ui = ui, server = server)
+
+# Helper functions
+
+clean_tickers <- function(input_matrix) {
+ tickers <- input_matrix[,1]
+ tickers <- tickers[tickers != ""]
+ return(tickers)
+}
+
+show_error_modal <- function(message) {
+ showModal(modalDialog(
+ title = "Input Error",
+ message
+ ))
+}
+
+get_asset_data <- function(tickers) {
+ assets <- list()
+ for (ticker in tickers) {
+ asset_data <- try(getSymbols(ticker, src = "yahoo", from = "1900-01-01", auto.assign = FALSE), silent = TRUE)
+ if (class(asset_data) == "try-error") {
+ show_error_modal(paste("The following ticker is not available: ", ticker))
+ return(NULL)
+ } else {
+ assets[[ticker]] <- asset_data
+ }
+ }
+ return(assets)
+}
+
+compute_returns <- function(price_data, frequency) {
+ switch(frequency,
+ "daily" = dailyReturn(price_data),
+ "weekly" = {price_data <- price_data[.indexwday(price_data) == 5]
+ weeklyReturn(price_data)},
+ "monthly" = monthlyReturn(price_data, leading = FALSE),
+ "quarterly" = quarterlyReturn(price_data, leading = FALSE),
+ "annual" = annualReturn(price_data, leading = FALSE),
+ stop("Invalid frequency"))
+}
+
+compute_returns_matrix <- function(assets, frequency) {
+ returns_list <- lapply(assets, function(asset) compute_returns(asset[, 6], frequency))
+ return_matrix <- na.omit(do.call("merge.xts", returns_list))
+ colnames(return_matrix) <- names(returns_list)
+ return(return_matrix)
+}
+
+find_best_linkage_method <- function(transposed_xts) {
+ m <- c("single", "average", "complete", "ward")
+ names(m) <- c("average", "single", "complete", "ward")
+ ac <- function(x) agnes(transposed_xts, method = x)$ac
+ ac_values <- sapply(m, ac)
+ best_method <- names(which.max(ac_values))
+ return(best_method)
+}
+
+determine_optimal_clusters <- function(transposed_xts, best_method, specify_clusters, min_clusters, max_clusters) {
+ mycluster <- function(x, k) {
+ d <- dist(x)
+ hc <- hclust(d, method = best_method)
+ cutree(hc, k = k)
+ }
+
+ if (specify_clusters) {
+ if (min_clusters > (nrow(transposed_xts) - 1)) {
+ show_error_modal("The minimum number of clusters must be less than the number of tickers.")
+ return()
+ }
+ max_clusters <- min(max_clusters, nrow(transposed_xts) - 1)
+ } else {
+ min_clusters <- 2
+ max_clusters <- nrow(transposed_xts) - 1
+ }
+
+ sil_width <- numeric(max_clusters)
+ for (k in min_clusters:max_clusters) {
+ cluster_assignments <- mycluster(transposed_xts, k)
+ sil_results <- silhouette(cluster_assignments, dist(transposed_xts))
+ sil_width[k] <- mean(sil_results[, 3])
+ }
+
+ optimal_k <- which.max(sil_width)
+ return(optimal_k)
+}
+
+perform_clustering <- function(transposed_xts, best_method, optimal_k) {
+ d <- dist(transposed_xts)
+ final_clust <- hclust(d, method = best_method)
+ return(final_clust)
+}
+
+prepare_dendrogram <- function(final_clust, optimal_k) {
+ dend <- as.dendrogram(final_clust)
+ if (optimal_k > 1) {
+ dend <- color_branches(dend, k = optimal_k)
+ dend <- color_labels(dend, k = optimal_k)
+ }
+ dend <- hang.dendrogram(dend)
+ dend <- set(dend, "labels_cex", 1.25)
+ dend <- set(dend, "branches_lwd", 4)
+ dend <- set(dend, "branches_lty", 1)
+ return(dend)
+}
+
+get_risk_contrib <- function(return_matrix, weighting_method) {
+ if (weighting_method == "risk") {
+ risk_contrib <- apply(return_matrix, 2, sd)
+ } else if (weighting_method == "equally") {
+ asset_names <- colnames(return_matrix)
+ risk_contrib <- rep(1, length(asset_names))
+ names(risk_contrib) <- asset_names
+ } else {
+ stop("Invalid option for weighting method")
+ }
+ return(risk_contrib)
+}
+
+calculate_cluster_weights <- function(final_clust, risk_contrib, optimal_k) {
+ cluster_weights <- c()
+ cluster_members <- c()
+ cluster_labels <- c()
+
+ while (length(cluster_weights) < optimal_k) {
+ if (length(cluster_weights) < 1) {
+ childs <- final_clust$merge[nrow(final_clust$merge), ]
+ rc_1 <- median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) / (median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) + median(risk_contrib[get_cluster_members(childs[2], final_clust$merge)]))
+ rc_2 <- 1 - rc_1
+ w_1 <- rc_2
+ w_2 <- rc_1
+ cluster_weights <- c(cluster_weights, w_1, w_2)
+ m_1 <- get_cluster_members(childs[1], final_clust$merge)
+ m_2 <- get_cluster_members(childs[2], final_clust$merge)
+ cluster_members <- c(cluster_members, list(m_1), list(m_2))
+ cluster_labels <- c(childs)
+ old_max_row <- nrow(final_clust$merge)
+ new_max_row <- old_max_row - 1
+ } else {
+ col_num <- which(cluster_labels == new_max_row)
+ childs <- final_clust$merge[new_max_row, ]
+ rc_1 <- median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) / (median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) + median(risk_contrib[get_cluster_members(childs[2], final_clust$merge)]))
+ rc_2 <- 1 - rc_1
+ m_1 <- get_cluster_members(childs[1], final_clust$merge)
+ m_2 <- get_cluster_members(childs[2], final_clust$merge)
+ if (col_num == 1) {
+ w_1 <- rc_2 * cluster_weights[1]
+ w_2 <- cluster_weights[1] - w_1
+ cluster_weights <- c(w_1, w_2, cluster_weights[2:length(cluster_weights)])
+ cluster_members <- c(list(m_1), list(m_2), cluster_members[2:length(cluster_members)])
+ cluster_labels <- c(childs, cluster_labels[2:length(cluster_labels)])
+ } else if (col_num == length(cluster_weights)) {
+ w_1 <- rc_2 * cluster_weights[length(cluster_weights)]
+ w_2 <- cluster_weights[length(cluster_weights)] - w_1
+ cluster_weights <- c(cluster_weights[1:(length(cluster_weights) - 1)], w_1, w_2)
+ cluster_members <- c(cluster_members[1:(length(cluster_members) - 1)], list(m_1), list(m_2))
+ cluster_labels <- c(cluster_labels[1:(length(cluster_labels) - 1)], childs)
+ } else {
+ w_1 <- rc_2 * cluster_weights[col_num]
+ w_2 <- cluster_weights[col_num] - w_1
+ cluster_weights <- c(cluster_weights[1:(length(cluster_weights) - 1)], w_1, w_2, cluster_weights[(col_num + 1):length(cluster_weights)])
+ cluster_members <- c(cluster_members[1:(length(cluster_members) - 1)], list(m_1), list(m_2), cluster_members[(col_num + 1):length(cluster_members)])
+ cluster_labels <- c(cluster_labels[1:(length(cluster_labels) - 1)], childs, cluster_labels[(col_num + 1):length(cluster_labels)])
+ }
+ old_max_row <- new_max_row
+ new_max_row <- old_max_row - 1
+ col_num <- 1
+ }
+ }
+ return(list(cluster_weights, cluster_members))
+}
+
+calculate_final_weights <- function(cluster_members, risk_contrib, cluster_weights) {
+ naive_risk_parity <- function(cluster, risk_contrib) {
+ cluster_risk_contrib <- risk_contrib[cluster]
+ inverse_risk <- 1 / cluster_risk_contrib
+ nrp_weights <- inverse_risk / sum(inverse_risk)
+ return(nrp_weights)
+ }
+
+ final_weights <- list()
+ asset_clusters <- list()
+ for (i in 1:length(cluster_members)) {
+ cluster <- colnames(return_matrix)[unlist(cluster_members[[i]])]
+ nrp_weights <- naive_risk_parity(cluster, risk_contrib)
+ final_weights[[i]] <- nrp_weights * cluster_weights[i]
+ asset_clusters[[i]] <- rep(i, length(nrp_weights))
+ }
+
+ final_weights <- unlist(final_weights)
+ asset_clusters <- unlist(asset_clusters)
+ return(list(final_weights, asset_clusters))
+}
+
+render_dendrogram <- function(dend) {
+ renderPlot({
+ par(mar = c(mar = c(5, 5, 5, 10)))
+ plot(dend, main = "Hierarchical Asset Structure", cex.main = 1.25)
+ })
+}
+
+render_optimized_portfolio <- function(final_weights, asset_clusters) {
+ renderTable({
+ df <- data.frame(
+ Weight = paste0(round(final_weights * 100), "%"),
+ Cluster = as.character(asset_clusters),
+ row.names = names(final_weights)
+ )
+ df_transposed <- t(df)
+ return(df_transposed)
+ }, rownames = TRUE)
+}
+
+render_portfolio_pie <- function(final_weights) {
+ df <- as.data.frame(final_weights)
+ labels <- rownames(df)
+ values <- df$final_weights
+ pie_data <- data.frame(labels, values)
+ pie_data <- pie_data[order(pie_data$values, decreasing = TRUE), ]
+
+ chart <- ggplot(pie_data, aes(x = "", y = values, fill = labels)) +
+ geom_bar(stat = "identity", width = 1) +
+ coord_polar("y", start = 0) +
+ scale_fill_brewer(palette = "Set1") +
+ theme_void() +
+ theme(legend.text = element_text(size = 15))
+
+ renderPlot({ chart })
+}
+
+render_cumulative_return_chart <- function(return_matrix, final_weights) {
+ renderPlot({
+ weighted_returns <- return_matrix * final_weights
+ portfolio_returns <- rowSums(weighted_returns)
+ Portfolio <- return_matrix
+ Portfolio$Portfolio_Returns <- portfolio_returns
+
+ cumulative_returns_portfolio <- cumprod(1 + Portfolio$Portfolio_Returns)
+ cumulative_returns_portfolio <- as.xts(cumulative_returns_portfolio)
+ cumulative_returns_assets <- cumprod(1 + return_matrix)
+ cumulative_returns_assets <- as.xts(cumulative_returns_assets)
+
+ colors <- brewer.pal(n = ncol(cumulative_returns_assets), name = "Set1")
+ matplot(index(cumulative_returns_assets), cumulative_returns_assets, type = 'l', lty = 1, lwd = 1, log = 'y', col = colors, xlab = "", ylab = "", yaxt = 'n')
+ lines(index(cumulative_returns_portfolio), cumulative_returns_portfolio, col = "black", lwd = 2)
+ title(main = "Cumulative Returns", col.main = "black", font.main = 4)
+ legend("topleft", legend = c(colnames(cumulative_returns_assets), "Portfolio"), col = c(colors, "black"), lty = 1, cex = 1, lwd = 2, bty = "n", box.col = "white")
+ abline(h = 1, col = "black", lwd = 2, lty = 2)
+ at = axTicks(2)
+ labels = scales::percent(at)
+ axis(2, at = at, labels = labels, las = 1)
+ })
+}
+
+render_sharpe_ratio_barplot <- function(return_matrix, final_weights) {
+ renderPlot({
+ weighted_returns <- return_matrix * final_weights
+ portfolio_returns <- rowSums(weighted_returns)
+ Portfolio <- return_matrix
+ Portfolio$Portfolio <- portfolio_returns
+ mean_returns <- apply(Portfolio, 2, mean)
+ sd_returns <- apply(Portfolio, 2, sd)
+ sharpe_ratio <- mean_returns / sd_returns
+ sharpe_data <- data.frame(Asset = names(sharpe_ratio), Sharpe = sharpe_ratio)
+
+ color_vector <- ifelse(names(sharpe_ratio) != "Portfolio", brewer.pal(length(sharpe_ratio) - 1, "Set1"), "black")
+ color_vector <- color_vector[order(sharpe_ratio, decreasing = TRUE)]
+ sharpe_data <- sharpe_data[order(-sharpe_data$Sharpe), ]
+
+ ggplot(sharpe_data, aes(x = reorder(Asset, Sharpe), y = Sharpe, fill = Asset)) +
+ geom_bar(stat = "identity", width = 0.5, color = "black") +
+ scale_fill_manual(values = color_vector) +
+ labs(title = "Sharpe Ratio", x = "", y = "Sharpe Ratio") +
+ theme_minimal() +
+ theme(legend.position = "none",
+ plot.title = element_text(hjust = 0.5, size = 20),
+ axis.text = element_text(size = 14),
+ axis.title = element_text(size = 14))
+ })
+}
+
+shinyApp(ui = ui, server = server)