From 428f6b26f21bd82f7f01f14128fe44b9a0e2aa16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20K=C3=A4lber?= <104454936+YannickKae@users.noreply.github.com> Date: Tue, 14 May 2024 19:39:24 +0200 Subject: [PATCH] Create Cleaned Up Code.R In progress --- Cleaned Up Code.R | 400 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 400 insertions(+) create mode 100644 Cleaned Up Code.R diff --git a/Cleaned Up Code.R b/Cleaned Up Code.R new file mode 100644 index 0000000..a8958ec --- /dev/null +++ b/Cleaned Up Code.R @@ -0,0 +1,400 @@ +# Required Libraries +library(shiny) +library(shinythemes) +library(shinyMatrix) +library(quantmod) +library(RiskPortfolios) +library(factoextra) +library(dendextend) +library(cluster) +library(RColorBrewer) +library(shinybusy) + +# User Interface +ui <- fluidPage( + theme = shinytheme("darkly"), + + titlePanel("Statistical Learning based Portfolio Optimization"), + + sidebarLayout( + sidebarPanel( + matrixInput("input_matrix", "Portfolio", + value = matrix(data = c("VTSMX", "VGTSX", "VBMFX", "GLD", "GSG"), nrow = 5, ncol = 1, dimnames = list(NULL, c("Ticker"))), + rows = list(extend = TRUE, names = FALSE), + cols = list(names = TRUE)), + checkboxInput("specify_clusters", "Specify number of clusters?", FALSE), + conditionalPanel( + condition = "input.specify_clusters == true", + numericInput("min_clusters", "Minimum number of clusters", value = 2, min = 2), + numericInput("max_clusters", "Maximum number of clusters", value = 10, min = 2) + ), + selectInput("rebalancing_frequency", "Rebalancing / Sampling Frequency", + choices = c("Daily" = "daily", "Weekly" = "weekly", "Monthly" = "monthly", "Quarterly" = "quarterly", "Semi-annually" = "semiannually", "Yearly" = "annual"), + selected = "weekly"), + selectInput("inter_cluster_weighting", "Inter-Cluster Weighting", + choices = c("Risk Weighted" = "risk", "Equally Weighted" = "equally")), + selectInput("intra_cluster_weighting", "Intra-Cluster Weighting", + choices = c("Risk Weighted" = "risk", "Equally Weighted" = "equally")), + actionButton("submit", "Optimize"), + tags$style(type='text/css', "#info-text { margin-top: 40px; }"), + HTML("

Recommendation: It is recommended to use this application on a desktop for the best user experience.

+This web application utilizes the Hierarchical Equal Risk Contribution (HERC) approach, a modern portfolio optimization method developed by Raffinot (2018). It combines the unique strengths of the pioneering Hierarchical Risk Parity (HRP) method by López de Prado (2016) and Hierarchical Clustering-Based Asset Allocation (HCAA) method by Raffinot (2017).

+ +Traditional portfolio optimization suffers from significant instability, primarily due to treating the vector space of return series as a fully connected graph, where each node can potentially substitute for another. This complicated structure magnifies minute estimation errors, leading to unstable solutions. +Hierarchical clustering-based tree structures address this issue by eliminating irrelevant links.


+ +Your Input:

+ +1) Use the Ticker table above to enter the securities you wish to invest in. The data is sourced from Yahoo Finance.
+2) Specify the range of clusters if you wish to narrow this parameter. If not specified, the Silhouette method will determine the optimal number.
+3) Choose the method to weight the clusters in relation to one another and the assets within each cluster. Risk is defined here as volatility.

+ +Output:

+ +On the right panel, you will find:
+1) A dendrogram illustrating the hierarchical structure of the securities.
+2) A labelled pie chart representing the optimized portfolio.
+3) A table listing each security's portfolio weight and cluster membership.
+4) A graph depicting the cumulative returns of the securities and the optimized portfolio based on the used data.
+5) A bar chart comparing the Sharpe ratios of the securities and the optimized portfolio.

+ +The application automatically identifies the optimal linkage method.


+For more information on this and my other projects, please visit my GitHub.

") + ), + + mainPanel( + fluidRow(wellPanel(plotOutput("dendrogram_plot"))), + fluidRow(wellPanel(plotOutput("portfolio_pie"))), + fluidRow(wellPanel(tableOutput("optimized_portfolio"))), + fluidRow(wellPanel(plotOutput("cumulative_return_chart"))), + fluidRow(wellPanel(plotOutput("sharpe_ratio_barplot"))), + ) + ) +) + + +# Server logic +server <- function(input, output) { + + observeEvent(input$submit, { + + req(input$input_matrix) + show_modal_spinner(color = "white") # show the modal window + + tickers <- clean_tickers(input$input_matrix) + if (length(tickers) < 2) { + show_error_modal("You need at least two assets for optimization.") + return() + } + + assets <- get_asset_data(tickers) + if (is.null(assets)) return() + + return_matrix <- compute_returns_matrix(assets, input$rebalancing_frequency) + transposed_xts <- t(return_matrix) + + best_method <- find_best_linkage_method(transposed_xts) + optimal_k <- determine_optimal_clusters(transposed_xts, best_method, input$specify_clusters, input$min_clusters, input$max_clusters) + + final_clust <- perform_clustering(transposed_xts, best_method, optimal_k) + dend <- prepare_dendrogram(final_clust, optimal_k) + + risk_contrib <- get_risk_contrib(return_matrix, input$inter_cluster_weighting) + cluster_weights, cluster_members <- calculate_cluster_weights(final_clust, risk_contrib, optimal_k) + + risk_contrib <- get_risk_contrib(return_matrix, input$intra_cluster_weighting) + final_weights, asset_clusters <- calculate_final_weights(cluster_members, risk_contrib, cluster_weights) + + output$dendrogram_plot <- render_dendrogram(dend) + output$optimized_portfolio <- render_optimized_portfolio(final_weights, asset_clusters) + output$portfolio_pie <- render_portfolio_pie(final_weights) + output$cumulative_return_chart <- render_cumulative_return_chart(return_matrix, final_weights) + output$sharpe_ratio_barplot <- render_sharpe_ratio_barplot(return_matrix, final_weights) + + remove_modal_spinner() # remove it when done + + }) +} + +shinyApp(ui = ui, server = server) + +# Helper functions + +clean_tickers <- function(input_matrix) { + tickers <- input_matrix[,1] + tickers <- tickers[tickers != ""] + return(tickers) +} + +show_error_modal <- function(message) { + showModal(modalDialog( + title = "Input Error", + message + )) +} + +get_asset_data <- function(tickers) { + assets <- list() + for (ticker in tickers) { + asset_data <- try(getSymbols(ticker, src = "yahoo", from = "1900-01-01", auto.assign = FALSE), silent = TRUE) + if (class(asset_data) == "try-error") { + show_error_modal(paste("The following ticker is not available: ", ticker)) + return(NULL) + } else { + assets[[ticker]] <- asset_data + } + } + return(assets) +} + +compute_returns <- function(price_data, frequency) { + switch(frequency, + "daily" = dailyReturn(price_data), + "weekly" = {price_data <- price_data[.indexwday(price_data) == 5] + weeklyReturn(price_data)}, + "monthly" = monthlyReturn(price_data, leading = FALSE), + "quarterly" = quarterlyReturn(price_data, leading = FALSE), + "annual" = annualReturn(price_data, leading = FALSE), + stop("Invalid frequency")) +} + +compute_returns_matrix <- function(assets, frequency) { + returns_list <- lapply(assets, function(asset) compute_returns(asset[, 6], frequency)) + return_matrix <- na.omit(do.call("merge.xts", returns_list)) + colnames(return_matrix) <- names(returns_list) + return(return_matrix) +} + +find_best_linkage_method <- function(transposed_xts) { + m <- c("single", "average", "complete", "ward") + names(m) <- c("average", "single", "complete", "ward") + ac <- function(x) agnes(transposed_xts, method = x)$ac + ac_values <- sapply(m, ac) + best_method <- names(which.max(ac_values)) + return(best_method) +} + +determine_optimal_clusters <- function(transposed_xts, best_method, specify_clusters, min_clusters, max_clusters) { + mycluster <- function(x, k) { + d <- dist(x) + hc <- hclust(d, method = best_method) + cutree(hc, k = k) + } + + if (specify_clusters) { + if (min_clusters > (nrow(transposed_xts) - 1)) { + show_error_modal("The minimum number of clusters must be less than the number of tickers.") + return() + } + max_clusters <- min(max_clusters, nrow(transposed_xts) - 1) + } else { + min_clusters <- 2 + max_clusters <- nrow(transposed_xts) - 1 + } + + sil_width <- numeric(max_clusters) + for (k in min_clusters:max_clusters) { + cluster_assignments <- mycluster(transposed_xts, k) + sil_results <- silhouette(cluster_assignments, dist(transposed_xts)) + sil_width[k] <- mean(sil_results[, 3]) + } + + optimal_k <- which.max(sil_width) + return(optimal_k) +} + +perform_clustering <- function(transposed_xts, best_method, optimal_k) { + d <- dist(transposed_xts) + final_clust <- hclust(d, method = best_method) + return(final_clust) +} + +prepare_dendrogram <- function(final_clust, optimal_k) { + dend <- as.dendrogram(final_clust) + if (optimal_k > 1) { + dend <- color_branches(dend, k = optimal_k) + dend <- color_labels(dend, k = optimal_k) + } + dend <- hang.dendrogram(dend) + dend <- set(dend, "labels_cex", 1.25) + dend <- set(dend, "branches_lwd", 4) + dend <- set(dend, "branches_lty", 1) + return(dend) +} + +get_risk_contrib <- function(return_matrix, weighting_method) { + if (weighting_method == "risk") { + risk_contrib <- apply(return_matrix, 2, sd) + } else if (weighting_method == "equally") { + asset_names <- colnames(return_matrix) + risk_contrib <- rep(1, length(asset_names)) + names(risk_contrib) <- asset_names + } else { + stop("Invalid option for weighting method") + } + return(risk_contrib) +} + +calculate_cluster_weights <- function(final_clust, risk_contrib, optimal_k) { + cluster_weights <- c() + cluster_members <- c() + cluster_labels <- c() + + while (length(cluster_weights) < optimal_k) { + if (length(cluster_weights) < 1) { + childs <- final_clust$merge[nrow(final_clust$merge), ] + rc_1 <- median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) / (median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) + median(risk_contrib[get_cluster_members(childs[2], final_clust$merge)])) + rc_2 <- 1 - rc_1 + w_1 <- rc_2 + w_2 <- rc_1 + cluster_weights <- c(cluster_weights, w_1, w_2) + m_1 <- get_cluster_members(childs[1], final_clust$merge) + m_2 <- get_cluster_members(childs[2], final_clust$merge) + cluster_members <- c(cluster_members, list(m_1), list(m_2)) + cluster_labels <- c(childs) + old_max_row <- nrow(final_clust$merge) + new_max_row <- old_max_row - 1 + } else { + col_num <- which(cluster_labels == new_max_row) + childs <- final_clust$merge[new_max_row, ] + rc_1 <- median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) / (median(risk_contrib[get_cluster_members(childs[1], final_clust$merge)]) + median(risk_contrib[get_cluster_members(childs[2], final_clust$merge)])) + rc_2 <- 1 - rc_1 + m_1 <- get_cluster_members(childs[1], final_clust$merge) + m_2 <- get_cluster_members(childs[2], final_clust$merge) + if (col_num == 1) { + w_1 <- rc_2 * cluster_weights[1] + w_2 <- cluster_weights[1] - w_1 + cluster_weights <- c(w_1, w_2, cluster_weights[2:length(cluster_weights)]) + cluster_members <- c(list(m_1), list(m_2), cluster_members[2:length(cluster_members)]) + cluster_labels <- c(childs, cluster_labels[2:length(cluster_labels)]) + } else if (col_num == length(cluster_weights)) { + w_1 <- rc_2 * cluster_weights[length(cluster_weights)] + w_2 <- cluster_weights[length(cluster_weights)] - w_1 + cluster_weights <- c(cluster_weights[1:(length(cluster_weights) - 1)], w_1, w_2) + cluster_members <- c(cluster_members[1:(length(cluster_members) - 1)], list(m_1), list(m_2)) + cluster_labels <- c(cluster_labels[1:(length(cluster_labels) - 1)], childs) + } else { + w_1 <- rc_2 * cluster_weights[col_num] + w_2 <- cluster_weights[col_num] - w_1 + cluster_weights <- c(cluster_weights[1:(length(cluster_weights) - 1)], w_1, w_2, cluster_weights[(col_num + 1):length(cluster_weights)]) + cluster_members <- c(cluster_members[1:(length(cluster_members) - 1)], list(m_1), list(m_2), cluster_members[(col_num + 1):length(cluster_members)]) + cluster_labels <- c(cluster_labels[1:(length(cluster_labels) - 1)], childs, cluster_labels[(col_num + 1):length(cluster_labels)]) + } + old_max_row <- new_max_row + new_max_row <- old_max_row - 1 + col_num <- 1 + } + } + return(list(cluster_weights, cluster_members)) +} + +calculate_final_weights <- function(cluster_members, risk_contrib, cluster_weights) { + naive_risk_parity <- function(cluster, risk_contrib) { + cluster_risk_contrib <- risk_contrib[cluster] + inverse_risk <- 1 / cluster_risk_contrib + nrp_weights <- inverse_risk / sum(inverse_risk) + return(nrp_weights) + } + + final_weights <- list() + asset_clusters <- list() + for (i in 1:length(cluster_members)) { + cluster <- colnames(return_matrix)[unlist(cluster_members[[i]])] + nrp_weights <- naive_risk_parity(cluster, risk_contrib) + final_weights[[i]] <- nrp_weights * cluster_weights[i] + asset_clusters[[i]] <- rep(i, length(nrp_weights)) + } + + final_weights <- unlist(final_weights) + asset_clusters <- unlist(asset_clusters) + return(list(final_weights, asset_clusters)) +} + +render_dendrogram <- function(dend) { + renderPlot({ + par(mar = c(mar = c(5, 5, 5, 10))) + plot(dend, main = "Hierarchical Asset Structure", cex.main = 1.25) + }) +} + +render_optimized_portfolio <- function(final_weights, asset_clusters) { + renderTable({ + df <- data.frame( + Weight = paste0(round(final_weights * 100), "%"), + Cluster = as.character(asset_clusters), + row.names = names(final_weights) + ) + df_transposed <- t(df) + return(df_transposed) + }, rownames = TRUE) +} + +render_portfolio_pie <- function(final_weights) { + df <- as.data.frame(final_weights) + labels <- rownames(df) + values <- df$final_weights + pie_data <- data.frame(labels, values) + pie_data <- pie_data[order(pie_data$values, decreasing = TRUE), ] + + chart <- ggplot(pie_data, aes(x = "", y = values, fill = labels)) + + geom_bar(stat = "identity", width = 1) + + coord_polar("y", start = 0) + + scale_fill_brewer(palette = "Set1") + + theme_void() + + theme(legend.text = element_text(size = 15)) + + renderPlot({ chart }) +} + +render_cumulative_return_chart <- function(return_matrix, final_weights) { + renderPlot({ + weighted_returns <- return_matrix * final_weights + portfolio_returns <- rowSums(weighted_returns) + Portfolio <- return_matrix + Portfolio$Portfolio_Returns <- portfolio_returns + + cumulative_returns_portfolio <- cumprod(1 + Portfolio$Portfolio_Returns) + cumulative_returns_portfolio <- as.xts(cumulative_returns_portfolio) + cumulative_returns_assets <- cumprod(1 + return_matrix) + cumulative_returns_assets <- as.xts(cumulative_returns_assets) + + colors <- brewer.pal(n = ncol(cumulative_returns_assets), name = "Set1") + matplot(index(cumulative_returns_assets), cumulative_returns_assets, type = 'l', lty = 1, lwd = 1, log = 'y', col = colors, xlab = "", ylab = "", yaxt = 'n') + lines(index(cumulative_returns_portfolio), cumulative_returns_portfolio, col = "black", lwd = 2) + title(main = "Cumulative Returns", col.main = "black", font.main = 4) + legend("topleft", legend = c(colnames(cumulative_returns_assets), "Portfolio"), col = c(colors, "black"), lty = 1, cex = 1, lwd = 2, bty = "n", box.col = "white") + abline(h = 1, col = "black", lwd = 2, lty = 2) + at = axTicks(2) + labels = scales::percent(at) + axis(2, at = at, labels = labels, las = 1) + }) +} + +render_sharpe_ratio_barplot <- function(return_matrix, final_weights) { + renderPlot({ + weighted_returns <- return_matrix * final_weights + portfolio_returns <- rowSums(weighted_returns) + Portfolio <- return_matrix + Portfolio$Portfolio <- portfolio_returns + mean_returns <- apply(Portfolio, 2, mean) + sd_returns <- apply(Portfolio, 2, sd) + sharpe_ratio <- mean_returns / sd_returns + sharpe_data <- data.frame(Asset = names(sharpe_ratio), Sharpe = sharpe_ratio) + + color_vector <- ifelse(names(sharpe_ratio) != "Portfolio", brewer.pal(length(sharpe_ratio) - 1, "Set1"), "black") + color_vector <- color_vector[order(sharpe_ratio, decreasing = TRUE)] + sharpe_data <- sharpe_data[order(-sharpe_data$Sharpe), ] + + ggplot(sharpe_data, aes(x = reorder(Asset, Sharpe), y = Sharpe, fill = Asset)) + + geom_bar(stat = "identity", width = 0.5, color = "black") + + scale_fill_manual(values = color_vector) + + labs(title = "Sharpe Ratio", x = "", y = "Sharpe Ratio") + + theme_minimal() + + theme(legend.position = "none", + plot.title = element_text(hjust = 0.5, size = 20), + axis.text = element_text(size = 14), + axis.title = element_text(size = 14)) + }) +} + +shinyApp(ui = ui, server = server)