--- title: "Combining predictive distributions of electricity prices: Does minimizing the CRPS lead to optimal decisions?" author: "Weronika Nitka, Rafał Weron" output: html_notebook --- ```{r init} library(tidyverse) library(lubridate) library(gridExtra) library(cowplot) library(forecast) library(profoc) library(Rglpk) library(zoo) library(latex2exp) save <- FALSE # should plots in pdf be saved ``` ### Session info: R version 4.3.1 (2023-06-16 ucrt) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 19045) Matrix products: default locale: [1] LC_COLLATE=Polish_Poland.utf8 LC_CTYPE=Polish_Poland.utf8 LC_MONETARY=Polish_Poland.utf8 LC_NUMERIC=C LC_TIME=Polish_Poland.utf8 time zone: Europe/Warsaw tzcode source: internal attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] zoo_1.8-12 Rglpk_0.6-5 slam_0.1-50 profoc_1.2.0 forecast_8.21 cowplot_1.1.1 gridExtra_2.3 lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 [12] purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2 tidyverse_2.0.0 ```{r read data} load("./NN_forecasts.RData") ``` ```{r check data} names(epex_nn)[1:12] <- c('LEAR_QRA', 'LEAR_QRM', 'DNN_QRA', 'DNN_QRM', 'DDNN_JSU_1', 'DDNN_JSU_2', 'DDNN_JSU_3', 'DDNN_JSU_4', 'DDNN_N_1', 'DDNN_N_2', 'DDNN_N_3', 'DDNN_N_4') names(epex_nn) ``` # Averaging functions ```{r naive averaging function} simple_average <- function(list, indices = c(1:12)) { # naive averaging with equal weights of forecasts from list with given indices simple_average <- Reduce("+", list[indices])/length(indices) return(simple_average) } ``` ```{r crps averaging function} crps_averaging_hourly <- function(dane, prob_grid, indices = c(1:12)) { # CRPS online averaging of forecasts from list with given indices T <- length(dane$dates)/24 # Observations D <- 24 # Variables (hours) N <- length(indices) # Experts P <- length(prob_grid) # Quantiles #array of expert predictions experts <- array(dim = c(T, D, P, N)) for (d in 1:D) { #iterate over hours for (e in 1:N) { #iterate over experts experts[, d, , e] <- dane[[indices[e]]][seq(d, length(dane$dates) - (24 - d), by = 24), ] } } actual_hourly <- matrix(dane$actual, ncol = 24, byrow = TRUE) # stepwise online weight adjustment # default settings: method="bewa", no B+P pr smoothing, no B+P mv smoothing, no forgetting, no fixed share crps_model <- online( y = actual_hourly, experts = experts, loss_function = "quantile", method = "boa", p_smooth_pr = list(lambda = 2^c(-5:10)), # forget_regret = c(2^c(-12:-2), 0), tau = prob_grid ) #reshape predictions from 24 to one column predictions <- matrix(nrow = T*D, ncol = P) for (d in 1:D) { predictions[seq(d, length(dane$dates) - (24 - d), by = 24), ] <- crps_model$predictions[, d, ] } return(list(model=crps_model, predictions=predictions)) } ``` # Evaluation ```{r error and helper functions} rmse <- function(forc, true) { #helper function to compute rmse error, hours = columns err <- sqrt(mean((forc - true)^2)) names(err) <- "RMSE" return(err) } mae <- function(forc, true) { #helper function to compute mae error, hours = columns err <- mean(abs(forc - true)) names(err) <- "MAE" return(err) } pinball <- function(forc, true, quants) { #helper function to calculate pinball score, columns = quantiles temp <- true - forc pinball_score <- numeric(length(quants)) for (i in 1:length(quants)) { pinball_score[i] <- mean(abs(quants[i]*temp[, i] - (temp[, i] < 0)*temp[, i])) } return(pinball_score) } pinball_list_f <- function(data, quants=quantiles, exclude=c()) { # computes a list of per-quantile pinball scores for each single model + ensembles # exclude - list of model/ensemble names to exclude pinball_list <- list() model_indices <- which(!(names(data) %in% c(c('dates', 'hours', 'actual'), exclude))) j <- 1 for (i in model_indices) { pinball_list[[j]] <- pinball(data[[i]], data$actual, quants) j <- j + 1 } names(pinball_list) <- names(data)[model_indices] return(pinball_list) } aggregated_pinball <- function(pinball_list) { #returns average pinball scores (CRPS score) for each entry in pinball_list return(lapply(pinball_list, mean)) } ``` ```{r crps in time functions} crps <- function(forc, true, quants) { # helper function to calculate CRPS for a vector of true values true and a matrix of quantile predictions forc for equidistant quantiles quants # columns = quantiles # returns a vector of numbers - CRPS scores over time crps_score <- matrix(nrow=length(true), ncol=length(quants)) for (i in 1:length(quants)) { temp <- true - forc[, i] crps_score[, i] <- (quants[i]*temp - (temp < 0)*temp) } return(apply(crps_score, 1, mean)) } crps_list_f <- function(data, quants=quantiles, exclude=c()) { # computes a list of errors for each single model + ensembles (for the median) # exclude - list of model/ensemble names to exclude error_list <- list() model_indices <- which(!(names(data) %in% c(c('dates', 'hours', 'actual'), exclude))) j <- 1 for (i in model_indices) { error_list[[j]] <- crps(data[[i]], data$actual, quants) j <- j + 1 } names(error_list) <- names(data)[model_indices] return(error_list) } ``` ```{r pairwise dm test} error_list_f <- function(data, quants=quantiles, exclude=c(), median=TRUE) { # computes a list of errors for each single model + ensembles (for the median) # exclude - list of model/ensemble names to exclude error_list <- list() model_indices <- which(!(names(data) %in% c(c('dates', 'hours', 'actual'), exclude))) m_ind <- which(quants == 0.5) j <- 1 for (i in model_indices) { if (median) { # compute for median error_list[[j]] <- data[[i]][, m_ind] - data$actual } else { # compute for mean, approximated as mean of percentiles error_list[[j]] <- apply(data[[i]], 1, mean) - data$actual } j <- j + 1 } names(error_list) <- names(data)[model_indices] return(error_list) } pairwise_dm_hourly <- function(crps_list) { # returns the results of a Diebold-Mariano test on each pair of forecasts n_f <- length(crps_list) dm_matrix <- matrix(ncol=n_f, nrow=n_f) # matrix for results dimnames(dm_matrix) <- list(names(crps_list), names(crps_list)) for (i in 1:n_f) { for (j in 1:n_f) { if (i != j) { # do not compare identical forecasts crps_mat_1 <- matrix(crps_list[[i]], nrow=24) crps_mat_2 <- matrix(crps_list[[j]], nrow=24) dm_matrix[i, j] <- dm.test(apply(crps_mat_1, 2, mean), apply(crps_mat_2, 2, mean), alternative='g', power=1)$p.value } else { dm_matrix[i, j] <- NA } } } return(dm_matrix) } ``` # Plotting ```{r plot pinball function} pinball_plot <- function(quants, pinball_list) { data_df <- data.frame(pinball_list, q = quants) %>% pivot_longer(-q, names_to = "model", values_to = "score") ggplot(data_df) + geom_line(aes(q, score, color = model)) + theme_bw() } pinball_plot_3 <- function(quants, pinball_list) { #plot pinball scores per quantile with multiple single models best_model <- which.min(aggregated_pinball(pinball_list)) pinball_list <- lapply(pinball_list, function(vect) {vect/pinball_list[[best_model]]}) data_df <- data.frame(pinball_list, q = quants) %>% pivot_longer(-q, names_to = "model", values_to = "score") %>% mutate(color = ifelse(substr(model, 1, 5) == "model", "single", model)) ggplot(data_df) + geom_line(aes(q, score, group = model, color = color)) + theme_bw() } ``` ```{r} dm_test_heatmap <- function(dm_matrix, crps_list, sorted_names) { #visualize the forecast distances via heatmap data <- dm_matrix # create data frame data_df <- data.frame(data) data_df$model1 <- dimnames(dm_matrix)[[1]] data_df <- pivot_longer(data_df, -model1, names_to = "model2", values_to = "p_value") data_df$text_p_value <- ifelse(is.na(data_df$p_value), ' ', sprintf("%.3f", data_df$p_value)) data_df$p_value <- ifelse(is.na(data_df$p_value), 1, data_df$p_value) #create main heatmap hmap <- ggplot(data_df) + geom_tile(aes(model2, model1, fill = p_value)) + geom_text(aes(model2, model1, label = text_p_value), colour = "white", check_overlap = TRUE, size = 2.5) + scale_fill_gradientn(colors=c('green4', 'darkgreen', 'firebrick3', 'firebrick4'), limits = c(0, 0.1), na.value='black', name='p-value', values=c(0, 0.5, 0.50000001, 1)) + # ggtitle('DM test p-value: is model 2 better than model 1') + xlab('Model 1') + ylab('Model 2') + scale_y_discrete(limits=sorted_names[length(sorted_names):1]) + scale_x_discrete(limits=sorted_names) + theme_bw() + theme(axis.text.x = element_text(hjust=1, angle=45)) return(hmap) } ``` ```{r plot point errors} plot_errors <- function(error_list_med, error_list_mean, sorted_names) { # function for plotting RMSE and MAE errors for each strategy, ordered by MAE data_df <- data.frame(names = names(error_list), MAE = sapply(error_list_med, function(vect) {round(mean(abs(vect)), 3)}), RMSE = sapply(error_list_mean, function(vect) {round(sqrt(mean(vect^2)), 3)})) data_df_longer <- pivot_longer(data_df, -names, names_to='error', values_to='value') p <- ggplot(data_df_longer) + geom_col(aes(value, names, fill=error), position='dodge') + geom_text(data=data_df, aes(y=names, x=MAE, label=round(MAE, 3)), size=2.5, nudge_x=0.75, nudge_y=-0.3) + geom_text(data=data_df, aes(y=names, x=RMSE, label=round(RMSE, 3)), size=2.5, nudge_x=0.75, nudge_y=0.2) + theme_bw() + scale_y_discrete(limits=sorted_names) + scale_x_continuous(limits=c(0, 9)) + scale_fill_discrete(type=c('#337B7B', 'indianred3')) + theme(plot.margin = margin(0.1, 0, 0.1, 0, 'in'), axis.text.y = element_blank()) + ylab(NULL) + xlab('Error') + guides(fill=guide_legend('Error')) return(p) } plot_crps <- function(pinball_list, sorted_names) { # function for plotting CRPS score for each strategy, ordered data_df <- data.frame(names = names(pinball_list), CRPS = as.numeric(aggregated_pinball(pinball_list))) p <- ggplot(data_df) + geom_col(aes(CRPS, names, fill='CRPS'), fill='#337B7B') + annotate('text', y=data_df$names, x=data_df$CRPS + 0.2, label=round(data_df$CRPS, 3), size=2.5) + theme_bw() + scale_y_discrete(limits=sorted_names) + scale_x_continuous(limits=c(0, 2)) + # theme(axis.text.x = element_text(hjust=1, angle=45), # plot.margin = unit(c(0.1, 0.1, 0.1, 0.5), 'in'), # legend.position='none') + theme(legend.position='none', plot.margin = margin(0.1, 0.05, 0.1, 0.01, 'in')) + ylab('Model') + xlab('CRPS score') return(p) } ``` # Computing the averages ```{r crps score experiment} quantiles <- (1:99)/100 initial_crps <- aggregated_pinball(pinball_list_f(epex_nn)) top_models <- which(initial_crps %in% head(sort(as.numeric(initial_crps)), 5)) ``` ```{r errors of simple averaging} epex_nn$DDNN_N_qEns <- simple_average(epex_nn, indices=c(9:12)) epex_nn$DDNN_JSU_qEns <- simple_average(epex_nn, indices=c(5:8)) epex_nn$DDNN_N_qEns_LEAR <- simple_average(epex_nn, indices=c(1:2, 9:12)) epex_nn$DDNN_JSU_qEns_LEAR <- simple_average(epex_nn, indices=c(1:2, 5:8)) epex_nn$DDNN_N_qEns_DNN <- simple_average(epex_nn, indices=c(3:4, 9:12)) epex_nn$DDNN_JSU_qEns_DNN <- simple_average(epex_nn, indices=c(3:4, 5:8)) ``` ```{r errors of crps averaging} epex_nn$DDNN_N_CRPS <- crps_averaging_hourly(epex_nn, quantiles, indices=c(9:12))$predictions epex_nn$DDNN_JSU_CRPS <- crps_averaging_hourly(epex_nn, quantiles, indices=c(5:8))$predictions epex_nn$DDNN_N_CRPS_LEAR <- crps_averaging_hourly(epex_nn, quantiles, indices=c(1:2, 9:12))$predictions epex_nn$DDNN_JSU_CRPS_LEAR <- crps_averaging_hourly(epex_nn, quantiles, indices=c(1:2, 5:8))$predictions epex_nn$DDNN_N_CRPS_DNN <- crps_averaging_hourly(epex_nn, quantiles, indices=c(3:4, 9:12))$predictions epex_nn$DDNN_JSU_CRPS_DNN <- crps_averaging_hourly(epex_nn, quantiles, indices=c(3:4, 5:8))$predictions ``` ```{r crps score} # compute CRPS score for all forecasts aggregated_pinball(pinball_list_f(epex_nn)) ``` ```{r dm results} crps_list <- crps_list_f(epex_nn, exclude = names(epex_nn)[1:12]) data_df <- data.frame(names = names(aggregated_pinball(pinball_list_f(epex_nn))), CRPS = as.numeric(aggregated_pinball(pinball_list_f(epex_nn)))) sorted_names <- data_df %>% arrange(desc(CRPS)) %>% select(names) sorted_names <- sorted_names$names dm_results_mae <- pairwise_dm_hourly(crps_list) dm_test_heatmap(dm_results_mae, crps_list, sorted_names[13:length(sorted_names)]) if (save) { ggsave('dm_hourly_scores_big.eps',device=cairo_ps, width=7, height=4, units='in') } ``` ```{r point forecast results} error_list <- error_list_f(epex_nn) print(lapply(error_list, function(vect) {c('MAE: ', round(mean(abs(vect)), 3), ' RMSE: ', round(sqrt(mean(vect^2)), 3))})) ``` ```{r plot errors and CRPS} pinball_list <- pinball_list_f(epex_nn) error_list_med <- error_list_f(epex_nn) error_list_mean <- error_list_f(epex_nn, median=FALSE) point_error_plot <- plot_errors(error_list_med, error_list_mean, sorted_names) point_error_plot crps_plot <- plot_crps(pinball_list, sorted_names) crps_plot double_plot <- plot_grid(crps_plot, point_error_plot, align='h', rel_widths = c(1.2, 1)) double_plot if (save) { ggsave('error_plot.eps', plot=double_plot, device=cairo_ps, width=8, height=5, units='in') } ``` # Economic evaluation - battery ```{r} find_optimal_hours <- function(daily_medians, battery_state) { # a helper function to return optimal trading hours from a vector of medians # h1 - buy time # h2 - sell time # h3 - extra unlimited bid/offer time if (battery_state == 0) { # binary decision variables: h_buy_1:24, h_sell_1:24, h_*_1:24 coefs <- c(-1/0.9 * daily_medians, 0.9 * daily_medians, -1/0.9 * daily_medians) # constraints: one of each set can equal 1 constr_mat <- rbind(c(rep(1, 24), rep(0, 24), rep(0, 24)), c(rep(0, 24), rep(1, 24), rep(0, 24)), c(rep(0, 24), rep(0, 24), rep(1, 24))) constr_rhs <- c(1, 1, 1) # direction of constraints constr_dir <- c('==', '==', '==') # extra constraint: h* < h2 for (i in 1:24) { # h* < h2 constraint c_rel <- numeric(72) if (i > 1) {c_rel[25:(24 + i - 1)] <- 1} # h2 c_rel[48 + i] <- 1 # -h* rhs_rel <- 1 dir_rel <- '<=' # sums go the other way, i.e. h* can't =1 before h2 # add constraints constr_mat <- rbind(constr_mat, c_rel) constr_rhs <- c(constr_rhs, rhs_rel) constr_dir <- c(constr_dir, dir_rel) } # solve the LP optimum <- Rglpk_solve_LP(obj = coefs, mat = constr_mat, dir = constr_dir, rhs = constr_rhs, types = 'B', max = TRUE) h1 <- which.max(optimum$solution[1:24]) h2 <- which.max(optimum$solution[25:48]) h3 <- which.max(optimum$solution[49:72]) } else if (battery_state == 2) { # binary decision variables: h_buy_1:24, h_sell_1:24, h_*_1:24 coefs <- c(-1/0.9 * daily_medians, 0.9 * daily_medians, 0.9 * daily_medians) # constraints: one of each set can equal 1 constr_mat <- rbind(c(rep(1, 24), rep(0, 24), rep(0, 24)), c(rep(0, 24), rep(1, 24), rep(0, 24)), c(rep(0, 24), rep(0, 24), rep(1, 24))) constr_rhs <- c(1, 1, 1) # direction of constraints constr_dir <- c('==', '==', '==') # extra constraint: h* < h1 for (i in 1:24) { # h* < h1 constraint c_rel <- numeric(72) if (i > 1) {c_rel[1:(i - 1)] <- 1} # h1 c_rel[48 + i] <- 1 # -h* rhs_rel <- 1 dir_rel <- '<=' # sums go the other way, i.e. h* can't =1 before h2 # add constraints constr_mat <- rbind(constr_mat, c_rel) constr_rhs <- c(constr_rhs, rhs_rel) constr_dir <- c(constr_dir, dir_rel) } # solve the LP optimum <- Rglpk_solve_LP(obj = coefs, mat = constr_mat, dir = constr_dir, rhs = constr_rhs, types = 'B', max = TRUE) h1 <- which.max(optimum$solution[1:24]) h2 <- which.max(optimum$solution[25:48]) h3 <- which.max(optimum$solution[49:72]) } else { # no extra transaction h2 <- which.max(daily_medians) h1 <- which.min(daily_medians) h3 <- 0 } return(c(round(h1 - 1), round(h2 - 1), round(h3 - 1))) } trade_strategy <- function(predictions, actual, risk_appetite, battery_capacity = 2, efficiency = 0.9) { # results of trading with a battery basing on a single forecast with price predictions predictions # returns a list of two vectors: income (daily incomes) and trade (number of trades) T <- length(actual)/24 # Observations D <- 24 # Variables (hours) income <- numeric(T) trades <- data.frame(buy = numeric(T), sell = numeric(T)) battery_state <- 1 # assuming we start with 1 MWh charged q <- round((100 - risk_appetite * 100)/2) add <- mean(actual) add <- 0 for (t in 1:T) { # iterate over 554 forecasted days # find relevant data indices h0_ind <- (t - 1)*24 + 1 median_predictions <- predictions[h0_ind:(h0_ind + 23), 50] # find optimal trading hours opt_hours <- find_optimal_hours(median_predictions, battery_state) # plan bids buy_hour <- h0_ind + opt_hours[1] sell_hour <- h0_ind + opt_hours[2] balance_hour <- h0_ind + opt_hours[3] buy_order_limit <- predictions[buy_hour, (100 - q)] # quantile 1 - q of price at day d hour h1 sell_order_limit <- predictions[sell_hour, q] # quantile q of price at day d hour h2 buy_real <- actual[buy_hour] sell_real <- actual[sell_hour] balance_real <- actual[balance_hour] # compute income entry <- (efficiency * sell_order_limit - (1 / efficiency) * buy_order_limit > 0) if (battery_state == 0) { if (buy_real <= buy_order_limit && entry) { if (sell_real >= sell_order_limit) { # buy and sell and balance income[t] <- efficiency * (sell_real) - (1 / efficiency) * (buy_real + balance_real) + add trades$buy[t] <- 2 trades$sell[t] <- 1 battery_state <- 1 } else { # buy and balance income[t] <- - (1 / efficiency) * (buy_real + balance_real) + 2 * add trades$buy[t] <- 2 battery_state <- 2 } } else { if (sell_real >= sell_order_limit && entry) { # sell and balance income[t] <- efficiency * (sell_real) - (1 / efficiency) * (balance_real) trades$buy[t] <- 1 trades$sell[t] <- 1 battery_state <- 0 } else { # only balance income[t] <- - (1 / efficiency) * (balance_real) + add trades$buy[t] <- 1 battery_state <- 1 } } # battery state = 0 } else if (battery_state == 1) { if (buy_real <= buy_order_limit && entry) { if (sell_real >= sell_order_limit) { # buy and sell income[t] <- efficiency * (sell_real) - (1 / efficiency) * (buy_real) trades$buy[t] <- 1 trades$sell[t] <- 1 battery_state <- 1 } else { # only buy income[t] <- - (1 / efficiency) * (buy_real) + add trades$buy[t] <- 1 battery_state <- 2 } } else { if (sell_real >= sell_order_limit && entry) { # only sell income[t] <- efficiency * (sell_real) - add trades$sell[t] <- 1 battery_state <- 0 } else { # no action battery_state <- 1 } } # battery state = 1 } else if (battery_state == 2) { if (buy_real <= buy_order_limit && entry) { if (sell_real >= sell_order_limit) { # buy and sell and balance income[t] <- efficiency * (sell_real + balance_real) - (1 / efficiency) * (buy_real) - add trades$buy[t] <- 1 trades$sell[t] <- 2 battery_state <- 1 } else { # buy and balance income[t] <- efficiency * (balance_real) - (1 / efficiency) * (buy_real) trades$buy[t] <- 1 trades$sell[t] <- 1 battery_state <- 2 } } else { if (sell_real >= sell_order_limit && entry) { # sell and balance income[t] <- efficiency * (sell_real + balance_real) - 2 * add trades$sell[t] <- 2 battery_state <- 0 } else { # only balance income[t] <- efficiency * (balance_real) - add trades$sell[t] <- 1 battery_state <- 1 } } # battery state = 2 } } # t return(list(income = income, trades = trades)) } ``` ```{r income eval function} incomes_evaluation <- function(data, risk_appetites, battery_capacity = 2, efficiency = 0.9, exclude=c()) { # computes a list of total and per trade incomes for each strategy and each risk appetite # exclude - list of model/ensemble names to exclude model_indices <- which(!(names(data) %in% c(c('dates', 'hours', 'actual'), exclude))) n_models <- length(model_indices) n_alphas <- length(risk_appetites) total_income_df <- matrix(nrow = n_models, ncol = length(risk_appetites)) income_per_trade_df <- matrix(nrow = n_models, ncol = length(risk_appetites)) var_df <- matrix(nrow = n_models, ncol = length(risk_appetites)) k <- 1 for (i in model_indices) { for (j in 1:n_alphas) { trade_results <- trade_strategy(data[[i]], data$actual, risk_appetites[j], battery_capacity, efficiency) total_income_df[k, j] <- sum(trade_results$income) income_per_trade_df[k, j] <- sum(trade_results$income)/sum(trade_results$trades) var_df[k, j] <- quantile(trade_results$income[apply(trade_results$trades, 1, sum) > 0], 0.05) } k <- k + 1 } dimnames(total_income_df) <- list(names(data)[model_indices], risk_appetites) dimnames(income_per_trade_df) <- list(names(data)[model_indices], risk_appetites) dimnames(var_df) <- list(names(data)[model_indices], risk_appetites) return(list(total_income = total_income_df, income_per_trade = income_per_trade_df, var = var_df)) } ``` ```{r} # assumptions battery_capacity <- 2 # capacity in MWh efficiency <- 0.9 # charging/discharging efficiency risk_appetites <- c(0.9, 0.8, 0.7, 0.6, 0.5) incomes_eval <- incomes_evaluation(epex_nn, risk_appetites, exclude=names(epex_nn)[1:12]) print(incomes_eval$total_income) print(incomes_eval$income_per_trade) print(incomes_eval$var) ``` ```{r} income_heatmap <- function(income_matrix, title='Income', sorted_names) { #visualize the incomes via heatmap # create data frame data_df <- data.frame(income_matrix) data_df$model <- dimnames(income_matrix)[[1]] data_df <- pivot_longer(data_df, -model, names_to = "risk_appetite", values_to = "income") data_df$risk_appetite <- substr(data_df$risk_appetite, 2, 4) data_df <- data_df %>% group_by(risk_appetite) %>% mutate(rank=rank(income)) # find best values best_values <- apply(income_matrix, 2, max) bold_text <- ifelse(data_df$income %in% best_values, 'bold', 'plain') # adjust labels label_format <- ifelse(max(data_df$income) > 100, "%.0f", "%.2f") #create main heatmap hmap <- ggplot(data_df) + geom_tile(aes(risk_appetite, model, fill = rank)) + geom_text(aes(risk_appetite, model, label = sprintf(label_format, income), fontface=bold_text), colour = "black", check_overlap = TRUE, size = 3) + scale_fill_gradient2(high='#3EB96C', mid='#FFBF79', low='coral1', limits = c(min(data_df$rank), max(data_df$rank)), midpoint=(min(data_df$rank) + max(data_df$rank))/2) + scale_y_discrete(limits=sorted_names) + scale_x_discrete(position = "top") + theme_bw() + xlab('Risk appetite') + ylab('Model') + # guides(fill=guide_colorbar(title)) theme(legend.position='none') return(hmap) } plot_income_total <- income_heatmap(incomes_eval$total_income, 'Total income [EUR]', sorted_names[24:13]) plot_income_total if (save) { ggsave('plot_income_total.eps', device=cairo_ps, width=7, height=2.5, units='in') } plot_income_per_trade <- income_heatmap(incomes_eval$income_per_trade, 'Income per trade [EUR]', sorted_names[24:13]) plot_income_per_trade if (save) { ggsave('plot_income_per_trade.eps', device=cairo_ps, width=7, height=2.5, units='in') } plot_var <- income_heatmap(incomes_eval$var, 'Value at risk [EUR]', sorted_names[24:13]) plot_var if (save) { ggsave('plot_var.eps', device=cairo_ps, width=7, height=2.5, units='in') } ``` # Aditional illustrations ```{r strategy illustration} prices <- matrix(epex_nn$actual, nrow=24) selected_index <- which.max(apply(prices, 2, max) - apply(prices, 2, min)) - 2 h0_ind <- (selected_index - 1)*24 + 1 hours <- 1:24 daily_prices <- prices[, selected_index] q <- 10 low_quantile <- epex_nn$DDNN_N_qEns[h0_ind:(h0_ind + 23), q] med_quantile <- epex_nn$DDNN_N_qEns[h0_ind:(h0_ind + 23), 50] high_quantile <- epex_nn$DDNN_N_qEns[h0_ind:(h0_ind + 23), 100-q] min_hour <- which.min(med_quantile) max_hour <- which.max(med_quantile) ggplot(data.frame(hour=hours, price=daily_prices, lower_limit=low_quantile, upper_limit=high_quantile, median=med_quantile)) + geom_ribbon(aes(hour, ymin=lower_limit, ymax=upper_limit, fill='PI'), alpha=0.5) + geom_line(aes(hour, price, linetype='Actual price'), color='#337B7B', linewidth=0.5) + geom_line(aes(hour, median, linetype='Median forecast'), color='#337B7B', linewidth=0.5) + annotate('point', x=c(min_hour, max_hour), y=c(high_quantile[min_hour], low_quantile[max_hour]), color='indianred3', size=2) + annotate('segment', x=c(min_hour, max_hour), y=c(high_quantile[min_hour], low_quantile[max_hour]), xend=c(min_hour, max_hour), yend=c(med_quantile[min_hour], med_quantile[max_hour]), color='indianred3', linewidth=0.5, linetype='dashed') + annotate('text', x=c(min_hour, max_hour), y=c(high_quantile[min_hour] + 5, low_quantile[max_hour] - 5), label=c(TeX('$\\hat{Y}_{d,h1}^{0.9}$', output='character'), TeX('$\\hat{Y}_{d,h2}^{0.1}$', output='character')), parse=TRUE, size=4) + theme_bw() + scale_x_continuous(limits=c(1, 24), breaks=c(1, 6, 12, 18, 24)) + scale_linetype_manual(name=NULL, breaks=c('Actual price', 'Median forecast'), values=c('solid', 'dashed'), guide = "legend") + scale_fill_manual(name=NULL, breaks=c('PI'), values=c('gray'), guide = "legend") + scale_color_manual(name=NULL, breaks=c('selected hours'), values=c('indianred3'), guide = "legend") + theme(legend.position = c(0.19, 0.84), legend.box.background = element_rect(fill = "white", color = "black", linewidth=0.3), legend.key.height = unit(1, 'lines'), legend.key.width = unit(1, 'lines'), legend.margin = margin(-13, 0.1, 0.1, 0.1), legend.title=element_blank(), legend.box.margin = margin(10, 2, 2, 2)) + xlab('Hour') + ylab('Price') if (save) { ggsave('plot_strategy.eps', device=cairo_ps, width=4, height=3, units='in') } ``` ```{r crystal ball strategy} max_income <- sum(0.9 * apply(prices, 2, max) - 1/0.9 * apply(prices, 2, min)) min_income <- sum(0.9 * apply(prices, 2, min) - 1/0.9 * apply(prices, 2, max)) print('Maximum income on the possible gains scale:') print(1 - (max_income - max(incomes_eval$total_income))/(max_income - min_income)) print('Minimum income on the possible gains scale:') print(1 - (max_income - min(incomes_eval$total_income))/(max_income - min_income)) ``` ```{r} incomes_in_time <- function(data, risk_appetite, battery_capacity = 2, efficiency = 0.9, exclude=c()) { # computes a list of total and per trade incomes for each strategy and each risk appetite # exclude - list of model/ensemble names to exclude model_indices <- which(!(names(data) %in% c(c('dates', 'hours', 'actual'), exclude))) n_models <- length(model_indices) total_income_df <- matrix(ncol = n_models, nrow = length(data$actual)/24) trade_df <- matrix(ncol = n_models, nrow = length(data$actual)/24) k <- 1 for (i in model_indices) { trade_results <- trade_strategy(data[[i]], data$actual, risk_appetite, battery_capacity, efficiency) total_income_df[, k] <- trade_results$income trade_df[, k] <- apply(trade_results$trades, 1, sum) k <- k + 1 } dimnames(total_income_df) <- list(c(), names(data)[model_indices]) dimnames(trade_df) <- list(c(), names(data)[model_indices]) return(list(total_income = total_income_df, trade = trade_df)) } income_time_df_06 <- incomes_in_time(epex_nn, 0.6, exclude=names(epex_nn)[1:12]) income_time_df_08 <- incomes_in_time(epex_nn, 0.8, exclude=names(epex_nn)[1:12]) ``` ```{r} color_pal <- c('indianred3', '#337B7B', '#CDA555', '#9ABF4F', '#97A5C6', '#415789') sel_models <- c(14, 16, 20, 22, 24) - 12 cum_data_df <- data.frame(apply(apply(income_time_df_08$total_income[, sel_models], 2, cumsum), 2, rollmean, k=30, fill='extend')) cum_data_df <- cum_data_df - cum_data_df$DDNN_JSU_CRPS_LEAR cum_data_df$dates <- epex_nn$dates[seq(1, length(epex_nn$dates), by=24)] cum_data_df <- cum_data_df %>% pivot_longer(-dates, names_to='model', values_to='income') plot_income_in_time_08 <- ggplot(cum_data_df) + geom_line(aes(dates, income, color=model)) + scale_color_discrete(type=color_pal) + theme_bw() + theme(axis.text.x = element_text(hjust=1, angle=30), plot.margin = unit(c(1, 5.5, 0, 5.5), 'points'), # plot.margin = unit(c(1, 158, 0, 5.5), 'points'), legend.position = 'none' ) + # theme(axis.text.x = element_blank()) + scale_x_date(breaks='2 months', date_labels='%Y-%m') + xlab('Date') + guides(color=guide_legend('Ensemble')) + # xlab('') + ylab('Δ profit [EUR]') plot_income_in_time_08 ``` ```{r} cum_data_df <- data.frame(apply(apply(income_time_df_06$total_income[, sel_models], 2, cumsum), 2, rollmean, k=30, fill='extend')) cum_data_df <- cum_data_df - cum_data_df$DDNN_JSU_CRPS_LEAR cum_data_df$dates <- epex_nn$dates[seq(1, length(epex_nn$dates), by=24)] cum_data_df <- cum_data_df %>% pivot_longer(-dates, names_to='model', values_to='income') plot_income_in_time_06 <- ggplot(cum_data_df) + geom_line(aes(dates, income, color=model)) + scale_color_discrete(type=color_pal) + theme_bw() + theme(axis.text.x = element_blank(), plot.margin = unit(c(1, 5.5, 0, 5.5), 'points'), # plot.margin = unit(c(1, 158, 0, 5.5), 'points'), #legend.position = 'none' legend.position = c(0.24, 0.19), legend.background = element_rect(fill = "white", color = "black", linewidth=0.1), legend.key.height = unit(0.6, 'lines'), legend.key.width = unit(1, 'lines'), legend.margin = margin(0, 0.05, 0.2, 0.05, 'lines'), legend.text = element_text(size=9, margin=margin(t=1, b=1)) ) + # theme(axis.text.x = element_blank()) + scale_x_date(breaks='2 months') + # xlab('Date') + guides(color=guide_legend(NULL, ncol=2)) + xlab('') + ylab('Δ profit [EUR]') plot_income_in_time_06 ``` ```{r plot actual} data_plot <- function(data) { # plot of the actual prices over time # prepare data dates <- data$dates[seq(1, length(data$dates), by=24)] prices <- apply(matrix(data$actual, nrow=24), 2, mean) min_prices <- apply(matrix(data$actual, nrow=24), 2, min) max_prices <- apply(matrix(data$actual, nrow=24), 2, max) # plot mean daily prices p <- ggplot(data.frame(date=dates, price=prices, min_price=min_prices, max_price=max_prices)) + geom_ribbon(aes(date, ymin=min_price, ymax=max_price, fill='Daily price spread'), alpha=0.4) + geom_line(aes(date, price, color='Average daily price')) + xlab('Date') + ylab('Price [EUR/MWh]') + theme_bw() + # theme(axis.text.x = element_text(hjust=1, angle=45)) + scale_x_date(breaks='2 months', date_labels='%Y-%m') + # scale_y_continuous(limits=c(min(min_prices), max(max_prices))) + coord_cartesian(ylim=c(min(min_prices), max(max_prices))) + scale_color_manual(name=NULL, breaks=c('Average daily price'), values=c('#337B7B')) + scale_fill_manual(name=NULL, breaks=c('Daily price spread'), values=c('#337B7B')) + theme(axis.text.x=element_blank(), # axis.text.x = element_text(hjust=1, angle=30), plot.margin = unit(c(1, 5.5, 0, 7.5), 'points'), # plot.margin=unit(c(5, 158, 0, 8), 'points'), legend.position = c(0.11, 0.84), legend.box.background = element_rect(fill = "white", color = "black", linewidth=0.3), legend.key.height = unit(1, 'lines'), legend.key.width = unit(1, 'lines'), legend.margin = margin(-13, 0.1, 0.1, 0.1), legend.title=element_blank(), legend.box.margin = margin(10, 2, 2, 2), legend.text = element_text(size=10) ) + xlab('') return(p) } data_plot_obj <- data_plot(epex_nn) data_plot_obj if (save) { ggsave('plot_data.eps', device=cairo_ps, width=8, height=2, units='in') } ``` ```{r stack income in time plots} double_plot <- plot_grid(data_plot_obj, plot_income_in_time_06, plot_income_in_time_08, rel_heights = c(1.4, 1, 1.25), rel_widths=c(1, 1), ncol=1) double_plot if (save) { ggsave('income_in_time_plot.eps', plot=double_plot, device=cairo_ps, width=8, height=6, units='in') } ``` ```{r plot scaled pinball modified explanation} pinball_plot_3 <- function(quants, pinball_list) { #plot pinball scores per quantile with multiple single models best_model <- which.min(aggregated_pinball(pinball_list)) pinball_list <- lapply(pinball_list, function(vect) {vect/pinball_list[[best_model]]}) data_df <- data.frame(pinball_list, q = quants) %>% pivot_longer(-q, names_to = "model", values_to = "score") %>% mutate(color = ifelse(substr(model, 1, 5) == "model", "single", model)) ggplot(data_df) + geom_line(aes(q, score, group = model, color = color)) + scale_color_discrete(type=color_pal) + theme_bw() + xlab('Quantile') + ylab('Scaled pinball score') + guides(color=guide_legend('Ensemble')) + theme(legend.position = c(0.7, 0.3), legend.background = element_rect(fill = "white", color = "black"), legend.key.size = unit(0.8, 'lines')) } pinball_check2 <- pinball_plot_3(quantiles, pinball_list[sel_models + 12]) pinball_check2 if (save) { ggsave('pinball_check.eps', width=4, height=3, units='in') } ``` ```{r income and crps correlation} income_df <- data.frame(incomes_eval$total_income) income_df$CRPS <- sapply(crps_list, mean) income_df <- income_df %>% pivot_longer(-CRPS, names_to='risk_appetite', values_to='income') %>% mutate(risk_appetite=substr(risk_appetite, 2, 4)) %>% group_by(risk_appetite) %>% mutate(trend=lm(income~CRPS)$fitted.values, r2=summary(lm(income~CRPS))$r.squared) plot_income_CRPS <- ggplot(income_df) + geom_point(aes(CRPS, income, color=risk_appetite), size=2) + geom_line(aes(CRPS, trend, color=risk_appetite)) + scale_color_discrete(type=color_pal) + theme_bw() + theme( # axis.text.x = element_blank(), plot.margin = unit(c(1, 5.5, 0, 5.5), 'points'), # plot.margin = unit(c(1, 158, 0, 5.5), 'points'), #legend.position = 'none' legend.position = c(0.15, 0.2), legend.background = element_rect(fill = "white", color = "black", linewidth=0.1), legend.key.height = unit(0.6, 'lines'), legend.key.width = unit(1, 'lines'), legend.margin = margin(0, 0.05, 0.2, 0.05, 'lines'), legend.text = element_text(size=9, margin=margin(t=1, b=1)) ) + # theme(axis.text.x = element_blank()) + # scale_x_date(breaks='2 months') + xlab('CRPS') + guides(color=guide_legend("Risk appetite")) + ylab('Total income [EUR]') plot_income_CRPS ggsave('test_income.pdf', width=4, height=3, units='in') print(income_df %>% group_by(risk_appetite) %>% summarize(r2[1])) ``` ```{r forecast combination illustration} selected_hour <- 4 chosen_h <- h0_ind + selected_hour - 1 forecast_1 <- epex_nn$LEAR_QRA[chosen_h, ] forecast_2 <- epex_nn$DDNN_JSU_1[chosen_h, ] actual_price <- epex_nn$actual[chosen_h] # plot of two initial forecasts p_raw1 <- ggplot(data.frame(quantile=quantiles, LEAR_QRA=forecast_1, DDNN_JSU_1=forecast_2)) + geom_line(aes(LEAR_QRA, quantile), color=color_pal[1]) + geom_line(aes(DDNN_JSU_1, quantile), color=color_pal[2]) + geom_vline(xintercept=actual_price, color=color_pal[4], linetype='dashed') + theme_bw() + scale_x_continuous(breaks=c(15, 20, 25, 30, 35, 40, 45), limits=c(15, 45)) + xlab('Price') + ylab('Probability') + ggtitle('Individual forecasts') + theme(plot.margin=unit(c(0, 7.5, 0, 0.5), 'points'), plot.title = element_text(size=12)) # plot of naive average p_comb1 <- ggplot(data.frame(quantile=quantiles, ensemble=(forecast_1 + forecast_2)/2)) + geom_line(aes(ensemble, quantile), color=color_pal[3]) + geom_vline(xintercept=actual_price, color=color_pal[4], linetype='dashed') + theme_bw() + scale_x_continuous(breaks=c(15, 20, 25, 30, 35, 40, 45), limits=c(15, 45)) + xlab('') + ylab('Probability') + theme(axis.text.x=element_blank(), plot.margin=unit(c(0, 2.5, 0, 0.5), 'points'), plot.title = element_text(size=12)) + ggtitle('Naive (equal weights)') # CRPS average crps_model <- crps_averaging_hourly(epex_nn, quantiles, indices=c(1, 5)) ensemble2 <- crps_model$predictions[chosen_h, ] # plot of CRPS average p_comb2 <- ggplot(data.frame(quantile=quantiles, ensemble=ensemble2)) + geom_line(aes(ensemble, quantile), color=color_pal[3]) + geom_vline(xintercept=actual_price, color=color_pal[4], linetype='dashed') + theme_bw() + scale_x_continuous(breaks=c(15, 20, 25, 30, 35, 40, 45), limits=c(15, 45)) + xlab('Price') + ylab('Probability') + ggtitle('CRPS learning') + theme(plot.margin=unit(c(0, 2.5, 0, 0.5), 'points'), plot.title = element_text(size=12)) p_raw1 # plots of weights naive_weights <- matrix(rep(0.5, 99*2), ncol=2) crps_weights <- crps_model$model$weights[selected_index, selected_hour, , ] naive_weights <- data.frame(naive_weights, quantile=quantiles) %>% pivot_longer(-quantile, names_to='model', values_to='weights') crps_weights <- data.frame(crps_weights, quantile=quantiles) %>% pivot_longer(-quantile, names_to='model', values_to='weights') p_weight1 <- ggplot(naive_weights) + geom_bar(aes(weights, quantile, fill=model), position='stack', orientation='y', stat='identity', width=1) + scale_fill_discrete(type=color_pal) + scale_x_continuous(breaks=c(0, 0.5, 1)) + theme_bw() + xlab('') +ylab('') + theme(legend.position='none', axis.text.y=element_blank(), axis.text.x=element_blank(), plot.margin=unit(c(18, 2.5, 0, 0.5), 'points')) p_weight2 <- ggplot(crps_weights) + geom_bar(aes(weights, quantile, fill=model), position='stack', orientation='y', stat='identity', width=1) + scale_fill_discrete(type=color_pal) + scale_x_continuous(breaks=c(0, 0.5, 1)) + theme_bw() + xlab('Weights') + ylab('') + theme(legend.position='none', axis.text.y=element_blank(), plot.margin=unit(c(18, 2.5, 0, 0.5), 'points')) right_side <- plot_grid(p_comb1, p_weight1, p_comb2, p_weight2, ncol=2, nrow=2, rel_widths = c(1, 0.5), rel_heights=c(1, 1.1)) plot_grid(p_raw1, right_side, ncol=2, rel_widths = c(0.45, 0.55)) if (save) { ggsave('forecast_combination.eps', device=cairo_ps, width=8, height=3.5, units='in') } ```