compare_predicted_vs_actual {tsLSTMx}    R Documentation

Compare predicted and actual values for training and validation sets

Description

This function compares the predicted and actual values for the training and validation sets and computes error metrics for each set.

Usage

compare_predicted_vs_actual(
  train_data,
  validation_data,
  y_train_pred,
  y_val_pred
)

Arguments

train_data

The training set data, including actual y values.

validation_data

The validation set data, including actual y values.

y_train_pred

Predicted y values for the training set.

y_val_pred

Predicted y values for the validation set.

Value

A list containing data frames with the comparison of actual vs. predicted values for training and validation sets, as well as metrics for the training and validation sets.

Examples


data <- data.frame(
  Date = as.Date(c("01-04-18", "02-04-18", "03-04-18", "04-04-18", "05-04-18",
                   "06-04-18", "07-04-18", "08-04-18", "09-04-18", "10-04-18",
                   "11-04-18", "12-04-18", "13-04-18", "14-04-18", "15-04-18",
                   "16-04-18", "17-04-18", "18-04-18", "19-04-18", "20-04-18"),
                 format = "%d-%m-%y"),
  A = c(0, 0, 4, 12, 20, 16, 16, 0, 12, 18, 12, 18, 18, 0, 0, 33, 31, 38, 76, 198)
)
check_and_format_data(data)  # validate the input data's structure and types
# Add a new column 'X' based on the values in the second column
data$X <- ifelse(data$A != 0, 1, 0)
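# Optional peek (not in the original example): X is 1 where A is non-zero.
head(data)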

result_embed <- embed_columns(data = data, n_lag = 2)
new_data <- result_embed$data_frame
embedded_colnames <- result_embed$column_names
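# Optional (not in the original example): embedded_colnames holds the names
# of the lagged predictor columns created by embed_columns().
print(embedded_colnames)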

result_split <- split_data(new_data = new_data, val_ratio = 0.1)
train_data <- result_split$train_data
validation_data <- result_split$validation_data
numeric_matrices <- convert_to_numeric_matrices(train_data = train_data,
                                                validation_data = validation_data,
                                                embedded_colnames = embedded_colnames)
X_train <- numeric_matrices$X_train
y_train <- numeric_matrices$y_train
X_val <- numeric_matrices$X_val
y_val <- numeric_matrices$y_val
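# Optional sanity check (not in the original example): feature rows should
# match the number of targets in each split.
stopifnot(nrow(X_train) == NROW(y_train), nrow(X_val) == NROW(y_val))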

initialize_tensorflow()

reshaped_data <- reshape_for_lstm(X_train = X_train, X_val = X_val)
X_train <- reshaped_data$X_train
X_val <- reshaped_data$X_val
y_train <- numeric_matrices$y_train
y_val <- numeric_matrices$y_val
tf <- reticulate::import("tensorflow")
tensors <- convert_to_tensors(X_train = X_train, y_train = y_train, X_val = X_val, y_val = y_val)
X_train <- tensors$X_train
y_train <- tensors$y_train
X_val <- tensors$X_val
y_val <- tensors$y_val
n_patience <- 50
early_stopping <- define_early_stopping(n_patience = n_patience)
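# Patience is the number of epochs tolerated without improvement in the
# validation loss before training stops (assumed standard Keras
# early-stopping semantics for define_early_stopping()).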

# Define your custom loss function
custom_loss <- function(y_true, y_pred) {
  condition <- tf$math$equal(y_true, 0)
  loss <- tf$math$reduce_mean(tf$math$square(y_true - y_pred))  # mean squared error
  loss <- tf$where(condition, tf$constant(0), loss)  # mask the loss where the target is zero
  return(loss)
}
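
# The same masking idea in plain R (illustration only, not in the original
# example): entries where the target is zero contribute nothing:
#   ifelse(y_true == 0, 0, mean((y_true - y_pred)^2))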

grid_search_results <- ts_lstm_x_tuning(
  X_train, y_train, X_val, y_val,
  embedded_colnames, custom_loss, early_stopping,
  n_lag = 2, # desired lag value
  lstm_units_list = c(32),
  learning_rate_list = c(0.001, 0.01),
  batch_size_list = c(32),
  dropout_list = c(0.2),
  l1_reg_list = c(0.001),
  l2_reg_list = c(0.001),
  n_iter = 10,
  n_verbose = 0 # or 1
)

results_df <- grid_search_results$results_df
all_histories <- grid_search_results$all_histories
lstm_models <- grid_search_results$lstm_models
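
# Optional (not in the original example): each row of results_df holds one
# hyperparameter combination and its validation metrics.
head(results_df)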

# Find the row with the minimum val_loss_mae in results_df
min_val_loss_row <- results_df[which.min(results_df$val_loss_mae), ]

# Extract hyperparameters from the row
best_lstm_units <- min_val_loss_row$lstm_units
best_learning_rate <- min_val_loss_row$learning_rate
best_batch_size <- min_val_loss_row$batch_size
best_n_lag <- min_val_loss_row$n_lag
best_dropout <- min_val_loss_row$dropout
best_l1_reg <- min_val_loss_row$l1_reg
best_l2_reg <- min_val_loss_row$l2_reg

# Generate the lstm_model_name for the best model
best_model_name <- paste0("lstm_model_lu_", best_lstm_units, "_lr_", best_learning_rate,
                          "_bs_", best_batch_size, "_lag_", best_n_lag,
                          "_do_", best_dropout, "_l1_", best_l1_reg, "_l2_", best_l2_reg)

# Generate the history_name for the best model
best_history_name <- paste0("history_lu_", best_lstm_units, "_lr_", best_learning_rate,
                            "_bs_", best_batch_size, "_lag_", best_n_lag,
                            "_do_", best_dropout, "_l1_", best_l1_reg, "_l2_", best_l2_reg)
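
# With the grid above, these names take the form (values shown are a
# hypothetical winner):
# "lstm_model_lu_32_lr_0.001_bs_32_lag_2_do_0.2_l1_0.001_l2_0.001"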

# Access the best model from lstm_models
best_model <- lstm_models[[best_model_name]]

best_model_details <- data.frame(min_val_loss_row)

colnames(best_model_details) <- colnames(results_df)

# Access the training history of the best model from all_histories
best_history <- all_histories[[best_history_name]]

validation_loss_best <- best_model_on_validation(best_model, X_val, y_val)
predicted_values <- predict_y_values(best_model, X_train, X_val, train_data, validation_data)
y_train_pred <- predicted_values$y_train_pred
y_val_pred <- predicted_values$y_val_pred
comparison <- compare_predicted_vs_actual(train_data, validation_data, y_train_pred, y_val_pred)
compare_train <- comparison$compare_train
compare_val <- comparison$compare_val
metrics_train <- comparison$metrics_train
metrics_val <- comparison$metrics_val
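
# Illustrative follow-up (not in the original example): inspect the metrics
# and the actual-vs-predicted frames.
print(metrics_train)
print(metrics_val)
head(compare_train)
head(compare_val)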
