nn_domain_score {viraldomain} | R Documentation |
Calculate the Neural Network model domain applicability score
Description
This function fits a Neural Network model to the provided data and computes a domain applicability score based on PCA distances.
Usage
nn_domain_score(
featured_col,
train_data,
nn_hyperparameters,
test_data,
threshold_value
)
Arguments
featured_col |
The name of the featured column in the training data. |
train_data |
The training data used to fit the Neural Network model. |
nn_hyperparameters |
A list of Neural Network hyperparameters, including hidden_units, penalty, and epochs. |
test_data |
The testing domain data used to calculate the domain applicability score. |
threshold_value |
The threshold value for domain applicability scoring. |
Value
A tibble with the domain applicability scores.
Examples
library(viraldomain)
library(dplyr)
# Set the seed for reproducibility
set.seed(1234)
# Load the training data
data(viral)
# Rows whose 2022 viral load is at or below 40 cpm; these get imputed.
# which() drops NA comparisons, so missing viral loads stay missing.
viral_censored_rows <- which(viral$vl_2022 <= 40)
# Number of imputations needed
num_imputations <- length(viral_censored_rows)
# Draw one exponential value (rate = 1/13) per censored row
imputed_values <- rexp(num_imputations, rate = 1 / 13)
# Write each draw into its own censored row. Handing the shorter
# `imputed_values` vector to ifelse() would recycle it across all rows, so
# the same draw could end up in more than one row.
viral_imputed_vl <- viral$vl_2022
viral_imputed_vl[viral_censored_rows] <- imputed_values
# Create a new tibble with the imputed viral load (raw, log10 and jittered
# log10) and with duplicated cd4 counts shifted by a random integer in 1..100
imputed_viral <- viral |>
  mutate(
    imputed_vl_2022 = viral_imputed_vl,
    log10_imputed_vl_2022 = log10(imputed_vl_2022),
    jittered_log10_imputed_vl_2022 = jitter(log10_imputed_vl_2022),
    jittered_cd_2022 = ifelse(
      duplicated(cd_2022),
      cd_2022 + sample(1:100, length(cd_2022), replace = TRUE),
      cd_2022
    )
  )
# New data frame with the two model columns, each centred and scaled
imp_viral <- imputed_viral |>
  select(jittered_cd_2022, jittered_log10_imputed_vl_2022) |>
  scale() |>
  as.data.frame()
# Set the seed for reproducibility
set.seed(1234)
# Load the testing data
data(sero)
# Rows whose 2022 viral load is at or below 40 cpm; these get imputed.
# which() drops NA comparisons, so missing viral loads stay missing.
sero_censored_rows <- which(sero$vl_2022 <= 40)
# Number of imputations needed
num_imputations <- length(sero_censored_rows)
# Draw one exponential value (rate = 1/13) per censored row
imputed_values <- rexp(num_imputations, rate = 1 / 13)
# Write each draw into its own censored row. Handing the shorter
# `imputed_values` vector to ifelse() would recycle it across all rows, so
# the same draw could end up in more than one row.
sero_imputed_vl <- sero$vl_2022
sero_imputed_vl[sero_censored_rows] <- imputed_values
# Create a new tibble with the imputed viral load (raw, log10 and jittered
# log10) and with duplicated cd counts shifted by a random integer in 1..100
imputed_sero <- sero |>
  mutate(
    imputed_vl_2022 = sero_imputed_vl,
    log10_imputed_vl_2022 = log10(imputed_vl_2022),
    jittered_log10_imputed_vl_2022 = jitter(log10_imputed_vl_2022),
    jittered_cd_2022 = ifelse(
      duplicated(cd_2022),
      cd_2022 + sample(1:100, length(cd_2022), replace = TRUE),
      cd_2022
    )
  )
# New data frame with the two model columns. Note that scale() centres and
# scales with the testing data's own column means and standard deviations.
imp_sero <- imputed_sero |>
  select(jittered_cd_2022, jittered_log10_imputed_vl_2022) |>
  scale() |>
  as.data.frame()
# Inputs for nn_domain_score()
# Column of the training data used as the featured column
featured_col <- "jittered_cd_2022"
# Scaled training and testing data frames built above
train_data <- imp_viral
test_data <- imp_sero
# Neural Network hyperparameters
nn_hyperparameters <- list(
  hidden_units = 1,
  penalty = 0.3746312,
  epochs = 480
)
# Threshold for domain applicability scoring
threshold_value <- 0.99
# Compute the domain applicability scores
nn_domain_score(
  featured_col = featured_col,
  train_data = train_data,
  nn_hyperparameters = nn_hyperparameters,
  test_data = test_data,
  threshold_value = threshold_value
)
[Package viraldomain version 0.0.3 Index]