f_fit_gradient_logistic_01 {mactivate}    R Documentation

Fit Logistic Multivariate Regression Model with mactivate Using Gradient Descent

Description

Use simple gradient descent to locate the logistic model parameters: the primary effects, the multiplicative effects, and the activation parameters, W.

Usage

f_fit_gradient_logistic_01(
X, 
y, 
m_tot, 
U = NULL, 
m_start = 1, 
mact_control = f_control_mactivate(), 
verbosity = 2)

Arguments

X

Numerical matrix, N x d, of model inputs. Do not include an intercept term.

y

Integer vector of length N with elements in {0, 1}: the binomial model response (output).

m_tot

Scalar non-negative integer. Total number of columns of activation layer, W, over which to fit.

U

Numerical matrix, N x d_u, of model inputs to pass to the activation layer, W. The default, NULL, instructs this function to simply use the argument X.
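
For example, under this default the two calls below are equivalent (a minimal sketch; Xmat and yvec stand for any suitable input matrix and response vector):

fit_a <- f_fit_gradient_logistic_01(X = Xmat, y = yvec, m_tot = 2)
fit_b <- f_fit_gradient_logistic_01(X = Xmat, y = yvec, m_tot = 2, U = Xmat)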

m_start

Currently not used.

mact_control

Named list of class control_mactivate_obj, as created by the function f_control_mactivate, specifying the fitting hyperparameters.

verbosity

Scalar integer. Larger values print more progress information during fitting.

Details

Please make sure to read the Details section of the f_dmss_dW help page before using this function.

Value

An unnamed list of class mactivate_fit_gradient_logistic_01 of length m_tot + 1. Each node is a named list containing fitted parameter estimates. The first top-level node holds the parameter estimates from fitting the 'primary effects' only (W has no columns); the second, the estimates from fitting with 1 column of W; and so on.
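
For example, a minimal sketch of inspecting the node fitted with m columns of W (xxls_out being the returned list, as in the Examples below):

m <- 2                          ## any m in 0:m_tot
fit_m <- xxls_out[[ m + 1 ]]    ## estimates fitted using the first m columns of W
names(fit_m)                    ## names of the fitted parameter estimates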

See Also

See f_fit_gradient_01 or f_fit_hybrid_01 for MLR data (numerical response).

Examples


xxnow <- Sys.time()

library(mactivate)

set.seed(777)


d <- 4
N <- 2400

X <- matrix(rnorm(N*d, 0, 1), N, d) ####

colnames(X) <- paste0("x", 1:d)

############# primary effects
b <- rep_len( c(-1/2, 1/2), d )


xxA <- (X[ , 1]+1/3) * (X[ , 2]-1/3)
xxB <- (X[ , 1]+0/3) * (X[ , 2]-0/3) * (X[ , 4]-1/3)


ystar <-
X %*% b +
2 * xxA -
1 * xxB

xtrue_formula <- as.formula("y ~ .")

xnoint_formula <- as.formula("y ~ . - xxA - xxB")


ysigmoid <- 1 / (1 + exp(-ystar))

range(ysigmoid)

y <- rbinom(n=N, size=1, prob=ysigmoid)


Nall <- N

cov(X)

yall <- y

Xall <- X

### Xall <- X + rnorm(prod(dim(X)), 0, 1/10000) ### add a little noise -- optional

sd(y)


dfx <- data.frame("y"=yall, Xall, xxA, xxB)

tail(dfx)



################### incorrectly fit LM: no interactions
xglm <- glm(xnoint_formula , data=dfx, family=binomial(link="logit"))
summary(xglm)
yhat <- predict(xglm, newdata=dfx, type="response")
mean( f_logit_cost(y=yall, yhat=yhat) )


####### known true
xglm <- glm(xtrue_formula , data=dfx, family=binomial(link="logit"))
summary(xglm)
yhat <- predict(xglm, newdata=dfx, type="response")
mean( f_logit_cost(y=yall, yhat=yhat) )



xxfoldNumber <- rep_len( 1:4, Nall )

ufolds <- sort(unique(xxfoldNumber))
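
### NOTE: only the first fold is held out below; for a full cross-validation,
### wrap the remainder of this example in a loop over ufolds.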

######################

xthis_fold <- ufolds[ 1 ]

xndx_test <- which( xxfoldNumber %in% xthis_fold )

xndx_train <- setdiff( 1:Nall, xndx_test )

##################

X_train <- Xall[ xndx_train, , drop=FALSE ]

X_test <- Xall[ xndx_test, , drop=FALSE ]

y_train <- yall[ xndx_train ]

y_test <- yall[ xndx_test ]



###################

m_tot <- 4

xcmact_gradient <-
f_control_mactivate(
param_sensitivity = 10^11,
bool_free_w       = FALSE,
w0_seed           = 0.05,
#w_col_search      = "alternate",
w_col_search      = "one",
bool_headStart    = TRUE,
ss_stop           = 10^(-12), ### very small
escape_rate       = 1.02,
step_size         = 1,
Wadj              = 1/1,
force_tries       = 0,
lambda            = 1/1 #### does nothing here
)


Uall <- Xall


X_train <- Xall[ xndx_train, , drop=FALSE ]
y_train <- yall[ xndx_train ]


xxls_out <-
f_fit_gradient_logistic_01(
X = X_train,
y = y_train,
m_tot = m_tot,
U = X_train,
m_start = 1,
mact_control = xcmact_gradient,
verbosity = 0
)


######### check test error

U_test <- Xall[ xndx_test, , drop=FALSE ]
X_test <- Xall[ xndx_test, , drop=FALSE ]
y_test <- yall[ xndx_test ]


yhatTT <- matrix(NA, length(xndx_test), m_tot+1)

for(iimm in 0:m_tot) {
    ## predicted test-set probabilities using the first iimm columns of W
    yhat_fold <- predict(object=xxls_out, X0=X_test, U0=U_test, mcols=iimm)
    yhatTT[ , iimm + 1 ] <- yhat_fold[[ "p0hat" ]]
}

errs_by_m <- NULL
for(iimm in 1:ncol(yhatTT)) {
    ## mean logistic (cross-entropy) cost on the hold-out fold
    yhatX <- yhatTT[ , iimm]
    errs_by_m[ iimm ] <- mean( f_logit_cost(y=y_test, yhat=yhatX) )
    cat(iimm, "::", errs_by_m[ iimm ], "\n")
}

##### plot test Logit vs m

plot(0:(length(errs_by_m)-1), errs_by_m, type="l", xlab="m", ylab="Logit Cost")
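
### pick the number of W columns with the smallest hold-out cost
### (a minimal sketch added for illustration; not part of the fitting API)
best_m <- which.min(errs_by_m) - 1
cat("best m by test logit cost:", best_m, "\n")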





################## test of the 'correct' model
xtrue_formula_use <- xtrue_formula



xglm <- glm(xnoint_formula , data=dfx[ xndx_train, ], family=binomial(link="logit"))
yhat <- predict(xglm, newdata=dfx[ xndx_test, ], type="response")
cat("\n\n", "No interaction model logit:", mean( f_logit_cost(y=y_test, yhat=yhat) ), "\n")


xglm <- glm(xtrue_formula_use , data=dfx[ xndx_train, ], family=binomial(link="logit"))
yhat <- predict(xglm, newdata=dfx[ xndx_test, ], type="response")
cat("\n\n", "'true' model logit:", mean( f_logit_cost(y=y_test, yhat=yhat) ) , "\n")


cat( "Runtime:", difftime(Sys.time(), xxnow, units="secs"), "\n" )


[Package mactivate version 0.6.6 Index]