f_fit_gradient_logistic_01 {mactivate}    R Documentation

Fit Logistic Multivariate Regression Model with mactivate Using Gradient Descent

Description

Use simple gradient descent to locate the logistic model parameters: the primary effects, the multiplicative effects, and the activation parameters, W.

Usage

f_fit_gradient_logistic_01(
X, 
y, 
m_tot, 
U = NULL, 
m_start = 1, 
mact_control = f_control_mactivate(), 
verbosity = 2)

Arguments

X

Numerical matrix, N x d, of model inputs. Do not include an intercept term.

y

Integer vector of length N with elements in {0, 1}: the binomial model response (output).

m_tot

Scalar non-negative integer. Total number of columns of activation layer, W, over which to fit.

U

Numerical matrix, N x d_u, of model inputs to pass to the activation layer, W. The default, NULL, instructs this function to simply use the argument X.
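
For example, under this default the two calls below are equivalent (a minimal sketch; Xmat and yvec stand for any suitable input matrix and response vector):

fit_a <- f_fit_gradient_logistic_01(X = Xmat, y = yvec, m_tot = 2)
fit_b <- f_fit_gradient_logistic_01(X = Xmat, y = yvec, m_tot = 2, U = Xmat)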

m_start

Currently not used.

mact_control

Named list of class control_mactivate_obj, as created by the function f_control_mactivate, specifying the fitting hyperparameters.

verbosity

Scalar integer. Larger values print more progress information during fitting.

Details

Please make sure to read the Details section of the f_dmss_dW help page before using this function.

Value

An unnamed list of class mactivate_fit_gradient_logistic_01 of length m_tot + 1. Each node is a named list containing fitted parameter estimates. The first top-level node holds the parameter estimates from fitting the 'primary effects' only (W has no columns); the second, the estimates from fitting with 1 column of W; and so on.
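
For example, a minimal sketch of inspecting the node fitted with m columns of W (xxls_out being the returned list, as in the Examples below):

m <- 2                          ## any m in 0:m_tot
fit_m <- xxls_out[[ m + 1 ]]    ## estimates fitted using the first m columns of W
names(fit_m)                    ## names of the fitted parameter estimates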

See Also

See f_fit_gradient_01 or f_fit_hybrid_01 for MLR data (numerical response).

Examples


xxnow <- Sys.time()

library(mactivate)

set.seed(777)


d <- 4
N <- 2400

X <- matrix(rnorm(N*d, 0, 1), N, d) ####

colnames(X) <- paste0("x", 1:d)

############# primary effects
b <- rep_len( c(-1/2, 1/2), d )


xxA <- (X[ , 1]+1/3) * (X[ , 2]-1/3)
xxB <- (X[ , 1]+0/3) * (X[ , 2]-0/3) * (X[ , 4]-1/3)


ystar <-
X %*% b +
2 * xxA -
1 * xxB

xtrue_formula <- as.formula("y ~ .")

xnoint_formula <- as.formula("y ~ . - xxA - xxB")


ysigmoid <- 1 / (1 + exp(-ystar))

range(ysigmoid)

y <- rbinom(n=N, size=1, prob=ysigmoid)


Nall <- N

cov(X)

yall <- y

Xall <- X

### Xall <- X + rnorm(prod(dim(X)), 0, 1/10000) ### add a little noise -- optional

sd(y)


dfx <- data.frame("y"=yall, Xall, xxA, xxB)

tail(dfx)



################### incorrectly fit LM: no interactions
xglm <- glm(xnoint_formula , data=dfx, family=binomial(link="logit"))
summary(xglm)
yhat <- predict(xglm, newdata=dfx, type="response")
mean( f_logit_cost(y=yall, yhat=yhat) )


####### known true
xglm <- glm(xtrue_formula , data=dfx, family=binomial(link="logit"))
summary(xglm)
yhat <- predict(xglm, newdata=dfx, type="response")
mean( f_logit_cost(y=yall, yhat=yhat) )



xxfoldNumber <- rep_len( 1:4, Nall )

ufolds <- sort(unique(xxfoldNumber))
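
### NOTE: only the first fold is held out below; for a full cross-validation,
### wrap the remainder of this example in a loop over ufolds.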

######################

xthis_fold <- ufolds[ 1 ]

xndx_test <- which( xxfoldNumber %in% xthis_fold )

xndx_train <- setdiff( 1:Nall, xndx_test )

##################

X_train <- Xall[ xndx_train, , drop=FALSE ]

X_test <- Xall[ xndx_test, , drop=FALSE ]

y_train <- yall[ xndx_train ]

y_test <- yall[ xndx_test ]



###################

m_tot <- 4

xcmact_gradient <-
f_control_mactivate(
param_sensitivity = 10^11,
bool_free_w       = FALSE,
w0_seed           = 0.05,
#w_col_search      = "alternate",
w_col_search      = "one",
bool_headStart    = TRUE,
ss_stop           = 10^(-12), ### very small
escape_rate       = 1.02,
step_size         = 1,
Wadj              = 1/1,
force_tries       = 0,
lambda            = 1/1 #### does nothing here
)


Uall <- Xall


X_train <- Xall[ xndx_train, , drop=FALSE ]
y_train <- yall[ xndx_train ]


xxls_out <-
f_fit_gradient_logistic_01(
X = X_train,
y = y_train,
m_tot = m_tot,
U = X_train,
m_start = 1,
mact_control = xcmact_gradient,
verbosity = 0
)


######### check test error

U_test <- Xall[ xndx_test, , drop=FALSE ]
X_test <- Xall[ xndx_test, , drop=FALSE ]
y_test <- yall[ xndx_test ]


yhatTT <- matrix(NA, length(xndx_test), m_tot+1)

for(iimm in 0:m_tot) {
    ## predicted test-set probabilities using the first iimm columns of W
    yhat_fold <- predict(object=xxls_out, X0=X_test, U0=U_test, mcols=iimm)
    yhatTT[ , iimm + 1 ] <- yhat_fold[[ "p0hat" ]]
}

errs_by_m <- NULL
for(iimm in 1:ncol(yhatTT)) {
    ## mean logistic (cross-entropy) cost on the hold-out fold
    yhatX <- yhatTT[ , iimm]
    errs_by_m[ iimm ] <- mean( f_logit_cost(y=y_test, yhat=yhatX) )
    cat(iimm, "::", errs_by_m[ iimm ], "\n")
}

##### plot test Logit vs m

plot(0:(length(errs_by_m)-1), errs_by_m, type="l", xlab="m", ylab="Logit Cost")
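
### pick the number of W columns with the smallest hold-out cost
### (a minimal sketch added for illustration; not part of the fitting API)
best_m <- which.min(errs_by_m) - 1
cat("best m by test logit cost:", best_m, "\n")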





################## test of the 'correct' model
xtrue_formula_use <- xtrue_formula



xglm <- glm(xnoint_formula , data=dfx[ xndx_train, ], family=binomial(link="logit"))
yhat <- predict(xglm, newdata=dfx[ xndx_test, ], type="response")
cat("\n\n", "No interaction model logit:", mean( f_logit_cost(y=y_test, yhat=yhat) ), "\n")


xglm <- glm(xtrue_formula_use , data=dfx[ xndx_train, ], family=binomial(link="logit"))
yhat <- predict(xglm, newdata=dfx[ xndx_test, ], type="response")
cat("\n\n", "'true' model logit:", mean( f_logit_cost(y=y_test, yhat=yhat) ) , "\n")


cat( "Runtime:", difftime(Sys.time(), xxnow, units="secs"), "\n" )


[Package mactivate version 0.6.6 Index]