LMTrainer {superml} | R Documentation |
Linear Models Trainer
Description
Trains regression, lasso, and ridge models in R
Details
Trains linear models such as logistic, lasso, or ridge regression models. It is built on the glmnet R package. This class provides fit, predict, and cross-validation functions.
Public fields
family
type of regression to perform, values can be "gaussian", "binomial", "multinomial", "mgaussian"
weights
observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation
alpha
The elasticnet mixing parameter, alpha=1 is the lasso penalty, alpha=0 the ridge penalty, alpha=NULL is simple regression
lambda
the number of lambda values - default is 100
standardize
normalise the features in the given data
standardize.response
normalise the dependent variable between 0 and 1, default = FALSE
model
internal use
cvmodel
internal use
Flag
internal use
is_lasso
internal use
iid_names
internal use
Methods
Public methods
Method new()
Usage
LMTrainer$new(family, weights, alpha, lambda, standardize.response)
Arguments
family
character, type of regression to perform, values can be "gaussian", "binomial", "multinomial", "mgaussian"
weights
numeric, observation weights. Can be total counts if responses are proportion matrices. Default is 1 for each observation
alpha
integer, The elasticnet mixing parameter, alpha=1 is the lasso penalty, alpha=0 the ridge penalty, alpha=NULL is simple regression
lambda
integer, the number of lambda values - default is 100
standardize.response
logical, normalise the dependent variable between 0 and 1, default = FALSE
Details
Create a new 'LMTrainer' object.
Returns
A 'LMTrainer' object.
Examples
\dontrun{ LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data" housing <- read.table(LINK) names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS", "RAD","TAX","PTRATIO","B","LSTAT","MEDV") names(housing) <- names lf <- LMTrainer$new(family = 'gaussian', alpha=1) }
Method fit()
Usage
LMTrainer$fit(X, y)
Arguments
X
data.frame containing train features
y
character, name of target variable
Details
Fits the LMTrainer model on given data
Returns
NULL, trains the model and saves it internally
Examples
\dontrun{ LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data" housing <- read.table(LINK) names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS", "RAD","TAX","PTRATIO","B","LSTAT","MEDV") names(housing) <- names lf <- LMTrainer$new(family = 'gaussian', alpha=1) lf$fit(X = housing, y = 'MEDV') }
Method predict()
Usage
LMTrainer$predict(df, lambda = NULL)
Arguments
df
data.frame containing test features
lambda
integer, the number of lambda values - default is 100. By default it picks the best value from the model.
Details
Returns predictions for test data
Returns
vector, a vector containing predictions
Examples
\dontrun{ LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data" housing <- read.table(LINK) names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS", "RAD","TAX","PTRATIO","B","LSTAT","MEDV") names(housing) <- names lf <- LMTrainer$new(family = 'gaussian', alpha=1) lf$fit(X = housing, y = 'MEDV') predictions <- lf$predict(df = housing) }
Method cv_model()
Usage
LMTrainer$cv_model(X, y, nfolds, parallel, type.measure = "deviance")
Arguments
X
data.frame containing train features
y
character, name of target variable
nfolds
integer, number of folds
parallel
logical, whether to run the computation in parallel. Default = FALSE
type.measure
character, evaluation metric type. Default = deviance
Details
Train regression model using cross validation
Returns
NULL, trains the model and saves it in memory
Examples
\dontrun{ LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data" housing <- read.table(LINK) names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS", "RAD","TAX","PTRATIO","B","LSTAT","MEDV") names(housing) <- names lf <- LMTrainer$new(family = 'gaussian', alpha=1) lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE) }
Method cv_predict()
Usage
LMTrainer$cv_predict(df, lambda = NULL)
Arguments
df
data.frame containing test features
lambda
integer, the number of lambda values - default is 100. By default it picks the best value from the model.
Details
Get predictions from the cross validated regression model
Returns
vector, a vector containing predicted values
Examples
\dontrun{ LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data" housing <- read.table(LINK) names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS", "RAD","TAX","PTRATIO","B","LSTAT","MEDV") names(housing) <- names lf <- LMTrainer$new(family = 'gaussian', alpha=1) lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE) predictions <- lf$cv_predict(df = housing) }
Method get_importance()
Usage
LMTrainer$get_importance()
Details
Get feature importance using model coefficients
Returns
a matrix containing feature coefficients
Examples
\dontrun{ LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data" housing <- read.table(LINK) names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS", "RAD","TAX","PTRATIO","B","LSTAT","MEDV") names(housing) <- names lf <- LMTrainer$new(family = 'gaussian', alpha=1) lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE) predictions <- lf$cv_predict(df = housing) coefs <- lf$get_importance() }
Method clone()
The objects of this class are cloneable with this method.
Usage
LMTrainer$clone(deep = FALSE)
Arguments
deep
Whether to make a deep clone.
Examples
## ------------------------------------------------
## Method `LMTrainer$new`
## ------------------------------------------------
## Not run:
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
"RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing) <- names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
## End(Not run)
## ------------------------------------------------
## Method `LMTrainer$fit`
## ------------------------------------------------
## Not run:
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
"RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing) <- names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
## End(Not run)
## ------------------------------------------------
## Method `LMTrainer$predict`
## ------------------------------------------------
## Not run:
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
"RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing) <- names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$fit(X = housing, y = 'MEDV')
predictions <- lf$predict(df = housing)
## End(Not run)
## ------------------------------------------------
## Method `LMTrainer$cv_model`
## ------------------------------------------------
## Not run:
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
"RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing) <- names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
## End(Not run)
## ------------------------------------------------
## Method `LMTrainer$cv_predict`
## ------------------------------------------------
## Not run:
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
"RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing) <- names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
## End(Not run)
## ------------------------------------------------
## Method `LMTrainer$get_importance`
## ------------------------------------------------
## Not run:
LINK <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
housing <- read.table(LINK)
names <- c("CRIM","ZN","INDUS","CHAS","NOX","RM","AGE","DIS",
"RAD","TAX","PTRATIO","B","LSTAT","MEDV")
names(housing) <- names
lf <- LMTrainer$new(family = 'gaussian', alpha=1)
lf$cv_model(X = housing, y = 'MEDV', nfolds = 5, parallel = FALSE)
predictions <- lf$cv_predict(df = housing)
coefs <- lf$get_importance()
## End(Not run)