MEPS14 {glmMisrep} | R Documentation |
MEPS 2014 Full Year Consolidated Data File
Description
MEPS14 is a subset of the MEPS 2014 Full Year Consolidated Data File, as described in Xia et al. (2023).
Usage
data("MEPS14")
Format
A data frame with 13,301 observations on the following 7 variables:
TOTEXP14
total medical expenditure.
OBTOTV14
total number of office-based visits.
UNINS14
uninsured status (1 - uninsured, 0 - insured).
SEX
sex (1 - male, 0 - female).
AGE14X
age.
ADSMOK42
smoking status (1 - yes, 0 - no).
RTHLTH53
perceived health status (1 - excellent, 5 - poor).
Source
https://meps.ahrq.gov/mepsweb/data_stats/download_data_files_detail.jsp?cboPufNumber=HC-171
References
Xia, Michelle, Rexford Akakpo, and Matthew Albaugh. "Maximum Likelihood Approaches to Misrepresentation Models in GLM ratemaking: Model Comparisons." Variance 16.1 (2023).
Examples
# Reproduce Table 4 of Xia et al. (2023): per-variable means and standard
# deviations, plus the share of observations with zero office-based visits.
data(MEPS14)
# Column means of every variable in the data frame.
colMeans(MEPS14)
# TOTEXP14 OBTOTV14 UNINS14 SEX AGE14X ADSMOK42 RTHLTH53
#5042.4647771 6.2260732 0.1242012 0.4153071 41.6628825 0.1670551 2.4319224
# Column standard deviations. vapply() walks the data frame's columns
# directly, so the data frame is not coerced to a matrix as apply() would do,
# and the result is guaranteed to be one numeric value per column.
vapply(MEPS14, sd, numeric(1))
# TOTEXP14 OBTOTV14 UNINS14 SEX AGE14X ADSMOK42 RTHLTH53
#1.358567e+04 1.272065e+01 3.298233e-01 4.927934e-01 1.332746e+01 3.730391e-01 1.074713e+00
# Proportion of observations with no office-based visits: the mean of the
# logical zero-visit indicator.
mean(MEPS14$OBTOTV14 == 0)
# [1] 0.1595369
# Standard deviation of the same zero-visit indicator.
sd(MEPS14$OBTOTV14 == 0)
# [1] 0.3661898
# Fit a Gamma regression model for total medical expenditure (TOTEXP14),
# with the uninsured-status indicator UNINS14 passed as v_star, i.e. as
# the misrepresented variable.
# RTHLTH53 is converted to a factor first so that it enters the model as a
# categorical predictor (see the RTHLTH532-RTHLTH535 rows in the output).
# The conversion overwrites the column in MEPS14, so the later fits in this
# example also see the factor version.
MEPS14$RTHLTH53 <- as.factor(MEPS14$RTHLTH53)
gamma_fit <- gammaRegMisrepEM(formula = TOTEXP14 ~ UNINS14
+ SEX + AGE14X + ADSMOK42 + RTHLTH53,
v_star = "UNINS14", data = MEPS14)
# summary() returns a table of summary statistics, including
# goodness-of-fit measures (AIC, AICc, BIC), the log-likelihood, and the
# estimated prevalence of misrepresentation (presumably the "Lambda" line
# at the end of the output -- confirm against the package documentation).
summary(gamma_fit)
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 8.03379 0.05341 150.41937 <2e-16 ***
# UNINS14 -1.98132 0.03170 -62.49292 <2e-16 ***
# SEX -0.20427 0.02669 -7.65320 <2e-16 ***
# AGE14X 0.02764 0.00099 27.83485 <2e-16 ***
# ADSMOK42 -0.08868 0.03653 -2.42776 0.01521 *
# RTHLTH532 0.24923 0.03533 7.05469 <2e-16 ***
# RTHLTH533 0.53860 0.03655 14.73488 <2e-16 ***
# RTHLTH534 1.00615 0.04837 20.80026 <2e-16 ***
# RTHLTH535 1.87845 0.08104 23.17833 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
# AIC AICc BIC
# 241083.9 241083.9 241166.3
# ---
# Log-Likelihood
# -120530.9
# ---
# Lambda: 0.7734337 std.err: 0.009628053
# Fit a Lognormal regression model, using log(TOTEXP14) as the response,
# with the uninsured-status indicator UNINS14 passed as v_star, i.e. as
# the misrepresented variable. The predictors are the same as in the
# Gamma fit.
LN_fit <- LnRegMisrepEM(formula = log(TOTEXP14) ~ UNINS14
+ SEX + AGE14X + ADSMOK42 + RTHLTH53,
v_star = "UNINS14", data = MEPS14)
# Print the coefficient table, information criteria, log-likelihood and
# the Lambda estimate for the Lognormal fit.
summary(LN_fit)
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 7.28974 0.05648 129.05986 <2e-16 ***
# UNINS14 -1.29503 0.05496 -23.56317 <2e-16 ***
# SEX -0.29590 0.02808 -10.53844 <2e-16 ***
# AGE14X 0.02460 0.00107 23.10180 <2e-16 ***
# ADSMOK42 -0.07008 0.03756 -1.86591 0.06208 .
# RTHLTH532 0.26349 0.03831 6.87786 <2e-16 ***
# RTHLTH533 0.47184 0.03942 11.97017 <2e-16 ***
# RTHLTH534 1.05065 0.04990 21.05580 <2e-16 ***
# RTHLTH535 1.94978 0.08067 24.16987 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
# AIC AICc BIC
# 239726.4 239726.4 239808.8
# ---
# Log-Likelihood
# -119852.2
# ---
# Lambda: 0.1110631 std.err: 0.02548188
# Fit a Negative Binomial regression model for the number of office-based
# visits (OBTOTV14), with the uninsured-status indicator UNINS14 passed as
# v_star, i.e. as the misrepresented variable.
NB_fit <- nbRegMisrepEM(formula = OBTOTV14 ~ UNINS14
+ SEX + AGE14X + ADSMOK42 + RTHLTH53,
v_star = "UNINS14", data = MEPS14)
# Print the coefficient table, information criteria, log-likelihood and
# the Lambda estimate for the Negative Binomial fit.
summary(NB_fit)
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 2.00472 0.05463 36.69491 <2e-16 ***
# UNINS14 -1.68638 0.03371 -50.02640 <2e-16 ***
# SEX -0.40917 0.02303 -17.76536 <2e-16 ***
# AGE14X 0.01897 0.00087 21.91823 <2e-16 ***
# ADSMOK42 -0.11391 0.03038 -3.74948 0.00018 ***
# RTHLTH532 0.20720 0.03183 6.50966 <2e-16 ***
# RTHLTH533 0.36794 0.03240 11.35678 <2e-16 ***
# RTHLTH534 0.72357 0.03978 18.18859 <2e-16 ***
# RTHLTH535 1.24468 0.06281 19.81714 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
# AIC AICc BIC
# 72788.71 72788.73 72871.16
# ---
# Log-Likelihood
# -36383.35
# ---
# Lambda: 0.8351591 std.err: 0.009627158
# Fit a Poisson regression model for the number of office-based visits
# (OBTOTV14), with the uninsured-status indicator UNINS14 passed as v_star,
# i.e. as the misrepresented variable. Smoking status (ADSMOK42) enters
# only as an ordinary predictor here.
pois_fit <- poisRegMisrepEM(formula = OBTOTV14 ~ UNINS14
+ SEX + AGE14X + ADSMOK42 + RTHLTH53,
v_star = "UNINS14", data = MEPS14)
# Print the coefficient table, information criteria, log-likelihood and
# the Lambda estimate for the Poisson fit.
summary(pois_fit)
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 2.27367 0.02276 99.87676 <2e-16 ***
# UNINS14 -2.03719 0.00730 -279.00809 <2e-16 ***
# SEX -0.18594 0.01090 -17.05204 <2e-16 ***
# AGE14X 0.01631 0.00042 38.90467 <2e-16 ***
# ADSMOK42 0.09594 0.01313 7.30930 <2e-16 ***
# RTHLTH532 0.14918 0.01641 9.09033 <2e-16 ***
# RTHLTH533 0.31282 0.01620 19.31078 <2e-16 ***
# RTHLTH534 0.75044 0.01793 41.85270 <2e-16 ***
# RTHLTH535 1.09859 0.02265 48.49410 <2e-16 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ---
# AIC AICc BIC
# 99599.31 99599.33 99674.27
# ---
# Log-Likelihood
# -49789.66
# ---
# Lambda: 0.85957 std.err: 0.00348128
[Package glmMisrep version 0.1.1 Index]