case1202 {Sleuth3}R Documentation

Sex discrimination in Employment

Description

Data on employees from one job category (skilled, entry–level clerical) of a bank that was sued for sex discrimination. The data are on 32 male and 61 female employees, hired between 1965 and 1975.

Usage

case1202

Format

A data frame with 93 observations on the following 7 variables.

Bsal

Annual salary at time of hire

Sal77

Salary as of March 1977

Sex

Sex of employee

Senior

Seniority (months since first hired)

Age

Age of employee (in months)

Educ

Education (in years)

Exper

Work experience prior to employment with the bank (months)

Source

Ramsey, F.L. and Schafer, D.W. (2013). The Statistical Sleuth: A Course in Methods of Data Analysis (3rd ed), Cengage Learning.

References

Roberts, H.V. (1979). Harris Trust and Savings Bank: An Analysis of Employee Compensation, Report 7946, Center for Mathematical Studies in Business and Economics, University of Chicago Graduate School of Business.

See Also

case0102

Examples

str(case1202)
# attach() puts the data frame's columns (Bsal, Sex, Senior, Age, Educ, Exper)
# on the search path so the code below can refer to them by bare name.
# It is paired with the detach(case1202) call at the end of the example.
attach(case1202)

## EXPLORATION
# Beginning salary is analysed on the log scale throughout.
logSal <- log(Bsal)
myMatrix <- cbind(logSal, Senior, Age, Educ, Exper)
# NOTE(review): newer car releases appear to expect
# diagonal=list(method="histogram") -- confirm against the installed version.
if (require(car)) {  # Use the car library (optional; skipped if unavailable)
  scatterplotMatrix(myMatrix, smooth=FALSE, diagonal="histogram",
                    groups=Sex, col=c("red","blue"))
}

# First-pass model: all qualification variables plus Sex, linear terms only.
myLm1 <- lm(logSal ~ Senior + Age + Educ + Exper + Sex)
plot(myLm1, which=1)  # Residuals vs fitted values
plot(myLm1, which=4)  # Cook's distance
if (require(car)) {   # Use the car library
  crPlots(myLm1)      # Partial residual plots
}

# Polynomial terms for Age and Exper. They are created as ordinary variables
# (not I(Age^2) inside the formula) because later code refers to them by name.
ageSquared   <- Age^2
ageCubed     <- Age^3
experSquared <- Exper^2
experCubed   <- Exper^3
myLm2 <- lm(logSal ~ Senior + Age + ageSquared + ageCubed +
  Educ + Exper + experSquared + experCubed + Sex)
plot(myLm2, which=1)  # Residual plot
# Diagnostics here are for the cubic model myLm2 (the original example
# re-plotted myLm1, duplicating the Cook's distance plot drawn earlier).
plot(myLm2, which=4)  # Cook's distance

# Search for a good set of qualification variables (Sex deliberately excluded)
# using BIC. The (...)^2 formula expands to the six listed terms plus all of
# their two-way interactions; logSal, ageSquared and experSquared are taken
# from the workspace, the remaining variables from case1202.
if (require(leaps)) {  # Use the leaps library (optional; skipped if unavailable)
  mySubsets <- regsubsets(logSal ~ (Senior + Age + Educ + Exper +
    ageSquared + experSquared)^2, nvmax=25, data=case1202)
  mySummary <- summary(mySubsets)
  # Number of columns flagged TRUE in each row of the selection matrix,
  # i.e. the size of the best model found at each step.
  p <- rowSums(mySummary$which)
  plot(mySummary$bic ~ p, ylab = "BIC")
  # Values are not auto-printed inside braces unless they are the final
  # expression, so print explicitly to make the BIC table visible.
  print(cbind(p, mySummary$bic))
  print(mySummary$which[8, ])  # Note that Age:ageSquared = ageCubed
}

# Model suggested by the subset search, written with ageCubed in place of
# the equivalent Age:ageSquared interaction.
myLm3 <- lm(logSal ~ Age + Educ + ageSquared + Senior:Educ +
  Age:Exper + ageCubed + Educ:Exper + Exper:ageSquared)
summary(myLm3)

# Add the Sex indicator to the chosen qualification model.
myLm4 <- update(myLm3, ~ . + Sex)
summary(myLm4)
# Check whether the Sex effect varies with the qualification variables:
# extra-sum-of-squares F-test of the Sex interaction terms.
myLm5 <- update(myLm4, ~ . + Sex:Age + Sex:Educ + Sex:Senior +
  Sex:Exper + Sex:ageSquared)
anova(myLm4, myLm5)

## INFERENCE AND INTERPRETATION
summary(myLm4)
# Use the coef() accessor rather than `$coef`, which only works through
# partial matching of the `coefficients` component.
beta <- coef(myLm4)
# Select the Sex effect by name instead of by position so the result does not
# silently change if terms are added to or removed from the model.
# Because the response is log salary, exp() turns the coefficient (and its
# confidence limits) into a multiplicative male-to-female ratio.
exp(beta["SexMale"])
exp(confint(myLm4, "SexMale"))
# Conclusion:  The median beginning salary for males was estimated to be 12%
# higher than the median salary for females with similar values of the available
# qualification variables (95% confidence interval: 7% to 17% higher).

## DISPLAY FOR PRESENTATION
# Scatterplot of beginning salary (log-scaled axis) against prior experience,
# with plotting symbol and colour distinguishing the two sexes.
isMale <- Sex == "Male"
years <- Exper / 12  # Change months to years
plot(Bsal ~ years, log = "y",
  xlab = "Previous Work Experience (Years)",
  ylab = "Beginning Salary (Dollars); Log Scale",
  main = "Beginning Salaries and Experience for 61 Female and 32 Male Employees",
  pch = ifelse(isMale, 24, 21), bg = "gray",
  col = ifelse(isMale, "blue", "red"), lwd = 2, cex = 1.8)

# Cubic-in-experience model with a constant shift for Sex; its coefficients
# are, in order: intercept, Exper, experSquared, experCubed, Sex.
myLm6 <- lm(logSal ~ Exper + experSquared + experCubed + Sex)
beta <- coef(myLm6)

# Evaluate the fitted log-salary curves on a grid of 50 experience values
# spanning the observed range (in months, matching the model's units).
dummyExper <- seq(min(Exper), max(Exper), length.out = 50)
curveF <- beta[1] + beta[2] * dummyExper + beta[3] * dummyExper^2 +
  beta[4] * dummyExper^3
curveM <- curveF + beta[5]  # Male curve = female curve + Sex coefficient

# Back-transform to dollars and overlay on the plot (x axis is in years).
dummyYears <- dummyExper / 12
lines(dummyYears, exp(curveF), lty = 1, lwd = 2, col = "red")
lines(dummyYears, exp(curveM), lty = 2, lwd = 2, col = "blue")
legend(28, 8150, legend = c("Male", "Female"),
  pch = c(24, 21), pt.cex = 1.8, pt.lwd = 2, pt.bg = c("gray", "gray"),
  col = c("blue", "red"), lty = c(2, 1), lwd = 2)

# Undo the attach(case1202) made at the start of the example.
detach(case1202)

[Package Sleuth3 version 1.0-6 Index]