Teams {Lahman} | R Documentation |
Teams table
Description
Yearly statistics and standings for teams
Usage
data(Teams)
Format
A data frame with 3015 observations on the following 48 variables.
yearID
Year
lgID
League; a factor with levels
AA
AL
FL
NL
PL
UA
teamID
Team; a factor
franchID
Franchise (links to
TeamsFranchises
table)divID
Team's division; a factor with levels
C
E
W
Rank
Position in final standings
G
Games played
Ghome
Games played at home
W
Wins
L
Losses
DivWin
Division Winner (Y or N)
WCWin
Wild Card Winner (Y or N)
LgWin
League Champion(Y or N)
WSWin
World Series Winner (Y or N)
R
Runs scored
AB
At bats
H
Hits by batters
X2B
Doubles
X3B
Triples
HR
Homeruns by batters
BB
Walks by batters
SO
Strikeouts by batters
SB
Stolen bases
CS
Caught stealing
HBP
Batters hit by pitch
SF
Sacrifice flies
RA
Opponents runs scored
ER
Earned runs allowed
ERA
Earned run average
CG
Complete games
SHO
Shutouts
SV
Saves
IPouts
Outs Pitched (innings pitched x 3)
HA
Hits allowed
HRA
Homeruns allowed
BBA
Walks allowed
SOA
Strikeouts by pitchers
E
Errors
DP
Double Plays
FP
Fielding percentage
name
Team's full name
park
Name of team's home ballpark
attendance
Home attendance total
BPF
Three-year park factor for batters
PPF
Three-year park factor for pitchers
teamIDBR
Team ID used by Baseball Reference website
teamIDlahman45
Team ID used in Lahman database version 4.5
teamIDretro
Team ID used by Retrosheet
Details
Variables X2B
and X3B
are named 2B
and 3B
in the original database
Source
Lahman, S. (2021) Lahman's Baseball Database, 1871-2020, 2020 version, https://www.seanlahman.com/baseball-archive/statistics/
Examples
data(Teams)
library("dplyr")
library("tidyr")
# Add some selected measures to the Teams data frame
# Restrict to AL and NL in modern era
teams <- Teams %>%
filter(yearID >= 1901 & lgID %in% c("AL", "NL")) %>%
group_by(yearID, teamID) %>%
mutate(TB = H + X2B + 2 * X3B + 3 * HR,
WinPct = W/G,
rpg = R/G,
hrpg = HR/G,
tbpg = TB/G,
kpg = SO/G,
k2bb = SO/BB,
whip = 3 * (H + BB)/IPouts)
# Function to create a ggplot by year for selected team stats
# Both arguments are character strings
yrPlot <- function(yvar, label)
{
require("ggplot2")
ggplot(teams, aes_string(x = "yearID", y = yvar)) +
geom_point(size = 0.5) +
geom_smooth(method="loess") +
labs(x = "Year", y = paste(label, "per game"))
}
## Run scoring in the modern era by year
yrPlot("rpg", "Runs")
## Home runs per game by year
yrPlot("hrpg", "Home runs")
## Total bases per game by year
yrPlot("tbpg", "Total bases")
## Strikeouts per game by year
yrPlot("kpg", "Strikeouts")
## Plot win percentage vs. run differential (R - RA)
ggplot(teams, aes(x = R - RA, y = WinPct)) +
geom_point(size = 0.5) +
geom_smooth(method="loess") +
geom_hline(yintercept = 0.5, color = "orange") +
geom_vline(xintercept = 0, color = "orange") +
labs(x = "Run differential", y = "Win percentage")
## Plot attendance vs. win percentage by league, post-1980
teams %>% filter(yearID >= 1980) %>%
ggplot(., aes(x = WinPct, y = attendance/1000)) +
geom_point(size = 0.5) +
geom_smooth(method="loess", se = FALSE) +
facet_wrap(~ lgID) +
labs(x = "Win percentage", y = "Attendance (1000s)")
## Teams with over 4 million attendance in a season
teams %>%
filter(attendance >= 4e6) %>%
select(yearID, lgID, teamID, Rank, attendance) %>%
arrange(desc(attendance))
## Average season HRs by park, post-1980
teams %>%
filter(yearID >= 1980) %>%
group_by(park) %>%
summarise(meanHRpg = mean((HR + HRA)/Ghome), nyears = n()) %>%
filter(nyears >= 10) %>%
arrange(desc(meanHRpg)) %>%
head(., 10)
## Home runs per game at Fenway Park and Wrigley Field,
## the two oldest MLB parks, by year. Fenway opened in 1912.
teams %>%
filter(yearID >= 1912 & teamID %in% c("BOS", "CHN")) %>%
mutate(hrpg = (HR + HRA)/Ghome) %>%
ggplot(., aes(x = yearID, y = hrpg, color = teamID)) +
geom_line(size = 1) +
geom_point() +
labs(x = "Year", y = "Home runs per game", color = "Team") +
scale_color_manual(values = c("red", "blue"))
## Ditto for total strikeouts per game
teams %>%
filter(yearID >= 1912 & teamID %in% c("BOS", "CHN")) %>%
mutate(kpg = (SO + SOA)/Ghome) %>%
ggplot(., aes(x = yearID, y = kpg, color = teamID)) +
geom_line(size = 1) +
geom_point() +
labs(x = "Year", y = "Strikeouts per game", color = "Team") +
scale_color_manual(values = c("red", "blue"))
## Not run:
if(require(googleVis)) {
motion1 <- gvisMotionChart(as.data.frame(teams),
idvar="teamID", timevar="yearID", chartid="gvisTeams",
options=list(width=700, height=600))
plot(motion1)
#print(motion1, file="gvisTeams.html")
# Merge with avg salary for years where salary is available
teamsal <- Salaries %>%
group_by(yearID, teamID) %>%
summarise(Salary = sum(salary, na.rm = TRUE)) %>%
select(yearID, teamID, Salary)
teamsSal <- teams %>%
filter(yearID >= 1985) %>%
left_join(teamsal, by = c("yearID", "teamID")) %>%
select(yearID, teamID, attendance, Salary, WinPct) %>%
as.data.frame(.)
motion2 <- gvisMotionChart(teamsSal, idvar="teamID", timevar="yearID",
xvar="attendance", yvar="salary", sizevar="WinPct",
chartid="gvisTeamsSal", options=list(width=700, height=600))
plot(motion2)
#print(motion2, file="gvisTeamsSal.html")
}
## End(Not run)