| Teams {Lahman} | R Documentation |
Teams table
Description
Yearly statistics and standings for teams
Usage
data(Teams)
Format
A data frame with 3015 observations on the following 48 variables.
yearID: Year
lgID: League; a factor with levels AA, AL, FL, NL, PL, UA
teamID: Team; a factor
franchID: Franchise (links to the TeamsFranchises table)
divID: Team's division; a factor with levels C, E, W
Rank: Position in final standings
G: Games played
Ghome: Games played at home
W: Wins
L: Losses
DivWin: Division Winner (Y or N)
WCWin: Wild Card Winner (Y or N)
LgWin: League Champion (Y or N)
WSWin: World Series Winner (Y or N)
R: Runs scored
AB: At bats
H: Hits by batters
X2B: Doubles
X3B: Triples
HR: Home runs by batters
BB: Walks by batters
SO: Strikeouts by batters
SB: Stolen bases
CS: Caught stealing
HBP: Batters hit by pitch
SF: Sacrifice flies
RA: Opponents' runs scored
ER: Earned runs allowed
ERA: Earned run average
CG: Complete games
SHO: Shutouts
SV: Saves
IPouts: Outs pitched (innings pitched x 3)
HA: Hits allowed
HRA: Home runs allowed
BBA: Walks allowed
SOA: Strikeouts by pitchers
E: Errors
DP: Double plays
FP: Fielding percentage
name: Team's full name
park: Name of team's home ballpark
attendance: Home attendance total
BPF: Three-year park factor for batters
PPF: Three-year park factor for pitchers
teamIDBR: Team ID used by Baseball Reference website
teamIDlahman45: Team ID used in Lahman database version 4.5
teamIDretro: Team ID used by Retrosheet
Details
Variables X2B and X3B are named 2B and 3B in the original database; they are renamed here because a syntactic R name cannot begin with a digit.
Source
Lahman, S. (2021) Lahman's Baseball Database, 1871-2020, 2020 version, https://www.seanlahman.com/baseball-archive/statistics/
Examples
data(Teams)
library("dplyr")
library("ggplot2")
library("tidyr")
# Add some selected per-game and rate measures to the Teams data frame.
# Restrict to the AL and NL in the modern era (1901 onward).
#
# Every measure below is plain column arithmetic evaluated row by row, so no
# group_by() is needed; leaving `teams` ungrouped keeps later verbs from
# silently inheriting a (yearID, teamID) grouping.
teams <- Teams %>%
  filter(yearID >= 1901 & lgID %in% c("AL", "NL")) %>%
  mutate(
    # Total bases: each hit is worth one base, plus the extra bases for
    # doubles (+1), triples (+2) and home runs (+3)
    TB = H + X2B + 2 * X3B + 3 * HR,
    WinPct = W / G,
    rpg = R / G,
    hrpg = HR / G,
    tbpg = TB / G,
    # SO and BB are the batting-side columns (strikeouts / walks by batters)
    kpg = SO / G,
    k2bb = SO / BB,
    # WHIP is a pitching measure: (walks + hits allowed) per inning pitched.
    # IPouts is innings pitched x 3, hence the factor of 3; the pitching-side
    # columns HA and BBA are used rather than the batting-side H and BB.
    whip = 3 * (HA + BBA) / IPouts
  )
# Create a ggplot of one team statistic against season year.
#
# Arguments:
#   yvar   name of the column to plot on the y axis, as a character string
#   label  character string used to build the y-axis title: "<label> per game"
#   data   data frame containing `yearID` and the column named by `yvar`;
#          defaults to the `teams` data frame built above
#
# Returns the ggplot object: points plus a loess smooth.
#
# ggplot2 must already be attached (library("ggplot2")); the function no
# longer attaches it as a side effect of being called.
yrPlot <- function(yvar, label, data = teams) {
  # .data[[yvar]] looks the column up by its string name; it replaces the
  # deprecated aes_string() interface
  ggplot(data, aes(x = yearID, y = .data[[yvar]])) +
    geom_point(size = 0.5) +
    geom_smooth(method = "loess") +
    labs(x = "Year", y = paste(label, "per game"))
}
## Year-by-year trends in the modern era, one plot per statistic

## Runs scored per game
yrPlot(yvar = "rpg", label = "Runs")

## Home runs per game
yrPlot(yvar = "hrpg", label = "Home runs")

## Total bases per game
yrPlot(yvar = "tbpg", label = "Total bases")

## Strikeouts per game
yrPlot(yvar = "kpg", label = "Strikeouts")
## Win percentage against run differential (runs scored minus runs allowed).
## The orange reference lines mark a win percentage of 0.5 and a run
## differential of zero.
teams %>%
  ggplot(mapping = aes(x = R - RA, y = WinPct)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  geom_hline(yintercept = 0.5, color = "orange") +
  geom_vline(xintercept = 0, color = "orange") +
  labs(x = "Run differential", y = "Win percentage")
## Attendance (in thousands) against win percentage, one panel per league,
## for seasons from 1980 onward
teams %>%
  filter(yearID >= 1980) %>%
  ggplot(mapping = aes(x = WinPct, y = attendance / 1000)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess", se = FALSE) +
  facet_wrap(~ lgID) +
  labs(x = "Win percentage", y = "Attendance (1000s)")
## Team-seasons with home attendance of at least 4 million,
## largest crowds first
teams %>%
  select(yearID, lgID, teamID, Rank, attendance) %>%
  filter(attendance >= 4e6) %>%
  arrange(desc(attendance))
## Ten parks with the highest average home-run rate, seasons from 1980 onward,
## keeping only parks that appear in at least 10 team-seasons.
## NOTE(review): HR and HRA look like full-season totals while Ghome counts
## home games only, so this ratio is a rough proxy rather than home runs
## actually hit in the park -- confirm that this is the intended measure.
teams %>%
  filter(yearID >= 1980) %>%
  group_by(park) %>%
  summarise(
    meanHRpg = mean((HR + HRA) / Ghome),
    nyears = n()
  ) %>%
  filter(nyears >= 10) %>%
  arrange(desc(meanHRpg)) %>%
  head(10)
## Home runs (hit plus allowed) per home game, by year, for the clubs that
## play at Fenway Park (BOS) and Wrigley Field (CHN), the two oldest MLB
## parks. The series starts in 1912, the year Fenway opened.
teams %>%
  filter(yearID >= 1912, teamID %in% c("BOS", "CHN")) %>%
  mutate(hrpg = (HR + HRA) / Ghome) %>%
  ggplot(mapping = aes(x = yearID, y = hrpg, color = teamID)) +
  geom_line(size = 1) +
  geom_point() +
  labs(x = "Year", y = "Home runs per game", color = "Team") +
  scale_color_manual(values = c("red", "blue"))
## Same comparison for strikeouts: batters' strikeouts plus pitchers'
## strikeouts per home game, by year, for BOS and CHN from 1912 onward
teams %>%
  filter(yearID >= 1912, teamID %in% c("BOS", "CHN")) %>%
  mutate(kpg = (SO + SOA) / Ghome) %>%
  ggplot(mapping = aes(x = yearID, y = kpg, color = teamID)) +
  geom_line(size = 1) +
  geom_point() +
  labs(x = "Year", y = "Strikeouts per game", color = "Team") +
  scale_color_manual(values = c("red", "blue"))
## Not run:
# Interactive motion charts. googleVis is an optional dependency, so the
# whole section is skipped when the package is not installed.
if (requireNamespace("googleVis", quietly = TRUE)) {
  # One bubble per team, animated over seasons
  motion1 <- googleVis::gvisMotionChart(
    as.data.frame(teams),
    idvar = "teamID", timevar = "yearID", chartid = "gvisTeams",
    options = list(width = 700, height = 600)
  )
  plot(motion1)
  # print(motion1, file = "gvisTeams.html")

  # Total salary per team and season, for the years where salary is
  # available. .groups = "drop" returns an ungrouped table, so no trailing
  # select() is needed to tidy up the result.
  teamsal <- Salaries %>%
    group_by(yearID, teamID) %>%
    summarise(Salary = sum(salary, na.rm = TRUE), .groups = "drop")

  # Attach the salary totals to the team measures
  teamsSal <- teams %>%
    filter(yearID >= 1985) %>%
    left_join(teamsal, by = c("yearID", "teamID")) %>%
    select(yearID, teamID, attendance, Salary, WinPct) %>%
    as.data.frame()

  # yvar must match the column name exactly: the summarised column is
  # `Salary` (capital S); `salary` does not exist in teamsSal
  motion2 <- googleVis::gvisMotionChart(
    teamsSal,
    idvar = "teamID", timevar = "yearID",
    xvar = "attendance", yvar = "Salary", sizevar = "WinPct",
    chartid = "gvisTeamsSal", options = list(width = 700, height = 600)
  )
  plot(motion2)
  # print(motion2, file = "gvisTeamsSal.html")
}
## End(Not run)