Madrid Traffic Accidents Dataset – Quarto Research Blog

This is an analysis of the Madrid Traffic Accidents dataset. You can find the code for downloading and cleaning the data in code/00_download/ and code/01_cleaning/.

Note

I wrote this code for the workshop “An Advanced Introduction to R” at CEMFI in January 2022. You can find the slides and the materials in the GitHub repository.

Code

# Horizontal dodged bar chart: count of rows in `accident_bike` per
# `type_person`, with one bar per `gender`. `scale_fill_gender()` is a
# project helper defined outside this chunk.
accident_bike |>
  ggplot(aes(x = fct_rev(type_person), fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  scale_fill_gender() +
  labs(x = NULL, y = NULL, fill = NULL, title = "Number of Accidents") +
  theme(
    # ggplot2 >= 3.5.0 deprecates a numeric `legend.position`; the in-panel
    # coordinates now go in `legend.position.inside`.
    legend.position = "inside",
    legend.position.inside = c(0.85, 0.1),
    panel.grid.major.y = element_blank()
  ) +
  # The fill factor is reversed with fct_rev() above; the legend keys are
  # reversed again here (presumably to restore the original level order).
  guides(fill = guide_legend(reverse = TRUE))

Logit Regression of Hospitalization and Death

Code

# Six fixed-effects logit fits on `accident_bike`: two outcomes crossed with
# three nested sets of fixed effects. Columns (1)-(3) model `is_hospitalized`,
# columns (4)-(6) model `is_died`.
covariates <- "type_person + positive_alcohol + positive_drug"
fe_sets <- c(
  "age_c + gender",
  "age_c + gender + type_vehicle",
  "age_c + gender + type_vehicle + weather"
)

# expand.grid() varies its first argument fastest, so the rows walk through
# the three fixed-effect sets for hospitalization first, then for death.
specs <- expand.grid(
  fe = fe_sets,
  outcome = c("is_hospitalized", "is_died"),
  stringsAsFactors = FALSE
)

models <- Map(
  \(outcome, fe) {
    feglm(
      as.formula(paste(outcome, "~", covariates, "|", fe)),
      family = binomial(logit),
      data = accident_bike
    )
  },
  specs$outcome,
  specs$fe
)

# Label the fits "(1)" ... "(6)" in estimation order.
names(models) <- sprintf("(%d)", seq_along(models))

# Coefficient map handed to modelsummary() as `coef_map`: names are the
# model term names, values are the row labels printed in the table.
# `positive_drug` is in the models but not listed here.
cm <- setNames(
  c("Passenger", "Pedestrian", "Positive Alcohol"),
  c("type_personPassenger", "type_personPedestrian", "positive_alcoholTRUE")
)

# Goodness-of-fit map handed to modelsummary() as `gof_map`, one row per
# statistic: `raw` is the statistic's name, `clean` the label printed in the
# table, and `fmt` its formatting value (0 for every row).
gm <- tribble(
  ~raw,               ~clean,                ~fmt,
  "nobs",             "Observations",        0,
  "FE: age_c",        "FE: Age Group",       0,
  "FE: gender",       "FE: Gender",          0,
  "FE: type_vehicle", "FE: Type of Vehicle", 0,
  "FE: weather",      "FE: Weather",         0
)

# Build the regression table from the six fits, using `cm` for coefficient
# rows and `gm` for goodness-of-fit rows.
summary_table <- modelsummary(
  models,
  stars = c("+" = .1, "*" = .05, "**" = .01),
  coef_map = cm,
  gof_map = gm
)

# Add spanning headers: table column 1 holds the row labels, so the six
# model columns are 2:7.
group_tt(
  summary_table,
  j = list("Hospitalization" = 2:4, "Died within 24 hours" = 5:7)
)

	Hospitalization			Died within 24 hours
	(1)	(2)	(3)	(4)	(5)	(6)
+ p < 0.1, * p < 0.05, ** p < 0.01
Passenger	0.026	0.501**	0.475**	-1.697**	-1.445*	-1.440*
	(0.100)	(0.069)	(0.068)	(0.616)	(0.631)	(0.631)
Pedestrian	2.117**	2.381**	2.298**	2.190**	2.326**	2.327**
	(0.114)	(0.061)	(0.058)	(0.260)	(0.248)	(0.246)
Positive Alcohol	0.007	0.355**	0.381**	-13.685**	-13.421**	-13.460**
	(0.072)	(0.081)	(0.081)	(0.059)	(0.055)	(0.054)
Observations	197477	197369	176133	118462	116344	112659
FE: Age Group	X	X	X	X	X	X
FE: Gender	X	X	X	X	X	X
FE: Type of Vehicle		X	X		X	X
FE: Weather			X			X

---
title: Madrid Traffic Accidents Dataset
author: Kazuharu Yanagimoto
date: 2024-03-24
categories: [descriptive, traffic accidents]
---

This is an analysis of the Madrid Traffic Accidents [dataset](https://datos.madrid.es/portal/site/egob/menuitem.c05c1f754a33a9fbe4b2e4b284f1a5a0/?vgnextoid=7c2843010d9c3610VgnVCM2000001f4a900aRCRD&vgnextchannel=374512b9ace9f310VgnVCM100000171f5a0aRCRD&vgnextfmt=default). You can find the code for downloading and cleaning the data in `code/00_download/` and `code/01_cleaning/`.

::: {.callout-note}
I wrote this code for the workshop "An Advanced Introduction to R" at CEMFI in January 2022. You can find the slides and the materials in the [GitHub repository](https://github.com/kazuyanagimoto/workshop-r-2022).
:::

```{r}
#| label: setup
#| include: false
# Default figure size and layout for every chunk in this document.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.height = (6 * 0.618),
  out.width = "80%",
  fig.align = "center",
  fig.retina = 3,
  collapse = TRUE
)
```

```{r}
#| label: library-data
#| include: false
library(tidyverse)
library(fixest)
library(tinytable)
library(modelsummary)
library(targets)

# Point {targets} at the project-level store and pipeline script.
tar_config_set(
  store = here::here("_targets"),
  script = here::here("_targets.R")
)

tar_load(c(accident_bike))

# Copy the elements of the `fns_graphics` target into the global environment;
# `theme_proj()` and `scale_fill_gender()` are presumably among them.
invisible(list2env(tar_read(fns_graphics), .GlobalEnv))
theme_set(theme_proj())
```

```{r}
#| label: figure-num-accidents
accident_bike |>
  ggplot(aes(x = fct_rev(type_person), fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  scale_fill_gender() +
  labs(x = NULL, y = NULL, fill = NULL, title = "Number of Accidents") +
  theme(
    # ggplot2 >= 3.5.0 deprecates a numeric `legend.position`; the in-panel
    # coordinates now go in `legend.position.inside`.
    legend.position = "inside",
    legend.position.inside = c(0.85, 0.1),
    panel.grid.major.y = element_blank()
  ) +
  guides(fill = guide_legend(reverse = TRUE))
```

## Logit Regression of Hospitalization and Death

```{r}
#| label: table-logit-hospital-death
# Columns (1)-(3): hospitalization; columns (4)-(6): death. Within each
# outcome the fixed effects grow from age + gender to + vehicle to + weather.
models <- list(
  "(1)" = feglm(
    is_hospitalized ~
      type_person + positive_alcohol + positive_drug | age_c + gender,
    family = binomial(logit),
    data = accident_bike
  ),
  "(2)" = feglm(
    is_hospitalized ~
      type_person +
        positive_alcohol +
        positive_drug |
        age_c + gender + type_vehicle,
    family = binomial(logit),
    data = accident_bike
  ),
  "(3)" = feglm(
    is_hospitalized ~
      type_person +
        positive_alcohol +
        positive_drug |
        age_c + gender + type_vehicle + weather,
    family = binomial(logit),
    data = accident_bike
  ),
  "(4)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug | age_c + gender,
    family = binomial(logit),
    data = accident_bike
  ),
  "(5)" = feglm(
    is_died ~
      type_person +
        positive_alcohol +
        positive_drug |
        age_c + gender + type_vehicle,
    family = binomial(logit),
    data = accident_bike
  ),
  "(6)" = feglm(
    is_died ~
      type_person +
        positive_alcohol +
        positive_drug |
        age_c + gender + type_vehicle + weather,
    family = binomial(logit),
    data = accident_bike
  )
)

# Coefficient rows to print: term name -> label.
cm <- c(
  "type_personPassenger" = "Passenger",
  "type_personPedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Goodness-of-fit rows to print: statistic name, label, format.
gm <- tibble(
  raw = c("nobs", "FE: age_c", "FE: gender", "FE: type_vehicle", "FE: weather"),
  clean = c(
    "Observations",
    "FE: Age Group",
    "FE: Gender",
    "FE: Type of Vehicle",
    "FE: Weather"
  ),
  fmt = c(0, 0, 0, 0, 0)
)

modelsummary(
  models,
  stars = c("+" = .1, "*" = .05, "**" = .01),
  coef_map = cm,
  gof_map = gm
) |>
  group_tt(j = list("Hospitalization" = 2:4, "Died within 24 hours" = 5:7))
```