UK Road traffic accidents 2021/2022
Richard_Rowe
2025-02-18
Brief summary
The dataset spans from January 1, 2021, to December 31, 2022, with 307,972 total accidents recorded.
The majority of accidents were classified as ‘Slight’ (85.5%), followed by ‘Serious’ (13.2%), and ‘Fatal’ (1.3%).
Friday had the highest number of accidents (16.4%), while Sunday had the lowest (10.9%).
Birmingham had the greatest number of accidents, followed by Leeds and Manchester.
The Metropolitan Police handled the highest number of accidents (15.2% of the total), followed by West Midlands and West Yorkshire.
Most accidents occurred during fine weather with no high winds (79.4%).
The most common time for accidents was around evening rush hours, particularly at 18:00 on Fridays.
Single carriageways during daylight hours saw the highest number of fatal accidents.
Roads with a speed limit of 30 mph had the highest number of accidents, regardless of severity.
Import, tidy and conduct a basic analysis to gain insights from the “Road Accident Data.csv” file.
- Set up the environment.
library(tidyverse)
library(readr)
library(lubridate)
library(kableExtra)
# Libraries and API key used for mapping in code chunk 34.
library(leaflet)
library(ggmap)
register_google(key = Sys.getenv("GOOGLE_API_KEY"))
- Load the data set.
# Read the raw accident records; show_col_types = FALSE silences the
# column-specification message.
rtdata <- "Road Accident Data.csv" %>%
  read_csv(show_col_types = FALSE)
- View column names.
# List the column names of the raw data.
names(rtdata)
## [1] "Accident_Index" "Accident Date"
## [3] "Month" "Day_of_Week"
## [5] "Year" "Junction_Control"
## [7] "Junction_Detail" "Accident_Severity"
## [9] "Latitude" "Light_Conditions"
## [11] "Local_Authority_(District)" "Carriageway_Hazards"
## [13] "Longitude" "Number_of_Casualties"
## [15] "Number_of_Vehicles" "Police_Force"
## [17] "Road_Surface_Conditions" "Road_Type"
## [19] "Speed_limit" "Time"
## [21] "Urban_or_Rural_Area" "Weather_Conditions"
## [23] "Vehicle_Type"
- Check the structure of the data set.
# Show each column's type and a preview of its first values.
str(rtdata)
## spc_tbl_ [307,973 × 23] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Accident_Index : chr [1:307973] "200901BS70001" "200901BS70002" "200901BS70003" "200901BS70004" ...
## $ Accident Date : chr [1:307973] "1/1/2021" "1/5/2021" "1/4/2021" "1/5/2021" ...
## $ Month : chr [1:307973] "Jan" "Jan" "Jan" "Jan" ...
## $ Day_of_Week : chr [1:307973] "Thursday" "Monday" "Sunday" "Monday" ...
## $ Year : num [1:307973] 2021 2021 2021 2021 2021 ...
## $ Junction_Control : chr [1:307973] "Give way or uncontrolled" "Give way or uncontrolled" "Give way or uncontrolled" "Auto traffic signal" ...
## $ Junction_Detail : chr [1:307973] "T or staggered junction" "Crossroads" "T or staggered junction" "T or staggered junction" ...
## $ Accident_Severity : chr [1:307973] "Serious" "Serious" "Slight" "Serious" ...
## $ Latitude : num [1:307973] 51.5 51.5 51.5 51.5 51.5 ...
## $ Light_Conditions : chr [1:307973] "Daylight" "Daylight" "Daylight" "Daylight" ...
## $ Local_Authority_(District): chr [1:307973] "Kensington and Chelsea" "Kensington and Chelsea" "Kensington and Chelsea" "Kensington and Chelsea" ...
## $ Carriageway_Hazards : chr [1:307973] "None" "None" "None" "None" ...
## $ Longitude : num [1:307973] -0.201 -0.199 -0.18 -0.203 -0.173 ...
## $ Number_of_Casualties : num [1:307973] 1 11 1 1 1 3 1 1 2 1 ...
## $ Number_of_Vehicles : num [1:307973] 2 2 2 2 2 2 2 1 1 1 ...
## $ Police_Force : chr [1:307973] "Metropolitan Police" "Metropolitan Police" "Metropolitan Police" "Metropolitan Police" ...
## $ Road_Surface_Conditions : chr [1:307973] "Dry" "Wet or damp" "Dry" "Frost or ice" ...
## $ Road_Type : chr [1:307973] "One way street" "Single carriageway" "Single carriageway" "Single carriageway" ...
## $ Speed_limit : num [1:307973] 30 30 30 30 30 30 30 30 30 30 ...
## $ Time : 'hms' num [1:307973] 15:11:00 10:59:00 14:19:00 08:10:00 ...
## ..- attr(*, "units")= chr "secs"
## $ Urban_or_Rural_Area : chr [1:307973] "Urban" "Urban" "Urban" "Urban" ...
## $ Weather_Conditions : chr [1:307973] "Fine no high winds" "Fine no high winds" "Fine no high winds" "Other" ...
## $ Vehicle_Type : chr [1:307973] "Car" "Taxi/Private hire car" "Taxi/Private hire car" "Motorcycle over 500cc" ...
## - attr(*, "spec")=
## .. cols(
## .. Accident_Index = col_character(),
## .. `Accident Date` = col_character(),
## .. Month = col_character(),
## .. Day_of_Week = col_character(),
## .. Year = col_double(),
## .. Junction_Control = col_character(),
## .. Junction_Detail = col_character(),
## .. Accident_Severity = col_character(),
## .. Latitude = col_double(),
## .. Light_Conditions = col_character(),
## .. `Local_Authority_(District)` = col_character(),
## .. Carriageway_Hazards = col_character(),
## .. Longitude = col_double(),
## .. Number_of_Casualties = col_double(),
## .. Number_of_Vehicles = col_double(),
## .. Police_Force = col_character(),
## .. Road_Surface_Conditions = col_character(),
## .. Road_Type = col_character(),
## .. Speed_limit = col_double(),
## .. Time = col_time(format = ""),
## .. Urban_or_Rural_Area = col_character(),
## .. Weather_Conditions = col_character(),
## .. Vehicle_Type = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
- Check for missing values.
# Number of NA entries in each column.
rtdata %>%
  is.na() %>%
  colSums()
## Accident_Index Accident Date
## 0 0
## Month Day_of_Week
## 0 0
## Year Junction_Control
## 0 0
## Junction_Detail Accident_Severity
## 0 0
## Latitude Light_Conditions
## 0 0
## Local_Authority_(District) Carriageway_Hazards
## 0 3
## Longitude Number_of_Casualties
## 0 0
## Number_of_Vehicles Police_Force
## 0 0
## Road_Surface_Conditions Road_Type
## 317 1534
## Speed_limit Time
## 0 17
## Urban_or_Rural_Area Weather_Conditions
## 0 6057
## Vehicle_Type
## 0
- Check for duplicates in the data set.
# Number of rows that exactly repeat an earlier row.
rtdata %>%
  duplicated() %>%
  sum()
## [1] 1
- Create clean_rtdata df and remove duplicate values.
# Keep the first occurrence of every row and drop exact repeats.
clean_rtdata <- rtdata[which(!duplicated(rtdata)), ]
- Remove the accident index column from the dataset.
# Drop the Accident_Index column.
clean_rtdata <- select(clean_rtdata, !Accident_Index)
- Change column names to lower case.
# Lower-case every column name. rename_with() is the current replacement for
# the superseded rename_all(); with no column selection it applies to all
# columns.
clean_rtdata <- clean_rtdata %>%
  rename_with(tolower)
- Change the “accident date” column name to accident_date.
# Replace the space in "accident date" so the column can be referenced
# without backticks.
clean_rtdata <- rename(clean_rtdata, accident_date = "accident date")
- Change the accident_date column from chr to Date format.
# Parse the month/day/year strings (e.g. "1/5/2021") into Date values.
clean_rtdata <- clean_rtdata %>%
  mutate(accident_date = mdy(accident_date))
- Change the day_of_week column type from chr
to a factor so it can be sorted in weekday order in later analysis. I had difficulty finding a way to do this using base R or lubridate, so I created day_levels to define the correct order of the days of the week.
# Weekday names in calendar order (Monday first). Using them as factor levels
# makes tables and plots sort by weekday instead of alphabetically.
day_levels <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)
clean_rtdata$day_of_week <- factor(clean_rtdata$day_of_week, levels = day_levels)
- Change the month column from chr to a valid month value so it can be sorted accordingly in later analysis.
# month.abb is base R's "Jan".."Dec" vector; using it as the level order makes
# months sort chronologically.
clean_rtdata <- clean_rtdata %>%
  mutate(month = factor(month, levels = month.abb))
- Remove any leading or trailing spaces from columns that are character type.
# Strip leading/trailing whitespace from every character column.
# across(where(...)) is the current replacement for the superseded mutate_if().
# NOTE(review): day_of_week and month have already been converted to factors
# above, so they are not character columns here and are not trimmed by this step.
clean_rtdata <- clean_rtdata %>%
  mutate(across(where(is.character), stringr::str_trim))
Analysis
- Find the date period the data was collected.
# Earliest accident date in the data.
min(clean_rtdata[["accident_date"]])
## [1] "2021-01-01"
# Latest accident date in the data.
max(clean_rtdata[["accident_date"]])
## [1] "2022-12-31"
- Find the number of police forces that took part.
# Count how many distinct police forces appear in the data.
number_of_police_forces <- clean_rtdata %>%
  summarise(count = n_distinct(police_force))

number_of_police_forces %>%
  kable()
| count |
|---|
| 51 |
- Find the total number of accidents in the data set and save it as total_accidents for later use. Also find the yearly totals and the total number of fatal accidents for later use.
# Overall number of accidents; total_accidents$total is the denominator for
# the percentage columns in later tables.
total_accidents <- clean_rtdata %>%
  summarise(total = n())

# Accident counts per calendar year, used for the per-year monthly percentages.
total_accidents_2021 <- clean_rtdata %>%
  dplyr::filter(year == 2021) %>%
  count(name = "count")

total_accidents_2022 <- clean_rtdata %>%
  dplyr::filter(year == 2022) %>%
  count(name = "count")

# Number of fatal accidents; used later as the denominator for fatal-only
# percentages.
total_fatal_accidents <- clean_rtdata %>%
  dplyr::filter(accident_severity == "Fatal") %>%
  summarise(Fatal = n())

# One row per year with a column of counts for each severity level.
yearly_accidents <- clean_rtdata %>%
  count(year, accident_severity, name = "count") %>%
  pivot_wider(names_from = accident_severity, values_from = count)
# Count accidents per severity level, largest first, with each level's share
# of all accidents. Grouping by the bare column name (instead of
# `clean_rtdata$accident_severity`) keeps the result column named
# `accident_severity` rather than the literal expression text.
accidents_by_severity <- clean_rtdata %>%
  group_by(accident_severity) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count)) %>%
  # total_accidents$total is the overall accident count computed earlier
  mutate(percentage = round((count / total_accidents$total) * 100, 1))
# Render the overall accident total as a styled one-column table
# (header typo "Acidents" corrected).
kable(total_accidents, col.names = "Total Accidents 2021/2022") %>%
  kable_styling()
| Total Acidents 2021/2022 |
|---|
| 307972 |
# Render the severity breakdown as a styled table.
accidents_by_severity %>%
  kable() %>%
  kable_styling()
| clean_rtdata$accident_severity | count | percentage |
|---|---|---|
| Slight | 263279 | 85.5 |
| Serious | 40740 | 13.2 |
| Fatal | 3953 | 1.3 |
# Render the per-year severity counts as a styled table.
yearly_accidents %>%
  kable() %>%
  kable_styling()
| year | Fatal | Serious | Slight |
|---|---|---|---|
| 2021 | 2404 | 21963 | 139186 |
| 2022 | 1549 | 18777 | 124093 |
- Find the day of the week that has the most accidents from the total number of accidents.
# Tally accidents per weekday, busiest day first, and add each day's share of
# all accidents (total_accidents$total is the overall count).
accidents_by_day <- clean_rtdata %>%
  count(day_of_week, name = "count", sort = TRUE) %>%
  mutate(percentage = round((count / total_accidents$total) * 100, 1))

accidents_by_day %>%
  kable() %>%
  kable_styling()
| day_of_week | count | percentage |
|---|---|---|
| Friday | 50529 | 16.4 |
| Tuesday | 46385 | 15.1 |
| Wednesday | 46381 | 15.1 |
| Thursday | 45649 | 14.8 |
| Monday | 43918 | 14.3 |
| Saturday | 41566 | 13.5 |
| Sunday | 33544 | 10.9 |
- Create a simple bar graph to show accidents by day of the week for the whole dataset.
# Bar chart of accident counts per weekday; geom_col() plots the precomputed
# counts directly.
ggplot(accidents_by_day, aes(x = day_of_week, y = count)) +
  geom_col(fill = "#4183C4") +
  labs(title = "Total accidents in 2021/2022 by day of the week")
- Find the month that has the most accidents from the total number of accidents.
# Tally accidents per month across both years, busiest month first, and add
# each month's share of all accidents.
accidents_by_month <- clean_rtdata %>%
  count(month, name = "count", sort = TRUE) %>%
  mutate(percentage = round((count / total_accidents$total) * 100, 1))

accidents_by_month %>%
  kable() %>%
  kable_styling()
| month | count | percentage |
|---|---|---|
| Nov | 29095 | 9.4 |
| Oct | 28368 | 9.2 |
| Jul | 26953 | 8.8 |
| Sep | 26752 | 8.7 |
| Jun | 26748 | 8.7 |
| May | 26183 | 8.5 |
| Mar | 25543 | 8.3 |
| Aug | 25503 | 8.3 |
| Apr | 24225 | 7.9 |
| Jan | 23383 | 7.6 |
| Dec | 23334 | 7.6 |
| Feb | 21885 | 7.1 |
- Create a simple bar graph to show accidents by month for the whole dataset.
# Bar chart of accident counts per month; geom_col() plots the precomputed
# counts directly.
ggplot(accidents_by_month, aes(x = month, y = count)) +
  geom_col(fill = "#4183C4") +
  labs(title = "Total accidents 2021/2022 by month")
- Find the month with most accidents from 2021
# Monthly accident counts for 2021, busiest month first, with each month's
# share of that year's total (total_accidents_2021$count).
accidents_by_month_2021 <- clean_rtdata %>%
  dplyr::filter(year == 2021) %>%
  count(month, name = "count_2021", sort = TRUE) %>%
  mutate(percentage = round((count_2021 / total_accidents_2021$count) * 100, 1))

accidents_by_month_2021 %>%
  kable() %>%
  kable_styling()
| month | count_2021 | percentage |
|---|---|---|
| Nov | 15473 | 9.5 |
| Oct | 14834 | 9.1 |
| Jul | 14300 | 8.7 |
| Jun | 13936 | 8.5 |
| May | 13811 | 8.4 |
| Sep | 13792 | 8.4 |
| Dec | 13709 | 8.4 |
| Jan | 13416 | 8.2 |
| Aug | 13415 | 8.2 |
| Mar | 13202 | 8.1 |
| Apr | 12715 | 7.8 |
| Feb | 10950 | 6.7 |
- Find the month with most accidents from 2022
# Monthly accident counts for 2022, busiest month first, with each month's
# share of that year's total (total_accidents_2022$count).
accidents_by_month_2022 <- clean_rtdata %>%
  dplyr::filter(year == 2022) %>%
  count(month, name = "count_2022", sort = TRUE) %>%
  mutate(percentage = round((count_2022 / total_accidents_2022$count) * 100, 1))

accidents_by_month_2022 %>%
  kable() %>%
  kable_styling()
| month | count_2022 | percentage |
|---|---|---|
| Nov | 13622 | 9.4 |
| Oct | 13534 | 9.4 |
| Sep | 12960 | 9.0 |
| Jun | 12812 | 8.9 |
| Jul | 12653 | 8.8 |
| May | 12372 | 8.6 |
| Mar | 12341 | 8.5 |
| Aug | 12088 | 8.4 |
| Apr | 11510 | 8.0 |
| Feb | 10935 | 7.6 |
| Jan | 9967 | 6.9 |
| Dec | 9625 | 6.7 |
- Create a comparison for each month between 2021 and 2022
# Put the 2021 and 2022 monthly tables side by side (matched on month) and
# order the rows chronologically using the month factor levels.
accidents_year_comparision <- accidents_by_month_2021 %>%
  full_join(accidents_by_month_2022, by = "month") %>%
  arrange(month)

accidents_year_comparision %>%
  kable(col.names = c("Month", "2021", "%", "2022", "%")) %>%
  kable_styling()
| Month | 2021 | % | 2022 | % |
|---|---|---|---|---|
| Jan | 13416 | 8.2 | 9967 | 6.9 |
| Feb | 10950 | 6.7 | 10935 | 7.6 |
| Mar | 13202 | 8.1 | 12341 | 8.5 |
| Apr | 12715 | 7.8 | 11510 | 8.0 |
| May | 13811 | 8.4 | 12372 | 8.6 |
| Jun | 13936 | 8.5 | 12812 | 8.9 |
| Jul | 14300 | 8.7 | 12653 | 8.8 |
| Aug | 13415 | 8.2 | 12088 | 8.4 |
| Sep | 13792 | 8.4 | 12960 | 9.0 |
| Oct | 14834 | 9.1 | 13534 | 9.4 |
| Nov | 15473 | 9.5 | 13622 | 9.4 |
| Dec | 13709 | 8.4 | 9625 | 6.7 |
- Create a line chart to compare 2021 and 2022 month by month.
# ggplot2 is also attached by library(tidyverse); loading it again is harmless.
library(ggplot2)
# Draw one line per year from the wide comparison table.
# month is a factor (discrete x axis), so each geom_line() sets a constant
# `group` to connect its points into a single line. `color` is mapped to a
# constant label so each year gets its own legend entry.
ggplot(accidents_year_comparision, aes(x = month)) +
geom_line(aes(y = count_2021, color = "2021", group = 1), linewidth = 1) +
geom_line(aes(y = count_2022, color = "2022", group = 2), linewidth = 1) +
labs(x = "Month", y = "Number of Accidents", title = "Monthly Accidents Comparison for 2021 and 2022")
- List the top 10 local authorities with the greatest number of accidents. Find each one's percentage of the total accidents using the total_accidents df we created earlier.
# Top 10 local authorities by number of accidents, with each one's share of
# all accidents.
accidents_by_local_authority <- clean_rtdata %>%
  group_by(`local_authority_(district)`) %>%
  summarise(count = n()) %>%
  # percentage of all accidents, rounded to 1 decimal place
  mutate(percentage = round((count / total_accidents$total) * 100, 1)) %>%
  # slice_max() replaces the superseded top_n(); it keeps the 10 largest
  # counts (plus any ties) and returns them in descending order
  slice_max(order_by = count, n = 10)

kable(accidents_by_local_authority) %>%
  kable_styling()
| local_authority_(district) | count | percentage |
|---|---|---|
| Birmingham | 6165 | 2.0 |
| Leeds | 4140 | 1.3 |
| Manchester | 3132 | 1.0 |
| Bradford | 3006 | 1.0 |
| Westminster | 2811 | 0.9 |
| Sheffield | 2750 | 0.9 |
| Liverpool | 2611 | 0.8 |
| Cornwall | 2606 | 0.8 |
| Barnet | 2302 | 0.7 |
| Bristol, City of | 2270 | 0.7 |
- Find the police force with the greatest number of accidents. Find the percentage from total accidents using the total_accidents df we created earlier.
# Top 10 police forces by number of accidents, with each force's share of all
# accidents.
accidents_by_police_force_total <- clean_rtdata %>%
  group_by(police_force) %>%
  summarise(count = n()) %>%
  # total_accidents$total is the overall accident count computed earlier
  mutate(percentage_of_total = round((count / total_accidents$total) * 100, 1)) %>%
  # slice_max() replaces the superseded top_n(); it keeps the 10 largest
  # counts (plus any ties) and returns them in descending order
  slice_max(order_by = count, n = 10)

kable(accidents_by_police_force_total) %>%
  kable_styling()
| police_force | count | percentage_of_total |
|---|---|---|
| Metropolitan Police | 46789 | 15.2 |
| West Midlands | 13509 | 4.4 |
| West Yorkshire | 12015 | 3.9 |
| Greater Manchester | 11954 | 3.9 |
| Thames Valley | 11483 | 3.7 |
| Kent | 9995 | 3.2 |
| Hampshire | 9492 | 3.1 |
| Lancashire | 9143 | 3.0 |
| Devon and Cornwall | 8804 | 2.9 |
| Sussex | 8548 | 2.8 |
- Find the most common weather conditions when accidents occur.
# Tally accidents per recorded weather condition, most common first.
# Rows with a missing weather value are excluded from the tally, but the
# percentage is still taken against all accidents (total_accidents$total),
# so the shares do not sum to 100.
accident_by_weather <- clean_rtdata %>%
  dplyr::filter(!is.na(weather_conditions)) %>%
  count(weather_conditions, name = "count", sort = TRUE) %>%
  mutate(percentage_of_total = round((count / total_accidents$total) * 100, 1))

accident_by_weather %>%
  kable() %>%
  kable_styling()
| weather_conditions | count | percentage_of_total |
|---|---|---|
| Fine no high winds | 244495 | 79.4 |
| Raining no high winds | 34877 | 11.3 |
| Other | 8802 | 2.9 |
| Snowing no high winds | 4839 | 1.6 |
| Raining + high winds | 3526 | 1.1 |
| Fine + high winds | 3148 | 1.0 |
| Fog or mist | 1690 | 0.5 |
| Snowing + high winds | 538 | 0.2 |
- Find the most common day of week and time for accidents by police force and sort by count.
# For each police force, find the weekday/time combination with the most
# accidents, then list the 10 forces whose busiest slot has the highest count.
accidents_by_police_force <- clean_rtdata %>%
  # a missing time is not a time of day, so it must not compete as a slot
  dplyr::filter(!is.na(time)) %>%
  group_by(police_force, day_of_week, time) %>%
  summarise(count = n(), .groups = "drop") %>%
  # busiest slot per force (ties within a force are all kept)
  group_by(police_force) %>%
  slice_max(order_by = count, n = 1) %>%
  ungroup() %>%
  # slice_max() replaces the superseded top_n() and already returns rows in
  # descending order of count, so no separate arrange() is needed
  slice_max(order_by = count, n = 10)

kable(accidents_by_police_force) %>%
  kable_styling()
| police_force | day_of_week | time | count |
|---|---|---|---|
| Metropolitan Police | Friday | 18:00:00 | 85 |
| Greater Manchester | Wednesday | 17:30:00 | 31 |
| West Midlands | Thursday | 17:00:00 | 31 |
| West Yorkshire | Friday | 16:00:00 | 29 |
| Devon and Cornwall | Wednesday | 17:00:00 | 25 |
| Hampshire | Thursday | 17:00:00 | 25 |
| Lancashire | Tuesday | 15:30:00 | 24 |
| Kent | Thursday | 08:30:00 | 23 |
| Surrey | Friday | 17:00:00 | 22 |
| Thames Valley | Tuesday | 17:30:00 | 22 |
- Find the most common road type and light conditions for fatal accidents to occur. Find the percentage using the total_fatal_accidents df we created earlier
# Top 10 road type / light condition combinations among fatal accidents, with
# each combination's share of all fatal accidents
# (total_fatal_accidents$Fatal).
fatal_accidents_by_road_type <- clean_rtdata %>%
  dplyr::filter(accident_severity == "Fatal") %>%
  # accident_severity is constant here; it is kept as a grouping column so it
  # appears in the output table
  group_by(road_type, accident_severity, light_conditions) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = round((count / total_fatal_accidents$Fatal) * 100, 1)) %>%
  # slice_max() replaces the superseded top_n(); it keeps the 10 largest
  # counts (plus any ties) and returns them in descending order
  slice_max(order_by = count, n = 10)

kable(fatal_accidents_by_road_type) %>%
  kable_styling()
| road_type | accident_severity | light_conditions | count | percentage |
|---|---|---|---|---|
| Single carriageway | Fatal | Daylight | 1837 | 46.5 |
| Single carriageway | Fatal | Darkness - lights lit | 621 | 15.7 |
| Single carriageway | Fatal | Darkness - no lighting | 489 | 12.4 |
| Dual carriageway | Fatal | Daylight | 463 | 11.7 |
| Dual carriageway | Fatal | Darkness - lights lit | 195 | 4.9 |
| Dual carriageway | Fatal | Darkness - no lighting | 156 | 3.9 |
| Roundabout | Fatal | Daylight | 54 | 1.4 |
| One way street | Fatal | Daylight | 33 | 0.8 |
| Single carriageway | Fatal | Darkness - lighting unknown | 26 | 0.7 |
| Roundabout | Fatal | Darkness - lights lit | 23 | 0.6 |
- Find the most common road speed limit for accidents. Sort by count to see which speed limits have the most accidents.
# Accident counts for every severity / speed limit pair, largest first.
all_accidents_by_speedlimit <- clean_rtdata %>%
  count(accident_severity, speed_limit, name = "count", sort = TRUE)

# Reshape to one row per speed limit with a column per severity, so each
# severity can be drawn as its own line. Missing combinations become 0.
all_accidents_by_speedlimit_wide <- pivot_wider(
  all_accidents_by_speedlimit,
  names_from = accident_severity,
  values_from = count,
  values_fill = list(count = 0)
)

all_accidents_by_speedlimit_wide %>%
  kable() %>%
  kable_styling()
| speed_limit | Slight | Serious | Fatal |
|---|---|---|---|
| 30 | 174427 | 24076 | 1537 |
| 60 | 36696 | 8817 | 1313 |
| 40 | 21987 | 3295 | 368 |
| 70 | 19150 | 2711 | 501 |
| 50 | 8506 | 1464 | 220 |
| 20 | 2508 | 377 | 14 |
| 10 | 3 | 0 | 0 |
| 15 | 2 | 0 | 0 |
- Create a line graph to show relationship between speed and accident severity.
# One line per severity level across speed limits. Colour is mapped to a
# constant label per layer so scale_colour_manual() can assign the legend
# colours. All three layers use `linewidth`; `size` for lines was deprecated
# in ggplot2 3.4.0 and triggered a warning.
ggplot(all_accidents_by_speedlimit_wide, aes(x = speed_limit)) +
  geom_line(aes(y = Fatal, colour = "Fatal", group = 1), linewidth = 1) +
  geom_line(aes(y = Serious, colour = "Serious", group = 1), linewidth = 1) +
  geom_line(aes(y = Slight, colour = "Slight", group = 1), linewidth = 1) +
  labs(x = "Speed Limit", y = "Number of Accidents", title = "Number of accidents by speed limit") +
  scale_colour_manual(values = c("Fatal" = "red", "Serious" = "blue", "Slight" = "green"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
- Create a summary of vehicle type and severity. Sorted by the highest number of accidents.
# The 20 most frequent vehicle type / severity combinations, with each
# combination's share of all accidents.
accidents_summary <- clean_rtdata %>%
  # dplyr::filter(accident_severity == "Fatal") %>% # uncomment to restrict to fatal accidents
  group_by(vehicle_type, accident_severity) %>%
  summarise(count = n(), .groups = "drop") %>%
  # total_accidents$total is the overall accident count computed earlier
  mutate(percentage = round((count / total_accidents$total) * 100, 1)) %>%
  # slice_max() replaces the superseded top_n(); it keeps the 20 largest
  # counts (plus any ties) and returns them in descending order
  slice_max(order_by = count, n = 20)

# kable gives a richer formatted table than the default print
kable(accidents_summary, col.names = c("Vehicle Type", "Accident Severity", "Count", "Percentage")) %>%
  kable_styling()
| Vehicle Type | Accident Severity | Count | Percentage |
|---|---|---|---|
| Car | Slight | 205078 | 66.6 |
| Car | Serious | 31583 | 10.3 |
| Van / Goods 3.5 tonnes mgw or under | Slight | 13364 | 4.3 |
| Motorcycle over 500cc | Slight | 9587 | 3.1 |
| Bus or coach (17 or more pass seats) | Slight | 7465 | 2.4 |
| Motorcycle 125cc and under | Slight | 5845 | 1.9 |
| Goods 7.5 tonnes mgw and over | Slight | 5587 | 1.8 |
| Taxi/Private hire car | Slight | 4742 | 1.5 |
| Motorcycle 50cc and under | Slight | 3159 | 1.0 |
| Car | Fatal | 3132 | 1.0 |
| Motorcycle over 125cc and up to 500cc | Slight | 2822 | 0.9 |
| Other vehicle | Slight | 2127 | 0.7 |
| Van / Goods 3.5 tonnes mgw or under | Serious | 2121 | 0.7 |
| Goods over 3.5t. and under 7.5t | Slight | 2119 | 0.7 |
| Motorcycle over 500cc | Serious | 1496 | 0.5 |
| Bus or coach (17 or more pass seats) | Serious | 1135 | 0.4 |
| Motorcycle 125cc and under | Serious | 926 | 0.3 |
| Goods 7.5 tonnes mgw and over | Serious | 878 | 0.3 |
| Taxi/Private hire car | Serious | 742 | 0.2 |
| Minibus (8 - 16 passenger seats) | Slight | 693 | 0.2 |
- Create a map showing all the fatal accidents in 2021 where the speed limit is greater than or equal to 70 mph.
# leaflet was loaded with the other libraries at the top of the document.
# The map is drawn by leaflet on its default tile layer (addTiles()), so the
# earlier ggmap::get_map() call has been dropped: its result was never used,
# it made a Google Static Maps request on every run, and the request URL it
# echoed exposed part of the API key.

# Fatal accidents in 2021 on roads with a speed limit of 70 mph or more.
fatal_accidents <- clean_rtdata %>%
  dplyr::filter(accident_severity == "Fatal", speed_limit >= 70, year == 2021)

# Plot each accident as a small red circle marker (no clustering is applied);
# clicking a marker shows its severity.
leaflet(fatal_accidents) %>%
  addTiles() %>%
  addCircleMarkers(~longitude, ~latitude, color = "red", radius = 1, popup = ~paste("Severity:", accident_severity))
- Create a map showing all the fatal accidents in 2022 where the speed limit is greater than or equal to 70 mph.
# The map is drawn by leaflet on its default tile layer (addTiles()), so the
# ggmap::get_map() call has been dropped: its result was never used, it made
# a Google Static Maps request on every run, and the request URL it echoed
# exposed part of the API key.

# Fatal accidents in 2022 on roads with a speed limit of 70 mph or more.
fatal_accidents <- clean_rtdata %>%
  dplyr::filter(accident_severity == "Fatal", speed_limit >= 70, year == 2022)

# Plot each accident as a small red circle marker (no clustering is applied);
# clicking a marker shows its severity.
leaflet(fatal_accidents) %>%
  addTiles() %>%
  addCircleMarkers(~longitude, ~latitude, color = "red", radius = 1, popup = ~paste("Severity:", accident_severity))
References
Dataset Citation
- Source: Kaggle
- Dataset: UK Road Accidents
License: The dataset is released under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication.
To cite package ‘ggmap’ in publications use:
D. Kahle and H. Wickham. ggmap: Spatial Visualization with ggplot2. The R Journal, 5(1), 144-161. URL http://journal.r-project.org/archive/2013-1/kahle-wickham.pdf
A BibTeX entry for LaTeX users is
@Article{, author = {David Kahle and Hadley Wickham}, title = {ggmap: Spatial Visualization with ggplot2}, journal = {The R Journal}, year = {2013}, volume = {5}, number = {1}, pages = {144–161}, url = {https://journal.r-project.org/archive/2013-1/kahle-wickham.pdf}, } To cite package ‘leaflet’ in publications use:
Cheng J, Schloerke B, Karambelkar B, Xie Y (2024). leaflet: Create Interactive Web Maps with the JavaScript ‘Leaflet’ Library. R package version 2.2.2, https://CRAN.R-project.org/package=leaflet.
A BibTeX entry for LaTeX users is
@Manual{, title = {leaflet: Create Interactive Web Maps with the JavaScript ‘Leaflet’ Library}, author = {Joe Cheng and Barret Schloerke and Bhaskar Karambelkar and Yihui Xie}, year = {2024}, note = {R package version 2.2.2}, url = {https://CRAN.R-project.org/package=leaflet}, } To cite package ‘tidyverse’ in publications use:
Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). “Welcome to the tidyverse.” Journal of Open Source Software, 4(43), 1686. doi:10.21105/joss.01686 https://doi.org/10.21105/joss.01686.
A BibTeX entry for LaTeX users is
@Article{, title = {Welcome to the {tidyverse}}, author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D’Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani}, year = {2019}, journal = {Journal of Open Source Software}, volume = {4}, number = {43}, pages = {1686}, doi = {10.21105/joss.01686}, } To cite lubridate in publications use:
Garrett Grolemund, Hadley Wickham (2011). Dates and Times Made Easy with lubridate. Journal of Statistical Software, 40(3), 1-25. URL https://www.jstatsoft.org/v40/i03/.
A BibTeX entry for LaTeX users is
@Article{, title = {Dates and Times Made Easy with {lubridate}}, author = {Garrett Grolemund and Hadley Wickham}, journal = {Journal of Statistical Software}, year = {2011}, volume = {40}, number = {3}, pages = {1–25}, url = {https://www.jstatsoft.org/v40/i03/}, } To cite package ‘kableExtra’ in publications use:
Zhu H (2024). kableExtra: Construct Complex Table with ‘kable’ and Pipe Syntax. R package version 1.4.0, https://CRAN.R-project.org/package=kableExtra.
A BibTeX entry for LaTeX users is
@Manual{, title = {kableExtra: Construct Complex Table with ‘kable’ and Pipe Syntax}, author = {Hao Zhu}, year = {2024}, note = {R package version 1.4.0}, url = {https://CRAN.R-project.org/package=kableExtra}, }