Code
library(readr)
library(dplyr)
library(stringr)
library(DT)
library(gt)
library(ggplot2)
library(scales)
payroll_data <- read_csv("data/mp01/nyc_payroll_export.csv")
colnames(payroll_data) [1] "fiscal_year" "payroll_number"
[3] "agency_name" "last_name"
[5] "first_name" "mid_init"
[7] "agency_start_date" "work_location_borough"
[9] "title_description" "leave_status_as_of_june_30"
[11] "base_salary" "pay_basis"
[13] "regular_hours" "regular_gross_paid"
[15] "ot_hours" "total_ot_paid"
[17] "total_other_pay"
Code
payroll_data <- payroll_data %>%
mutate(
agency_name = str_to_title(agency_name),
last_name = str_to_title(last_name),
first_name = str_to_title(first_name),
work_location_borough = str_to_title(work_location_borough),
title_description = str_to_title(title_description),
leave_status = str_to_title(leave_status_as_of_june_30)
)
colnames(payroll_data) [1] "fiscal_year" "payroll_number"
[3] "agency_name" "last_name"
[5] "first_name" "mid_init"
[7] "agency_start_date" "work_location_borough"
[9] "title_description" "leave_status_as_of_june_30"
[11] "base_salary" "pay_basis"
[13] "regular_hours" "regular_gross_paid"
[15] "ot_hours" "total_ot_paid"
[17] "total_other_pay" "leave_status"
Code
payroll_data <- payroll_data %>%
rename(
fiscal_year = `fiscal_year`,
agency_name = `agency_name`,
last_name = `last_name`,
first_name = `first_name`,
title = `title_description`,
salary = `base_salary`,
pay_basis = `pay_basis`,
reg_hours = `regular_hours`,
borough = `work_location_borough`
)
payroll_data <- payroll_data %>%
mutate(salary = as.numeric(salary),
reg_hours = as.numeric(reg_hours),
ot_hours = as.numeric(ot_hours))
glimpse(payroll_data)Rows: 6,225,611
Columns: 18
$ fiscal_year <dbl> 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2β¦
$ payroll_number <dbl> 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,β¦
$ agency_name <chr> "Admin For Children's Svcs", "Admin For Chiβ¦
$ last_name <chr> "Faye Fall", "Kilgore", "Wisdom", "Miller",β¦
$ first_name <chr> "Sokhna", "Orlantha", "Cherise", "Moya-Gayeβ¦
$ mid_init <chr> "M", "B", "M", "S", "M", "L", "O", NA, "N",β¦
$ agency_start_date <dttm> 2023-11-20, 2023-08-28, 2022-10-24, 2023-0β¦
$ borough <chr> "Bronx", "Brooklyn", "Manhattan", "Manhattaβ¦
$ title <chr> "Child Protective Specialist", "Child Proteβ¦
$ leave_status_as_of_june_30 <chr> "ACTIVE", "ACTIVE", "ON LEAVE", "ON LEAVE",β¦
$ salary <dbl> 62043, 62043, 43144, 62043, 60236, 62043, 6β¦
$ pay_basis <chr> "per Annum", "per Annum", "per Annum", "perβ¦
$ reg_hours <dbl> 1050.00, 1470.00, 1251.50, 1400.75, 700.00,β¦
$ regular_gross_paid <dbl> 31267.96, 44660.96, 28649.20, 44515.43, 221β¦
$ ot_hours <dbl> 12.00, 99.75, 30.00, 44.75, 53.00, 146.00, β¦
$ total_ot_paid <dbl> 425.00, 3859.84, 802.42, 1476.98, 1933.33, β¦
$ total_other_pay <dbl> 78.04, 78.14, 78.26, 78.37, 78.47, 78.86, 7β¦
$ leave_status <chr> "Active", "Active", "On Leave", "On Leave",β¦
