Chapter 4 Data cleaning

4.1 What did we do?

If you don’t want to look at all the lengthy work, Kavya and Zuofu are excited to talk through the process with you!

4.1.1 Load data, select and rename columns

If you want to reproduce our work, make sure to change the file path so that it points to where the data is stored on your machine :)

4.1.2 Filter schools that have a rank

# Subset `myData1415_sub` to the rows whose INSTNM appears in `college`, then
# fix column types in a single mutate(): INSTNM becomes character and the six
# `_1415` measure columns become numeric.
# Each measure goes through as.character() first so that, if the column is a
# factor, its labels are converted rather than its integer level codes.
# Entries that do not parse as numbers come out as NA (with a coercion warning).
myData1415_sub_characterCollege <- myData1415_sub %>%
  filter(INSTNM %in% college) %>%
  mutate(
    INSTNM          = as.character(INSTNM),
    UGDS_1415       = as.numeric(as.character(UGDS_1415)),
    UGDS_WHITE_1415 = as.numeric(as.character(UGDS_WHITE_1415)),
    SAT_AVG_1415    = as.numeric(as.character(SAT_AVG_1415)),
    ACTCMMID_1415   = as.numeric(as.character(ACTCMMID_1415)),
    ADM_RATE_1415   = as.numeric(as.character(ADM_RATE_1415)),
    COSTT4_A_1415   = as.numeric(as.character(COSTT4_A_1415))
  )
# Subset `myData1516_sub` to the rows whose INSTNM appears in `college`, then
# fix column types in a single mutate(): INSTNM becomes character and the six
# `_1516` measure columns become numeric.
# Each measure goes through as.character() first so that, if the column is a
# factor, its labels are converted rather than its integer level codes.
# Entries that do not parse as numbers come out as NA (with a coercion warning).
myData1516_sub_characterCollege <- myData1516_sub %>%
  filter(INSTNM %in% college) %>%
  mutate(
    INSTNM          = as.character(INSTNM),
    UGDS_1516       = as.numeric(as.character(UGDS_1516)),
    UGDS_WHITE_1516 = as.numeric(as.character(UGDS_WHITE_1516)),
    SAT_AVG_1516    = as.numeric(as.character(SAT_AVG_1516)),
    ACTCMMID_1516   = as.numeric(as.character(ACTCMMID_1516)),
    ADM_RATE_1516   = as.numeric(as.character(ADM_RATE_1516)),
    COSTT4_A_1516   = as.numeric(as.character(COSTT4_A_1516))
  )
# Subset `myData1617_sub` to the rows whose INSTNM appears in `college`, then
# fix column types in a single mutate(): INSTNM becomes character and the six
# `_1617` measure columns become numeric.
# Each measure goes through as.character() first so that, if the column is a
# factor, its labels are converted rather than its integer level codes.
# Entries that do not parse as numbers come out as NA (with a coercion warning).
myData1617_sub_characterCollege <- myData1617_sub %>%
  filter(INSTNM %in% college) %>%
  mutate(
    INSTNM          = as.character(INSTNM),
    UGDS_1617       = as.numeric(as.character(UGDS_1617)),
    UGDS_WHITE_1617 = as.numeric(as.character(UGDS_WHITE_1617)),
    SAT_AVG_1617    = as.numeric(as.character(SAT_AVG_1617)),
    ACTCMMID_1617   = as.numeric(as.character(ACTCMMID_1617)),
    ADM_RATE_1617   = as.numeric(as.character(ADM_RATE_1617)),
    COSTT4_A_1617   = as.numeric(as.character(COSTT4_A_1617))
  )

4.1.3 Join datasets

## [1] 175  30
## [1] 149  30
# Subset `myData1415_sub` to the rows whose INSTNM appears in `university`,
# then fix column types in a single mutate(): INSTNM becomes character and the
# six `_1415` measure columns become numeric.
# Each measure goes through as.character() first so that, if the column is a
# factor, its labels are converted rather than its integer level codes.
# Entries that do not parse as numbers come out as NA (with a coercion warning).
myData1415_sub_characterUniversity <- myData1415_sub %>%
  filter(INSTNM %in% university) %>%
  mutate(
    INSTNM          = as.character(INSTNM),
    UGDS_1415       = as.numeric(as.character(UGDS_1415)),
    UGDS_WHITE_1415 = as.numeric(as.character(UGDS_WHITE_1415)),
    SAT_AVG_1415    = as.numeric(as.character(SAT_AVG_1415)),
    ACTCMMID_1415   = as.numeric(as.character(ACTCMMID_1415)),
    ADM_RATE_1415   = as.numeric(as.character(ADM_RATE_1415)),
    COSTT4_A_1415   = as.numeric(as.character(COSTT4_A_1415))
  )
# Subset `myData1516_sub` to the rows whose INSTNM appears in `university`,
# then fix column types in a single mutate(): INSTNM becomes character and the
# six `_1516` measure columns become numeric.
# Each measure goes through as.character() first so that, if the column is a
# factor, its labels are converted rather than its integer level codes.
# Entries that do not parse as numbers come out as NA (with a coercion warning).
myData1516_sub_characterUniversity <- myData1516_sub %>%
  filter(INSTNM %in% university) %>%
  mutate(
    INSTNM          = as.character(INSTNM),
    UGDS_1516       = as.numeric(as.character(UGDS_1516)),
    UGDS_WHITE_1516 = as.numeric(as.character(UGDS_WHITE_1516)),
    SAT_AVG_1516    = as.numeric(as.character(SAT_AVG_1516)),
    ACTCMMID_1516   = as.numeric(as.character(ACTCMMID_1516)),
    ADM_RATE_1516   = as.numeric(as.character(ADM_RATE_1516)),
    COSTT4_A_1516   = as.numeric(as.character(COSTT4_A_1516))
  )
# Subset `myData1617_sub` to the rows whose INSTNM appears in `university`,
# then fix column types in a single mutate(): INSTNM becomes character and the
# six `_1617` measure columns become numeric.
# Each measure goes through as.character() first so that, if the column is a
# factor, its labels are converted rather than its integer level codes.
# Entries that do not parse as numbers come out as NA (with a coercion warning).
myData1617_sub_characterUniversity <- myData1617_sub %>%
  filter(INSTNM %in% university) %>%
  mutate(
    INSTNM          = as.character(INSTNM),
    UGDS_1617       = as.numeric(as.character(UGDS_1617)),
    UGDS_WHITE_1617 = as.numeric(as.character(UGDS_WHITE_1617)),
    SAT_AVG_1617    = as.numeric(as.character(SAT_AVG_1617)),
    ACTCMMID_1617   = as.numeric(as.character(ACTCMMID_1617)),
    ADM_RATE_1617   = as.numeric(as.character(ADM_RATE_1617)),
    COSTT4_A_1617   = as.numeric(as.character(COSTT4_A_1617))
  )
## [1] 120  30