Cleaning

Tasks

  1. Start with your original dataset (e.g., .csv)
  2. Clearly identify and explain issues in the raw data (e.g., missing values, duplicates, inconsistent formats)
  3. Show how you cleaned the data step by step
  4. Use comments in your R code to explain what each step does
  5. Save your cleaned dataset as .RData and mention this in your write-up

Preliminary Set-Up

Load the Package

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import Original Dataset

drugwar <- read_csv(file = "data/drugwarkillings.csv") # raw, untouched dataset; all cleaning happens on copies
Rows: 6531 Columns: 31
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (20): event_id_cnty, event_date, disorder_type, event_type, sub_event_ty...
dbl (11): year, time_precision, inter1, inter2, interaction, iso, latitude, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Check the Dataset

Take a glimpse at the dataset

# Inspect the dimensions, column names, and column types of the raw data

drugwar |>
  glimpse() # the type shown for each column reveals what still needs cleaning
Rows: 6,531
Columns: 31
$ event_id_cnty      <chr> "PHL14523", "PHL14680", "PHL14507", "PHL14502", "PH…
$ event_date         <chr> "30 June 2022", "29 June 2022", "28 June 2022", "27…
$ year               <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 202…
$ time_precision     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ disorder_type      <chr> "Political violence", "Political violence", "Politi…
$ event_type         <chr> "Battles", "Violence against civilians", "Violence …
$ sub_event_type     <chr> "Armed clash", "Attack", "Attack", "Attack", "Attac…
$ actor1             <chr> "ASG: Abu Sayyaf", "Armed Drug Suspects (Philippine…
$ assoc_actor_1      <chr> "Armed Drug Suspects (Philippines)", NA, NA, NA, NA…
$ inter1             <dbl> 2, 3, 3, 3, 3, 1, 3, 3, 1, 1, 3, 3, 3, 3, 2, 1, 3, …
$ actor2             <chr> "Military Forces of the Philippines (2016-2022)", "…
$ assoc_actor_2      <chr> NA, "Government of the Philippines (2016-2022)", "D…
$ inter2             <dbl> 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 7, 7, 1, 7, 7, …
$ interaction        <dbl> 12, 37, 37, 37, 37, 17, 37, 37, 17, 17, 37, 13, 37,…
$ civilian_targeting <chr> NA, "Civilian targeting", "Civilian targeting", "Ci…
$ iso                <dbl> 608, 608, 608, 608, 608, 608, 608, 608, 608, 608, 6…
$ region             <chr> "Southeast Asia", "Southeast Asia", "Southeast Asia…
$ country            <chr> "Philippines", "Philippines", "Philippines", "Phili…
$ admin1             <chr> "Bangsamoro Autonomous Region in Muslim Mindanao", …
$ admin2             <chr> "Sulu", "Metropolitan Manila", "Negros Occidental",…
$ admin3             <chr> "Parang", "Valenzuela", "Bacolod", "Caloocan City",…
$ location           <chr> "Lumbaan Mahaba", "Valenzuela", "Bacolod", "Calooca…
$ latitude           <dbl> 5.9092, 14.6770, 10.6666, 14.6495, 14.6257, 10.1601…
$ longitude          <dbl> 120.9445, 120.9877, 122.9500, 120.9678, 121.1225, 1…
$ geo_precision      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, …
$ source             <chr> "Philippine Daily Inquirer; ABS-CBN; Manila Bulleti…
$ source_scale       <chr> "National", "National", "National", "National", "Na…
$ notes              <chr> "On 30 June 2022, a suspected Abu Sayyaf militant w…
$ fatalities         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, …
$ tags               <chr> NA, "local administrators", NA, NA, NA, NA, NA, NA,…
$ timestamp          <dbl> 1657027838, 1687837303, 1657027838, 1657027838, 165…

Check the head of the dataset

# Preview the first ten (10) rows of the raw data

drugwar |>
  head(n = 10)
# A tibble: 10 × 31
   event_id_cnty event_date    year time_precision disorder_type      event_type
   <chr>         <chr>        <dbl>          <dbl> <chr>              <chr>     
 1 PHL14523      30 June 2022  2022              1 Political violence Battles   
 2 PHL14680      29 June 2022  2022              1 Political violence Violence …
 3 PHL14507      28 June 2022  2022              1 Political violence Violence …
 4 PHL14502      27 June 2022  2022              1 Political violence Violence …
 5 PHL14503      27 June 2022  2022              1 Political violence Violence …
 6 PHL14497      25 June 2022  2022              1 Political violence Violence …
 7 PHL14471      23 June 2022  2022              1 Political violence Violence …
 8 PHL14470      22 June 2022  2022              1 Political violence Violence …
 9 PHL14473      21 June 2022  2022              1 Strategic develop… Strategic…
10 PHL14477      21 June 2022  2022              1 Political violence Violence …
# ℹ 25 more variables: sub_event_type <chr>, actor1 <chr>, assoc_actor_1 <chr>,
#   inter1 <dbl>, actor2 <chr>, assoc_actor_2 <chr>, inter2 <dbl>,
#   interaction <dbl>, civilian_targeting <chr>, iso <dbl>, region <chr>,
#   country <chr>, admin1 <chr>, admin2 <chr>, admin3 <chr>, location <chr>,
#   latitude <dbl>, longitude <dbl>, geo_precision <dbl>, source <chr>,
#   source_scale <chr>, notes <chr>, fatalities <dbl>, tags <chr>,
#   timestamp <dbl>

Check the Data Summary through SkimR

Load the Package

library(skimr)

Check the Summary through skim

drugwar |> skim() # per-column summary: missing counts, unique values, and distributions
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Data summary
Name drugwar
Number of rows 6531
Number of columns 31
_______________________
Column type frequency:
character 20
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
event_id_cnty 0 1.00 6 8 0 6531 0
event_date 0 1.00 11 17 0 1631 0
disorder_type 0 1.00 14 22 0 3 0
event_type 0 1.00 5 26 0 6 0
sub_event_type 0 1.00 5 30 0 15 0
actor1 0 1.00 11 93 0 31 0
assoc_actor_1 6287 0.04 15 267 0 80 0
actor2 94 0.99 11 93 0 30 0
assoc_actor_2 403 0.94 15 159 0 118 0
civilian_targeting 517 0.92 18 18 0 1 0
region 0 1.00 14 14 0 1 0
country 0 1.00 11 11 0 1 0
admin1 0 1.00 6 47 0 17 0
admin2 0 1.00 4 21 0 74 0
admin3 0 1.00 3 25 0 713 0
location 0 1.00 3 28 0 1425 0
source 0 1.00 2 145 0 748 0
source_scale 0 1.00 5 22 0 12 0
notes 0 1.00 64 1196 0 6519 0
tags 6328 0.03 16 78 0 24 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1 2.017610e+03 1.66 2.016000e+03 2.016000e+03 2.017000e+03 2.019000e+03 2.022000e+03 ▇▂▂▁▁
time_precision 0 1 1.110000e+00 0.33 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.000000e+00 ▇▁▁▁▁
inter1 0 1 1.890000e+00 1.09 1.000000e+00 1.000000e+00 1.000000e+00 3.000000e+00 8.000000e+00 ▇▆▁▁▁
inter2 0 1 6.640000e+00 1.42 0.000000e+00 7.000000e+00 7.000000e+00 7.000000e+00 8.000000e+00 ▁▁▁▁▇
interaction 0 1 2.495000e+01 10.66 1.000000e+01 1.700000e+01 1.700000e+01 3.700000e+01 6.600000e+01 ▇▁▅▁▁
iso 0 1 6.080000e+02 0.00 6.080000e+02 6.080000e+02 6.080000e+02 6.080000e+02 6.080000e+02 ▁▁▇▁▁
latitude 0 1 1.349000e+01 2.69 4.680000e+00 1.314000e+01 1.459000e+01 1.481000e+01 1.858000e+01 ▁▁▂▇▁
longitude 0 1 1.218000e+02 1.43 1.173100e+02 1.209700e+02 1.210500e+02 1.227600e+02 1.263100e+02 ▁▂▇▂▁
geo_precision 0 1 1.200000e+00 0.42 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.000000e+00 ▇▁▂▁▁
fatalities 0 1 1.300000e+00 0.95 0.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.600000e+01 ▇▁▁▁▁
timestamp 0 1 1.652328e+09 31369877.56 1.552576e+09 1.637244e+09 1.657064e+09 1.657064e+09 1.742328e+09 ▁▂▇▁▁

Issues to fix

  1. Many columns need to be:

    • renamed to simpler column names

    • dropped, keeping only the needed columns in a simplified data frame

    • checked for missing values

  2. The date is stored in a character format –> convert it to a proper Date format

  3. Additional: prepare the province shapefile so the data can be plotted on a Philippine map

Cleaning Process

Mutate the columns and Change Format

# Step 1: copy the raw columns under clearer names and fix their formats.
dw <- drugwar |>
  mutate(
    num_of_death = fatalities,  # use mutate to copy the column under a new name
    province = toupper(admin2), # upper case so the names are easier to join on
    perpetrator = actor1,
    victim = actor2,
    region = toupper(admin1),   # replaces the original `region` column with the upper-case admin1 name
    conflict = event_type,
    # event_date looks like "30 June 2022": %B reads the full month name, %m
    # would expect a month number. Month-name parsing follows the session
    # locale, so this assumes an English locale.
    date = as.Date(event_date, format = "%d %B %Y")
  )

# Step 2: standardise province names so they match the names used in the
# shapefile, then keep only the columns needed for the analysis.
dw <- dw |>
  mutate(
    province = case_when(
      # Both spellings of the capital region collapse to one label.
      str_detect(province, "NCR") | province == "METROPOLITAN MANILA" ~ "METRO MANILA",
      # NOTE(review): relabels a "CITY ... ISABELA" entry as ISABELA; confirm
      # this is the intended province for such rows.
      str_detect(province, "ISABELA") & str_detect(province, "CITY") ~ "ISABELA",
      str_detect(province, "MOUNTAIN") ~ "MOUNTAIN PROVINCE",
      str_detect(province, "DINAGAT") ~ "DINAGAT ISLANDS",
      # Entries flagged as not being a province get a missing value.
      str_detect(province, "NOT A PROVINCE") ~ NA_character_,
      # Every other name is kept as it is.
      .default = province
    )
  ) |>
  # use select to keep only the columns needed
  select(region, province, perpetrator, victim, num_of_death, date, year, conflict)

dw |> tail() # last rows of the cleaned data, to confirm the new columns and date format
# A tibble: 6 × 8
  region      province perpetrator victim num_of_death date        year conflict
  <chr>       <chr>    <chr>       <chr>         <dbl> <date>     <dbl> <chr>   
1 ILOCOS REG… ILOCOS … Anti-Drug … Civil…            1 2016-06-30  2016 Violenc…
2 NATIONAL C… METRO M… Armed Drug… Civil…            2 2016-06-30  2016 Violenc…
3 CALABARZON  CAVITE   Police For… Civil…            2 2016-06-30  2016 Violenc…
4 CALABARZON  CAVITE   Police For… Civil…            1 2016-06-30  2016 Violenc…
5 CALABARZON  LAGUNA   Police For… Civil…            2 2016-06-30  2016 Violenc…
6 CENTRAL VI… BOHOL    Police For… Civil…            2 2016-06-30  2016 Violenc…

Take a glimpse at the dataset

dw |> glimpse() # confirm the cleaned data has the selected columns and that date is now a <date>
Rows: 6,531
Columns: 8
$ region       <chr> "BANGSAMORO AUTONOMOUS REGION IN MUSLIM MINDANAO", "NATIO…
$ province     <chr> "SULU", "METRO MANILA", "NEGROS OCCIDENTAL", "METRO MANIL…
$ perpetrator  <chr> "ASG: Abu Sayyaf", "Armed Drug Suspects (Philippines)", "…
$ victim       <chr> "Military Forces of the Philippines (2016-2022)", "Civili…
$ num_of_death <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, …
$ date         <date> 2022-06-30, 2022-06-29, 2022-06-28, 2022-06-27, 2022-06-…
$ year         <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 202…
$ conflict     <chr> "Battles", "Violence against civilians", "Violence agains…

Philippine Map

Load the package

library(sf) # sf provides tools for spatial data; it is used here to read the shapefile into R
Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(viridis) # optional: this is for the color that will be used in the graph
Loading required package: viridisLite

Load the shapefile (.shp) into the dataset

provinces_from_shp <- st_read(dsn = "data/PH_Adm2_ProvDists.shp") # read the province boundaries as an sf object
Multiple layers are present in data source /Users/sarahjodycastaneto/Documents/djr/sarahjody-finalproject-website/data/PH_Adm2_ProvDists.shp, reading layer `PH_Adm2_ProvDists.shp'.
Use `st_layers' to list all layer names and their type in a data source.
Set the `layer' argument in `st_read' to read a particular layer.
Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, :
automatically selected the first layer in a data source containing more than
one.
Reading layer `PH_Adm2_ProvDists.shp' from data source 
  `/Users/sarahjodycastaneto/Documents/djr/sarahjody-finalproject-website/data/PH_Adm2_ProvDists.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 88 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -456122.9 ymin: 508012.9 xmax: 898178.5 ymax: 2336000
Projected CRS: WGS 84 / UTM zone 51N

Inspect the data

provinces_from_shp |> head() # first rows: attribute columns plus the geometry column
Simple feature collection with 6 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 152028.4 ymin: 1728558 xmax: 430000.8 ymax: 2336000
Projected CRS: WGS 84 / UTM zone 51N
  adm1_psgc adm2_psgc      adm2_en geo_level len_crs   area_crs len_km area_km2
1     1e+08 102800000 Ilocos Norte      Prov  309785 3276945154    309     3276
2     1e+08 102900000   Ilocos Sur      Prov  452374 2467458323    452     2467
3     1e+08 103300000     La Union      Prov  262415 1414080983    262     1414
4     1e+08 105500000   Pangasinan      Prov  789136 5161200257    789     5161
5     2e+08 200900000      Batanes      Prov  230060  201280837    230      201
6     2e+08 201500000      Cagayan      Prov 1135169 8794377504   1135     8794
                        geometry
1 MULTIPOLYGON (((285928.4 20...
2 MULTIPOLYGON (((242575.4 19...
3 MULTIPOLYGON (((240302.2 18...
4 MULTIPOLYGON (((171359.2 18...
5 MULTIPOLYGON (((390824.6 23...
6 MULTIPOLYGON (((390811.2 21...
provinces_from_shp |> colnames() # list the column names before renaming them
[1] "adm1_psgc" "adm2_psgc" "adm2_en"   "geo_level" "len_crs"   "area_crs" 
[7] "len_km"    "area_km2"  "geometry" 

Rename the columns and select the needed ones

# Copy the shapefile attributes under descriptive names, then keep only the
# renamed columns.
cleanersf_province <- provinces_from_shp |>
  mutate(region_numcode = adm1_psgc,   # adm1 code: shared by all provinces of a region
         province_numcode = adm2_psgc, # adm2 code: differs for each province
         province = toupper(adm2_en),  # this is to make sure that they are all in upper case
         category = geo_level,
         perimeter = len_crs,
         area = area_crs,
         perimeter_km = len_km,
         area_km = area_km2,
         # This is a copy of the geometry under a second name; sf still keeps
         # the original `geometry` column as the active geometry of the result.
         spatialdata_multipolygon = geometry) |>
  select(region_numcode, province_numcode, province, category, perimeter, area, perimeter_km, area_km, spatialdata_multipolygon)

cleanersf_province |> head(n = 10) # preview the renamed shapefile columns
Simple feature collection with 10 features and 8 fields
Active geometry column: geometry
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 152028.4 ymin: 1594264 xmax: 449840.4 ymax: 2336000
Projected CRS: WGS 84 / UTM zone 51N
   region_numcode province_numcode      province category perimeter        area
1           1e+08            1e+08  ILOCOS NORTE     Prov    309785  3276945154
2           1e+08            1e+08    ILOCOS SUR     Prov    452374  2467458323
3           1e+08            1e+08      LA UNION     Prov    262415  1414080983
4           1e+08            1e+08    PANGASINAN     Prov    789136  5161200257
5           2e+08            2e+08       BATANES     Prov    230060   201280837
6           2e+08            2e+08       CAGAYAN     Prov   1135169  8794377504
7           2e+08            2e+08       ISABELA     Prov    606202 10489909469
8           2e+08            2e+08 NUEVA VIZCAYA     Prov    403864  4126548282
9           2e+08            2e+08       QUIRINO     Prov    323147  2767354533
10          3e+08            3e+08        BATAAN     Prov    290679  1246275464
   perimeter_km area_km       spatialdata_multipolygon
1           309    3276 MULTIPOLYGON (((285928.4 20...
2           452    2467 MULTIPOLYGON (((242575.4 19...
3           262    1414 MULTIPOLYGON (((240302.2 18...
4           789    5161 MULTIPOLYGON (((171359.2 18...
5           230     201 MULTIPOLYGON (((390824.6 23...
6          1135    8794 MULTIPOLYGON (((390811.2 21...
7           606   10489 MULTIPOLYGON (((412243.1 19...
8           403    4126 MULTIPOLYGON (((334226.3 18...
9           323    2767 MULTIPOLYGON (((399555.6 18...
10          290    1246 MULTIPOLYGON (((236777.5 16...
                         geometry
1  MULTIPOLYGON (((285928.4 20...
2  MULTIPOLYGON (((242575.4 19...
3  MULTIPOLYGON (((240302.2 18...
4  MULTIPOLYGON (((171359.2 18...
5  MULTIPOLYGON (((390824.6 23...
6  MULTIPOLYGON (((390811.2 21...
7  MULTIPOLYGON (((412243.1 19...
8  MULTIPOLYGON (((334226.3 18...
9  MULTIPOLYGON (((399555.6 18...
10 MULTIPOLYGON (((236777.5 16...
# Relabel any NCR / National Capital Region entry as "METRO MANILA", the same
# label the drug-war data uses, and leave every other name as it is

cleanersf_province <- cleanersf_province |>
  mutate(
    province = if_else(
      str_detect(province, "NCR") | province == "NATIONAL CAPITAL REGION",
      "METRO MANILA",
      province
    )
  )

Join both datasets together

# Attach the drug-war events to the province polygons by province name.
# Polygons without any matching event are kept, with NA in the event columns.
# The key repeats on both sides (88 polygons but fewer distinct names, and
# many events per province), so the relationship is declared explicitly
# instead of relying on dplyr's warning.
# NOTE(review): an event whose province name matches several polygons is
# repeated once per polygon, so summing num_of_death on this joined table
# would overcount; aggregate `dw` by province before joining if totals are needed.
province <- left_join(
  cleanersf_province,
  dw,
  by = c("province" = "province"),
  relationship = "many-to-many"
)
Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 2 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.

Check if they have the same names

unique(dw[["province"]]) # distinct province names in the cleaned drug-war data
 [1] "SULU"                  "METRO MANILA"          "NEGROS OCCIDENTAL"    
 [4] "RIZAL"                 "CEBU"                  "BULACAN"              
 [7] "LEYTE"                 "AGUSAN DEL NORTE"      "MAGUINDANAO DEL NORTE"
[10] "PAMPANGA"              "COTABATO"              "ILOILO"               
[13] "LAGUNA"                "CAVITE"                "BOHOL"                
[16] "TAWI-TAWI"             "KALINGA"               "CAMARINES NORTE"      
[19] "BENGUET"               "QUEZON"                "MISAMIS ORIENTAL"     
[22] "ZAMBOANGA DEL SUR"     "NEGROS ORIENTAL"       "LANAO DEL SUR"        
[25] "NUEVA ECIJA"           "BATANGAS"              "CAMARINES SUR"        
[28] "ALBAY"                 "MASBATE"               "SURIGAO DEL SUR"      
[31] "ZAMBALES"              "DAVAO DE ORO"          "SOUTH COTABATO"       
[34] "ZAMBOANGA SIBUGAY"     "BATAAN"                "TARLAC"               
[37] "DAVAO DEL SUR"         "MAGUINDANAO DEL SUR"   "SOUTHERN LEYTE"       
[40] "SORSOGON"              "DAVAO DEL NORTE"       "PANGASINAN"           
[43] "ILOCOS SUR"            "SAMAR"                 "ILOCOS NORTE"         
[46] "BUKIDNON"              "CAGAYAN"               "BASILAN"              
[49] "CAPIZ"                 "OCCIDENTAL MINDORO"    "DAVAO OCCIDENTAL"     
[52] "AGUSAN DEL SUR"        "ORIENTAL MINDORO"      "ZAMBOANGA DEL NORTE"  
[55] "ISABELA"               "LANAO DEL NORTE"       "SARANGANI"            
[58] "SULTAN KUDARAT"        "LA UNION"              "CATANDUANES"          
[61] "ABRA"                  "SURIGAO DEL NORTE"     "MISAMIS OCCIDENTAL"   
[64] "QUIRINO"               "PALAWAN"               "SIQUIJOR"             
[67] "ANTIQUE"               "GUIMARAS"              "NUEVA VIZCAYA"        
[70] "DAVAO ORIENTAL"        "AKLAN"                 "NORTHERN SAMAR"       
[73] "APAYAO"                "IFUGAO"               
unique(cleanersf_province[["province"]]) # distinct province names in the shapefile, for comparison
 [1] "ILOCOS NORTE"                     "ILOCOS SUR"                      
 [3] "LA UNION"                         "PANGASINAN"                      
 [5] "BATANES"                          "CAGAYAN"                         
 [7] "ISABELA"                          "NUEVA VIZCAYA"                   
 [9] "QUIRINO"                          "BATAAN"                          
[11] "BULACAN"                          "NUEVA ECIJA"                     
[13] "PAMPANGA"                         "TARLAC"                          
[15] "ZAMBALES"                         "AURORA"                          
[17] "BATANGAS"                         "CAVITE"                          
[19] "LAGUNA"                           "QUEZON"                          
[21] "RIZAL"                            "ALBAY"                           
[23] "CAMARINES NORTE"                  "CAMARINES SUR"                   
[25] "CATANDUANES"                      "MASBATE"                         
[27] "SORSOGON"                         "AKLAN"                           
[29] "ANTIQUE"                          "CAPIZ"                           
[31] "ILOILO"                           "NEGROS OCCIDENTAL"               
[33] "GUIMARAS"                         "BOHOL"                           
[35] "CEBU"                             "NEGROS ORIENTAL"                 
[37] "SIQUIJOR"                         "EASTERN SAMAR"                   
[39] "LEYTE"                            "NORTHERN SAMAR"                  
[41] "SAMAR"                            "SOUTHERN LEYTE"                  
[43] "BILIRAN"                          "ZAMBOANGA DEL NORTE"             
[45] "ZAMBOANGA DEL SUR"                "ZAMBOANGA SIBUGAY"               
[47] "CITY OF ISABELA (NOT A PROVINCE)" "BUKIDNON"                        
[49] "CAMIGUIN"                         "LANAO DEL NORTE"                 
[51] "MISAMIS OCCIDENTAL"               "MISAMIS ORIENTAL"                
[53] "DAVAO DEL NORTE"                  "DAVAO DEL SUR"                   
[55] "DAVAO ORIENTAL"                   "DAVAO DE ORO"                    
[57] "DAVAO OCCIDENTAL"                 "COTABATO"                        
[59] "SOUTH COTABATO"                   "SULTAN KUDARAT"                  
[61] "SARANGANI"                        "METRO MANILA"                    
[63] "ABRA"                             "BENGUET"                         
[65] "IFUGAO"                           "KALINGA"                         
[67] "MOUNTAIN PROVINCE"                "APAYAO"                          
[69] "AGUSAN DEL NORTE"                 "AGUSAN DEL SUR"                  
[71] "SURIGAO DEL NORTE"                "SURIGAO DEL SUR"                 
[73] "DINAGAT ISLANDS"                  "MARINDUQUE"                      
[75] "OCCIDENTAL MINDORO"               "ORIENTAL MINDORO"                
[77] "PALAWAN"                          "ROMBLON"                         
[79] "BASILAN"                          "LANAO DEL SUR"                   
[81] "SULU"                             "TAWI-TAWI"                       
[83] "MAGUINDANAO DEL NORTE"            "MAGUINDANAO DEL SUR"             
[85] NA                                

Save the Data

# .RData stores the R objects themselves, so column types such as the parsed
# dates are kept when the file is loaded again
save(list = "dw", file = "data/drugwar.RData")
# the shapefile data is saved as the cleaned sf object (attributes and geometry)
save(list = "cleanersf_province", file = "data/shapefile.RData")