R: Code

vancrime2012_mana_hashimoto-html

 

######################################################
# Vancouver Crime Data 2012: Data Processing Script
# Date:October 20/2016
# By: Mana Hashimoto
# Desc: GEOB472
######################################################

# ------------------------------------------------------------------ #
# ---------------------- Install Libraries ------------------------- #
# ------------------------------------------------------------------ #
install.packages("GISTools")
install.packages("RJSONIO")
install.packages("rgdal")
install.packages("RCurl")
install.packages("curl")

# ------------------------------------------------------------------ #
# ----------------------- Load Libraries -------------------------- #
# ------------------------------------------------------------------ #
library(GISTools)
library(RJSONIO)
library(rgdal)
library(RCurl)
library(curl)
library(sp)

# ------------------------------------------------------------------ #
# ---------------------------- Acquire ----------------------------- #
# ------------------------------------------------------------------ #

# access downloaded data from the local drive
data<- read.csv("/Users/manahashimoto/Desktop/CRIME ASS/crime_csv_2012_nolatlong.csv")
# inspect data
print(head(data))

# ------------------------------------------------- #
# -------------- Parse: Geocoder ------------------- #
# ------------------------------------------------- #
# change intersection to 00's
data$h_block= gsub("X","0", data$HUNDRED_BLOCK)
print(head(data))

# Join the strings from each column together & add "Vancouver, BC":
data$full_address = paste(data$h_block,
"Vancouver, BC",
sep=", "),
sep=" ")

# removing "OFFSET TO PROTECT PRIVACY " from the full_address entries
data$full_address = gsub("OFFSET TO PROTECT PRIVACY ", "", data$full_address)
print(head(data$full_address))
# a function taking a full address string, formatting it, and making
# a call to the BC government's geocoding API
bc_geocode = function(search){
# return a warning message if input is not a character string
if(!is.character(search)){stop("'search' must be a character string")}

# formatting characters that need to be escaped in the URL, ie:
# substituting spaces ' ' for '%20'.
search = RCurl::curlEscape(search)

# first portion of the API call URL
base_url = "http://apps.gov.bc.ca/pub/geocoder/addresses.json?addressString="

# constant end of the API call URL
url_tail = "&locationDescriptor=any&maxResults=1&interpolation=adaptive&echo=true&setBack=0&outputSRS=4326&minScore=1&provinceCode=BC"

# combining the URL segments into one string
final_url = paste0(base_url, search, url_tail)

# making the call to the geocoding API by getting the response from the URL
response = RCurl::getURL(final_url)

# parsing the JSON response into an R list
response_parsed = RJSONIO::fromJSON(response)

# if there are coordinates in the response, assign them to `geocoords`
if(length(response_parsed$features[[1]]$geometry[[3]]) > 0){
geocoords = list(lon = response_parsed$features[[1]]$geometry[[3]][1],
lat = response_parsed$features[[1]]$geometry[[3]][2])
}else{
geocoords = NA
}

# returns the `geocoords` object
return(geocoords)
}

# Geocode the events - we use the BC Government's geocoding API
# Create an empty vector for lat and lon coordinates
lat = c()
lon = c()

# loop through the addresses
for(i in 1:length(data$full_address)){
# store the address at index "i" as a character
address = data$full_address[i]
# append the latitude of the geocoded address to the lat vector
lat = c(lat, bc_geocode(address)$lat)
# append the longitude of the geocoded address to the lon vector
lon = c(lon, bc_geocode(address)$lon)
# at each iteration through the loop, print the coordinates - takes about 20 min.
print(paste("#", i, ", ", lat[i], lon[i], sep = ","))
}

#add the lat lon coordinates to the dataframe
data$lat = lat
data$lon = lon

# after geocoding, it's a good idea to write your file out!
# you will need to modify the directory where you want R to write
# your data!
ofile = '/Users/manahashimoto/Desktop/lon-lat_vancrime2012_mana_hashimoto.csv'

write.csv(data, ofile)

data<-read.csv('/Users/manahashimoto/Desktop/CRIME ASS/lon-lat_vancrime2012_mana_hashimoto.csv')
# ------------------------------------------------- #
# --------------------- Mine ---------------------- #
# ------------------------------------------------- #
# --- Examine the unique cases --- #

# examine how the cases are grouped - are these intuitive?
unique(data$TYPE)

# examine the types of cases - can we make new groups that are more useful?
# Print each unique case on a new line for easier inspection
for (i in 1:length(unique(data$TYPE))){
print(unique(data$TYPE)[i], max.levels=0)
}

# Break-ins
break_ins = c('Break and Enter Residential/Other',
'Break and Enter Commercial')

#Theft
theft= c('Theft from Vehicle',
'Other Theft',
' Theft of Vehicle')
#Mischief
mischief= c('Mischief')

# Offences and Homocides
offences_homocides= c('Offence Against a Person',
' Homocide')

# give class id numbers:
data$cid = 9999
for(i in 1:length(data$TYPE)){
if(data$TYPE[i] %in% break_ins){
data$cid[i] = 1
}else if(data$TYPE[i] %in% mischief){
data$cid[i] = 2
}else if(data$TYPE[i] %in% offences_homocides){
data$cid[i] = 3
}else if(data$TYPE[i] %in% theft){
data$cid[i] = 4
}
}

# --- handle overlapping points --- #
# Set offset for points in same location:
data$lat_offset = data$lat
data$lon_offset = data$lon

# Run loop - if value overlaps, offset it by a random number
for(i in 1:length(data$lat)){
if ( (data$lat_offset[i] %in% data$lat_offset) && (data$lon_offset[i] %in% data$lon_offset)){
data$lat_offset[i] = data$lat_offset[i] + runif(1, 0.0001, 0.0005)
data$lon_offset[i] = data$lon_offset[i] + runif(1, 0.0001, 0.0005)
}
}

R also allows us to easily compute the frequencies of the crimes:


# --- what are the top calls? --- #
# get a frequency distribution of the calls:
top_crimes = data.frame(table(data$TYPE))
top_crimes = top_crimes[order(top_calls$Freq), ]
print(top_crimes)

                               Var1  Freq
1        Break and Enter Commercial  1687
2 Break and Enter Residential/Other  3311
3                          Homicide     8
4                          Mischief  4243
5          Offence Against a Person  3780
6                       Other Theft 11788
7                Theft from Vehicle  8097
8                  Theft of Vehicle  1151

# ------------------------------------------------------------------ #
# ---------------------------- Filter ------------------------------ #
# ------------------------------------------------------------------ #
# Subset only the data if the coordinates are within our bounds or if
# it is not a NA
data_filter = subset(data, (lat <= 49.313162) & (lat >= 49.199554) &
(lon <= -123.019028) & (lon >= -123.271371) & is.na(lon) == FALSE )
# plot the data
plot(data_filter$lon, data_filter$lat)

# If everthing looks good, we might also write out our file:
ofile_filtered = '/Users/manahashimoto/Desktop/CRIME ASS/vancrime2012_mana_hashimoto.csv'
write.csv(data_filter, ofile_filtered)

# --- Convert Data to Shapefile --- #
# store coordinates in dataframe
coords_vancrime2012 = data.frame(data_filter$lon_offset, data_filter$lat_offset)

# create spatialPointsDataFrame
data_shp = SpatialPointsDataFrame(coords = coords_vancrime2012, data = data_filter)

# set the projection to wgs84
projection_wgs84 = CRS("+proj=longlat +datum=WGS84")
proj4string(data_shp) = projection_wgs84

# set an output folder for our geojson files:
geofolder = "/Users/manahashimoto/Desktop/CRIME ASS/CRIME2012/2012files/"

# Join the folderpath to each file name:
opoints_shp = paste(geofolder, "vancrime_", "2012", ".shp", sep = "")
print(opoints_shp)

opoints_geojson = paste(geofolder, "calls_", "1401", ".geojson", sep = "")
print(opoints_geojson)

# write the file to a shp
writeOGR(data_shp, opoints_shp, layer = "data_shp", driver = "ESRI Shapefile",
check_exists = FALSE)

# write file to geojson
writeOGR(data_shp, opoints_geojson, layer = "data_shp", driver = "GeoJSON",
check_exists = FALSE)
# --- aggregate to a grid --- #
# ref: http://www.inside-r.org/packages/cran/GISTools/docs/poly.counts
# set the file name - combine the shpfolder with the name of the grid
grid_fn = 'https://raw.githubusercontent.com/joeyklee/aloha-r/master/data/calls_2014/geo/hgrid_250m.geojson'

# read in the hex grid setting the projection to utm10n
hexgrid = readOGR(grid_fn, 'OGRGeoJSON')

hexgrid_wgs84 = spTransform(hexgrid, projection_wgs84)

# Use the poly.counts() function to count the number of occurrences of crimes per grid cell
grid_cnt = poly.counts(data_shp, hexgrid_wgs84)

# define the output names:
ohex_shp = paste(geofolder, "hexgrid_250m_", "2012", "_cnts",
".shp", sep = "")
print(ohex_shp)

ohex_geojson = paste(geofolder, "hexgrid_250m_","2012","_cnts",".geojson")
print(ohex_geojson)

# write the file to a shp
writeOGR(check_exists = FALSE, hexgrid_wgs84, ohex_shp,
layer = "hexgrid_wgs84", driver = "ESRI Shapefile")

# write file to geojson
writeOGR(check_exists = FALSE, hexgrid_wgs84, ohex_geojson,
layer = "hexgrid_wgs84", driver = "GeoJSON")

 
##################
 

Applying Leaflet

# install the leaflet library install.packages("leaflet") # add the leaflet library to your script library(leaflet) library(sp) # initiate the leaflet instance and store it to a variable m = leaflet() # we want to add map tiles so we use the addTiles() function - the default is openstreetmap m = addTiles(m) # reading in a csv file - our data_filter.csv from the vancrime2012 csv file. # this string tells my R where to find data_filter.csv on my laptop. # you will need to change it to tell your R where to find your own # file! filename = "/Users/manahashimoto/Desktop/CRIME ASS/vancrime2012_mana_hashimoto.csv" data_filter = read.csv(filename, header = TRUE) # subset out your data_filter: df_breakins = subset(data_filter, cid == 1) df_mischief = subset(data_filter, cid == 2) df_offences_homocides = subset(data_filter, cid == 3) df_theft = subset(data_filter, cid == 4) # initiate leaflet m = leaflet() # add openstreetmap tiles (default) m = addTiles(m) # you can now see that our maptiles are rendered m colorFactors = colorFactor(c('red', 'orange', 'purple', 'blue', 'pink', 'brown'), domain = data_filter$cid) m = addCircleMarkers(m, lng = df_breakins$lon_offset, # we feed the longitude coordinates lat = df_breakins$lat_offset, popup = df_breakins$TYPE, radius = 2, stroke = FALSE, fillOpacity = 0.75, color = colorFactors(df_breakins$cid), group = "1 - Break-ins") m = addCircleMarkers(m, lng = df_mischief$lon_offset, lat=df_mischief$lat_offset, popup = df_mischief$TYPE, radius = 2, stroke = FALSE, fillOpacity = 0.75, color = colorFactors(df_mischief$cid), group = "2 - Mischief") m = addCircleMarkers(m, lng = df_offences_homocides$lon_offset, lat=df_offences_homocides$lat_offset, popup = df_offences_homocides$TYPE, radius = 2, stroke = FALSE, fillOpacity = 0.75, color = colorFactors(df_offences_homocides$cid), group = "3 - Offenses and Homocides") m = addCircleMarkers(m, lng = df_theft$lon_offset, lat=df_theft$lat_offset, popup = df_theft$TYPE, radius = 2, stroke = FALSE, fillOpacity = 0.75, color = colorFactors(df_theft$cid), group = "4 -Theft") m = addTiles(m, group = "OSM (default)") m = addProviderTiles(m,"Stamen.Toner", group = "Toner") m = addProviderTiles(m, "Stamen.TonerLite", group = "Toner Lite") m = addLayersControl(m, baseGroups = c("Toner Lite","Toner"), overlayGroups = c("1 - Break-ins", "2 - Mischief", "3 - Offences and Homocides","4 - Theft") ) # make the map m # subset out your data_filter: df_breakins = subset(data_filter, cid == 1) df_mischief = subset(data_filter, cid == 2) df_offences_homocides = subset(data_filter, cid == 3) df_theft = subset(data_filter, cid == 4) # Now we're going to put those variables into a **list** so we can loop through them: data_filterlist = list(df_breakins = subset(data_filter, cid == 1), df_mischief = subset(data_filter, cid == 2), df_offences_homocides = subset(data_filter, cid == 3), df_theft = subset(data_filter, cid == 4)) # Remember we also had these groups associated with each variable? Let's put them in a list too: layerlist = c("1 - Breakins", "2 - Mischief", "3 - Offences Homocides","4 - Theft") # We can keep that same color variable: colorFactors = colorFactor(c('red', 'orange', 'purple', 'blue', 'pink', 'brown'), domain=data_filter$cid) # Now we have our loop - each time through the loop, it is adding our markers to the map object: for (i in 1:length(data_filterlist)){ m = addCircleMarkers(m, lng=data_filterlist[[i]]$lon_offset, lat=data_filterlist[[i]]$lat_offset, popup=data_filterlist[[i]]$TYPE, radius=2, stroke = FALSE, fillOpacity = 0.75, color = colorFactors(data_filterlist[[i]]$cid), group = layerlist[i] ) } m = addTiles(m, "Stamen.TonerLite", group = "Toner Lite") m = addLayersControl(m, overlayGroups = c("Toner Lite"), baseGroups = layerlist ) m # define the filepath hex_1401_fn = 'https://raw.githubusercontent.com/joeyklee/aloha-r/master/data/calls_2014/geo/hgrid_250m_1401_counts.geojson' # read in the geojson hex_1401 = readOGR(hex_1401_fn, "OGRGeoJSON") m = leaflet() m = addProviderTiles(m, "Stamen.TonerLite", group = "Toner Lite") # Create a continuous palette function pal = colorNumeric( palette = "Greens", domain = hex_1401$data ) # add the polygons to the map m = addPolygons(m, data = hex_1401, stroke = FALSE, smoothFactor = 0.2, fillOpacity = 1, color = ~pal(hex_1401$data), popup = paste("Crime Density: ", hex_1401$data, sep="") ) m = addLegend(m, "bottomright", pal = pal, values = hex_1401$data, title = "Crime Density", labFormat = labelFormat(prefix = " "), opacity = 0.75 ) m # initiate leaflet map layer m = leaflet() m = addProviderTiles(m, "Stamen.TonerLite", group = "Toner Lite") # --- hex grid --- # # store the file name of the geojson hex grid hex_1401_fn = 'https://raw.githubusercontent.com/joeyklee/aloha-r/master/data/calls_2014/geo/hgrid_250m.geojson' # read in the data hex_1401 = readOGR(hex_1401_fn, "OGRGeoJSON") # Create a continuous palette function pal = colorNumeric( palette = "Greens", domain = hex_1401$data ) # add hex grid m = addPolygons(m, data = hex_1401, stroke = FALSE, smoothFactor = 0.2, fillOpacity = 1, color = ~pal(hex_1401$data), popup= paste("Crime Density: ",hex_1401$data, sep=""), group = "hex" ) # add legend m = addLegend(m, "bottomright", pal = pal, values = hex_1401$data, title = "Crime Density", labFormat = labelFormat(prefix = " "), opacity = 0.75 ) # --- points data --- # # subset out your data: df_breakins = subset(data_filter, cid == 1) df_mischief = subset(data_filter, cid == 2) df_offences_homocides = subset(data_filter, cid == 3) df_theft= subset(data_filter, cid== 4) data_filterlist = list(df_breakins = subset(data_filter, cid == 1), df_mischief = subset(data_filter, cid == 2), df_offences_homocides = subset(data_filter, cid == 3), df_theft = subset(data_filter, cid == 4)) layerlist = c("1 - Breakins", "2 - Mischief", "3 - Offences Homocides","4 - Theft") colorFactors = colorFactor(c('orange', 'purple', 'blue', 'brown', 'red', 'brown'), domain=data_filter$cid) for (i in 1:length(data_filterlist)){ m = addCircleMarkers(m, lng=data_filterlist[[i]]$lon_offset, lat=data_filterlist[[i]]$lat_offset, popup=data_filterlist[[i]]$TYPE, radius=2, stroke = FALSE, fillOpacity = 0.75, color = colorFactors(data_filterlist[[i]]$cid), group = layerlist[i] ) } m = addLayersControl(m, overlayGroups = c("Toner Lite", "hex"), baseGroups = layerlist ) # show map m