R: Code – Mana Hashimoto

vancrime2012_mana_hashimoto-html

 

######################################################
# Vancouver Crime Data 2012: Data Processing Script
# Date:October 20/2016
# By: Mana Hashimoto
# Desc: GEOB472
######################################################

# ------------------------------------------------------------------ #
# ---------------------- Install Libraries ------------------------- #
# ------------------------------------------------------------------ #
install.packages("GISTools")
install.packages("RJSONIO")
install.packages("rgdal")
install.packages("RCurl")
install.packages("curl")

# ------------------------------------------------------------------ #
# ----------------------- Load Libraries -------------------------- #
# ------------------------------------------------------------------ #
library(GISTools)
library(RJSONIO)
library(rgdal)
library(RCurl)
library(curl)
library(sp)

# ------------------------------------------------------------------ #
# ---------------------------- Acquire ----------------------------- #
# ------------------------------------------------------------------ #

# access downloaded data from the local drive
data<- read.csv("/Users/manahashimoto/Desktop/CRIME ASS/crime_csv_2012_nolatlong.csv")
# inspect data
print(head(data))

# ------------------------------------------------- #
# -------------- Parse: Geocoder ------------------- #
# ------------------------------------------------- #
# change intersection to 00's
data$h_block= gsub("X","0", data$HUNDRED_BLOCK)
print(head(data))

# Join the strings from each column together & add "Vancouver, BC":
data$full_address = paste(data$h_block,
"Vancouver, BC",
sep=", "),
sep=" ")

# removing "OFFSET TO PROTECT PRIVACY " from the full_address entries
data$full_address = gsub("OFFSET TO PROTECT PRIVACY ", "", data$full_address)
print(head(data$full_address))
# a function taking a full address string, formatting it, and making
# a call to the BC government's geocoding API
bc_geocode = function(search){
# return a warning message if input is not a character string
if(!is.character(search)){stop("'search' must be a character string")}

# formatting characters that need to be escaped in the URL, ie:
# substituting spaces ' ' for '%20'.
search = RCurl::curlEscape(search)

# first portion of the API call URL
base_url = "http://apps.gov.bc.ca/pub/geocoder/addresses.json?addressString="

# constant end of the API call URL
url_tail = "&locationDescriptor=any&maxResults=1&interpolation=adaptive&echo=true&setBack=0&outputSRS=4326&minScore=1&provinceCode=BC"

# combining the URL segments into one string
final_url = paste0(base_url, search, url_tail)

# making the call to the geocoding API by getting the response from the URL
response = RCurl::getURL(final_url)

# parsing the JSON response into an R list
response_parsed = RJSONIO::fromJSON(response)

# if there are coordinates in the response, assign them to `geocoords`
if(length(response_parsed$features[[1]]$geometry[[3]]) > 0){
geocoords = list(lon = response_parsed$features[[1]]$geometry[[3]][1],
lat = response_parsed$features[[1]]$geometry[[3]][2])
}else{
geocoords = NA
}

# returns the `geocoords` object
return(geocoords)
}

# Geocode the events - we use the BC Government's geocoding API
# Create an empty vector for lat and lon coordinates
lat = c()
lon = c()

# loop through the addresses
for(i in 1:length(data$full_address)){
# store the address at index "i" as a character
address = data$full_address[i]
# append the latitude of the geocoded address to the lat vector
lat = c(lat, bc_geocode(address)$lat)
# append the longitude of the geocoded address to the lon vector
lon = c(lon, bc_geocode(address)$lon)
# at each iteration through the loop, print the coordinates - takes about 20 min.
print(paste("#", i, ", ", lat[i], lon[i], sep = ","))
}

#add the lat lon coordinates to the dataframe
data$lat = lat
data$lon = lon

# after geocoding, it's a good idea to write your file out!
# you will need to modify the directory where you want R to write
# your data!
ofile = '/Users/manahashimoto/Desktop/lon-lat_vancrime2012_mana_hashimoto.csv'

write.csv(data, ofile)

data<-read.csv('/Users/manahashimoto/Desktop/CRIME ASS/lon-lat_vancrime2012_mana_hashimoto.csv')
# ------------------------------------------------- #
# --------------------- Mine ---------------------- #
# ------------------------------------------------- #
# --- Examine the unique cases --- #

# examine how the cases are grouped - are these intuitive?
unique(data$TYPE)

# examine the types of cases - can we make new groups that are more useful?
# Print each unique case on a new line for easier inspection
for (i in 1:length(unique(data$TYPE))){
print(unique(data$TYPE)[i], max.levels=0)
}

# Break-ins
break_ins = c('Break and Enter Residential/Other',
'Break and Enter Commercial')

#Theft
theft= c('Theft from Vehicle',
'Other Theft',
' Theft of Vehicle')
#Mischief
mischief= c('Mischief')

# Offences and Homocides
offences_homocides= c('Offence Against a Person',
' Homocide')

# give class id numbers:
data$cid = 9999
for(i in 1:length(data$TYPE)){
if(data$TYPE[i] %in% break_ins){
data$cid[i] = 1
}else if(data$TYPE[i] %in% mischief){
data$cid[i] = 2
}else if(data$TYPE[i] %in% offences_homocides){
data$cid[i] = 3
}else if(data$TYPE[i] %in% theft){
data$cid[i] = 4
}
}

# --- handle overlapping points --- #
# Set offset for points in same location:
data$lat_offset = data$lat
data$lon_offset = data$lon

# Run loop - if value overlaps, offset it by a random number
for(i in 1:length(data$lat)){
if ( (data$lat_offset[i] %in% data$lat_offset) && (data$lon_offset[i] %in% data$lon_offset)){
data$lat_offset[i] = data$lat_offset[i] + runif(1, 0.0001, 0.0005)
data$lon_offset[i] = data$lon_offset[i] + runif(1, 0.0001, 0.0005)
}
}

R also allows us to easily compute the frequencies of the crimes:


# --- what are the top calls? --- #
# get a frequency distribution of the calls:
top_crimes = data.frame(table(data$TYPE))
top_crimes = top_crimes[order(top_calls$Freq), ]
print(top_crimes)

                               Var1  Freq
1        Break and Enter Commercial  1687
2 Break and Enter Residential/Other  3311
3                          Homicide     8
4                          Mischief  4243
5          Offence Against a Person  3780
6                       Other Theft 11788
7                Theft from Vehicle  8097
8                  Theft of Vehicle  1151

# ------------------------------------------------------------------ #
# ---------------------------- Filter ------------------------------ #
# ------------------------------------------------------------------ #
# Subset only the data if the coordinates are within our bounds or if
# it is not a NA
data_filter = subset(data, (lat <= 49.313162) & (lat >= 49.199554) &
(lon <= -123.019028) & (lon >= -123.271371) & is.na(lon) == FALSE )
# plot the data
plot(data_filter$lon, data_filter$lat)

# If everthing looks good, we might also write out our file:
ofile_filtered = '/Users/manahashimoto/Desktop/CRIME ASS/vancrime2012_mana_hashimoto.csv'
write.csv(data_filter, ofile_filtered)

# --- Convert Data to Shapefile --- #
# store coordinates in dataframe
coords_vancrime2012 = data.frame(data_filter$lon_offset, data_filter$lat_offset)

# create spatialPointsDataFrame
data_shp = SpatialPointsDataFrame(coords = coords_vancrime2012, data = data_filter)

# set the projection to wgs84
projection_wgs84 = CRS("+proj=longlat +datum=WGS84")
proj4string(data_shp) = projection_wgs84

# set an output folder for our geojson files:
geofolder = "/Users/manahashimoto/Desktop/CRIME ASS/CRIME2012/2012files/"

# Join the folderpath to each file name:
opoints_shp = paste(geofolder, "vancrime_", "2012", ".shp", sep = "")
print(opoints_shp)

opoints_geojson = paste(geofolder, "calls_", "1401", ".geojson", sep = "")
print(opoints_geojson)

# write the file to a shp
writeOGR(data_shp, opoints_shp, layer = "data_shp", driver = "ESRI Shapefile",
check_exists = FALSE)

# write file to geojson
writeOGR(data_shp, opoints_geojson, layer = "data_shp", driver = "GeoJSON",
check_exists = FALSE)
# --- aggregate to a grid --- #
# ref: http://www.inside-r.org/packages/cran/GISTools/docs/poly.counts
# set the file name - combine the shpfolder with the name of the grid
grid_fn = 'https://raw.githubusercontent.com/joeyklee/aloha-r/master/data/calls_2014/geo/hgrid_250m.geojson'

# read in the hex grid setting the projection to utm10n
hexgrid = readOGR(grid_fn, 'OGRGeoJSON')

hexgrid_wgs84 = spTransform(hexgrid, projection_wgs84)

# Use the poly.counts() function to count the number of occurrences of crimes per grid cell
grid_cnt = poly.counts(data_shp, hexgrid_wgs84)

# define the output names:
ohex_shp = paste(geofolder, "hexgrid_250m_", "2012", "_cnts",
".shp", sep = "")
print(ohex_shp)

ohex_geojson = paste(geofolder, "hexgrid_250m_","2012","_cnts",".geojson")
print(ohex_geojson)

# write the file to a shp
writeOGR(check_exists = FALSE, hexgrid_wgs84, ohex_shp,
layer = "hexgrid_wgs84", driver = "ESRI Shapefile")

# write file to geojson
writeOGR(check_exists = FALSE, hexgrid_wgs84, ohex_geojson,
layer = "hexgrid_wgs84", driver = "GeoJSON")

 
##################
 
Applying Leaflet
# install the leaflet library
install.packages("leaflet")

# add the leaflet library to your script
library(leaflet)
library(sp)
# initiate the leaflet instance and store it to a variable
m = leaflet()

# we want to add map tiles so we use the addTiles() function - the default is openstreetmap
m = addTiles(m)

# reading in a csv file - our data_filter.csv from the vancrime2012 csv file. 
# this string tells my R where to find data_filter.csv on my laptop.
# you will need to change it to tell your R where to find your own
# file! 
filename = "/Users/manahashimoto/Desktop/CRIME ASS/vancrime2012_mana_hashimoto.csv"
data_filter = read.csv(filename, header = TRUE)


# subset out your data_filter:
df_breakins = subset(data_filter, cid == 1)
df_mischief = subset(data_filter, cid == 2)
df_offences_homocides = subset(data_filter, cid == 3)
df_theft = subset(data_filter, cid == 4)

# initiate leaflet
m = leaflet()

# add openstreetmap tiles (default)
m = addTiles(m)

# you can now see that our maptiles are rendered
m

colorFactors = colorFactor(c('red', 'orange', 'purple', 'blue', 'pink', 'brown'),
                           domain = data_filter$cid)


m = addCircleMarkers(m, 
                     lng = df_breakins$lon_offset, # we feed the longitude coordinates 
                     lat = df_breakins$lat_offset,
                     popup = df_breakins$TYPE,
                     radius = 2, 
                     stroke = FALSE, 
                     fillOpacity = 0.75, 
                     color = colorFactors(df_breakins$cid),
                     group = "1 - Break-ins")

m = addCircleMarkers(m, 
                     lng = df_mischief$lon_offset, lat=df_mischief$lat_offset, 
                     popup = df_mischief$TYPE,
                     radius = 2, 
                     stroke = FALSE, fillOpacity = 0.75,
                     color = colorFactors(df_mischief$cid),
                     group = "2 - Mischief")

m = addCircleMarkers(m, 
                     lng = df_offences_homocides$lon_offset, lat=df_offences_homocides$lat_offset, 
                     popup = df_offences_homocides$TYPE,
                     radius = 2, 
                     stroke = FALSE, fillOpacity = 0.75,
                     color = colorFactors(df_offences_homocides$cid),
                     group = "3 - Offenses and Homocides")


m = addCircleMarkers(m, 
                     lng = df_theft$lon_offset, lat=df_theft$lat_offset, 
                     popup = df_theft$TYPE,
                     radius = 2, 
                     stroke = FALSE, fillOpacity = 0.75,
                     color = colorFactors(df_theft$cid),
                     group = "4 -Theft")

m = addTiles(m, group = "OSM (default)") 
m = addProviderTiles(m,"Stamen.Toner", group = "Toner")
m = addProviderTiles(m, "Stamen.TonerLite", group = "Toner Lite")

m = addLayersControl(m,
                     baseGroups = c("Toner Lite","Toner"),
                     overlayGroups = c("1 - Break-ins", "2 - Mischief",
                                       "3 - Offences and Homocides","4 - Theft")
)

# make the map
m

# subset out your data_filter:
df_breakins = subset(data_filter, cid == 1)
df_mischief = subset(data_filter, cid == 2)
df_offences_homocides = subset(data_filter, cid == 3)
df_theft = subset(data_filter, cid == 4)

# Now we're going to put those variables into a **list** so we can loop through them:
data_filterlist = list(df_breakins = subset(data_filter, cid == 1),
                       df_mischief = subset(data_filter, cid == 2),
                       df_offences_homocides = subset(data_filter, cid == 3),
                       df_theft = subset(data_filter, cid == 4))

# Remember we also had these groups associated with each variable? Let's put them in a list too:
layerlist = c("1 - Breakins", "2 - Mischief",
              "3 - Offences Homocides","4 - Theft")

# We can keep that same color variable:
colorFactors = colorFactor(c('red', 'orange', 'purple', 'blue', 'pink', 'brown'), domain=data_filter$cid)

# Now we have our loop - each time through the loop, it is adding our markers to the map object:
for (i in 1:length(data_filterlist)){
  m = addCircleMarkers(m, 
                       lng=data_filterlist[[i]]$lon_offset,
                       lat=data_filterlist[[i]]$lat_offset, 
                       popup=data_filterlist[[i]]$TYPE, 
                       radius=2,
                       stroke = FALSE, 
                       fillOpacity = 0.75,
                       color = colorFactors(data_filterlist[[i]]$cid),
                       group = layerlist[i]
  )
}

m = addTiles(m, "Stamen.TonerLite", group = "Toner Lite") 
m = addLayersControl(m,
                     overlayGroups = c("Toner Lite"),
                     baseGroups = layerlist
)
m

# define the filepath
hex_1401_fn = 'https://raw.githubusercontent.com/joeyklee/aloha-r/master/data/calls_2014/geo/hgrid_250m_1401_counts.geojson'

# read in the geojson
hex_1401 = readOGR(hex_1401_fn, "OGRGeoJSON")

m = leaflet()
m = addProviderTiles(m, "Stamen.TonerLite", group = "Toner Lite")

# Create a continuous palette function
pal = colorNumeric(
  palette = "Greens",
  domain = hex_1401$data
)

# add the polygons to the map
m = addPolygons(m, 
                data = hex_1401,
                stroke = FALSE, 
                smoothFactor = 0.2, 
                fillOpacity = 1,
                color = ~pal(hex_1401$data),
                popup = paste("Crime Density: ", hex_1401$data, sep="")
)

m = addLegend(m, "bottomright", pal = pal, values = hex_1401$data,
              title = "Crime Density",
              labFormat = labelFormat(prefix = " "),
              opacity = 0.75
)

m

# initiate leaflet map layer
m = leaflet()
m = addProviderTiles(m, "Stamen.TonerLite", group = "Toner Lite") 

# --- hex grid --- #
# store the file name of the geojson hex grid
hex_1401_fn = 'https://raw.githubusercontent.com/joeyklee/aloha-r/master/data/calls_2014/geo/hgrid_250m.geojson'

# read in the data
hex_1401 = readOGR(hex_1401_fn, "OGRGeoJSON")

# Create a continuous palette function
pal = colorNumeric(
  palette = "Greens",
  domain = hex_1401$data
)

# add hex grid
m = addPolygons(m, 
                data = hex_1401,
                stroke = FALSE, 
                smoothFactor = 0.2, 
                fillOpacity = 1,
                color = ~pal(hex_1401$data),
                popup= paste("Crime Density: ",hex_1401$data, sep=""),
                group = "hex"
)

# add legend
m = addLegend(m, "bottomright", pal = pal, values = hex_1401$data,
              title = "Crime Density",
              labFormat = labelFormat(prefix = " "),
              opacity = 0.75
)

# --- points data --- #

# subset out your data:
df_breakins = subset(data_filter, cid == 1)
df_mischief = subset(data_filter, cid == 2)
df_offences_homocides = subset(data_filter, cid == 3)
df_theft= subset(data_filter, cid== 4)

data_filterlist = list(df_breakins = subset(data_filter, cid == 1),
                       df_mischief = subset(data_filter, cid == 2),
                       df_offences_homocides = subset(data_filter, cid == 3),
                       df_theft = subset(data_filter, cid == 4))

layerlist = c("1 - Breakins", "2 - Mischief",
              "3 - Offences Homocides","4 - Theft")

colorFactors = colorFactor(c('orange', 'purple', 'blue', 'brown', 'red', 'brown'), domain=data_filter$cid)
for (i in 1:length(data_filterlist)){
  m = addCircleMarkers(m, 
                       lng=data_filterlist[[i]]$lon_offset, lat=data_filterlist[[i]]$lat_offset, 
                       popup=data_filterlist[[i]]$TYPE, 
                       radius=2,
                       stroke = FALSE, fillOpacity = 0.75,
                       color = colorFactors(data_filterlist[[i]]$cid),
                       group = layerlist[i]
  )
  
}

m = addLayersControl(m,
                     overlayGroups = c("Toner Lite", "hex"),
                     baseGroups = layerlist
)

# show map
m