# NOTE 1: Hashtags are our comments on the code - these are invisible to R when they're on their own line OR at the end of a line of code # NOTE 2: If you see an arrow in a comment (# <--), this points out a section of code that you will need to change! # NOTE 3: We recommend that you label your data frames with the year and species so you can keep track of them! (eg, df_2015_coyote) # 1. Set up ------------------------------------------------------------------ # Install packages install.packages(c('tidycensus', 'rinat', 'rgbif', 'R.filesets', 'stars', 'dplyr', 'raster', 'dplyr', 'tidyverse', 'maps', 'gridExtra')) # Load required packages to pull in data, work with data, and make plots library('dplyr') library('tidycensus') library('rinat') library('rgbif') library('raster') library('stars') library('ggplot2') library('tidyverse') library('maps') library('gridExtra') rm(list=ls()) # Clear existing packages so you're only using the ones we loaded above select <- dplyr::select # The select command exists for multiple packages, so this tells R to use select from the dplyr package # Set your working directory so your computer knows where to find files data_location <- "~/Downloads/Data/" # <-- Change this to your home directory # 2. Grab species data ------------------------------------------------------- # If you're curious, this link tells more about the species data: https://www.gbif.org/ # Let's start by grabbing some data! First, let's take a look at blue jays here in NC. # The first thing we need to do is find the unique "key" from the GBIF database that corresponds to the species, genus, or family we're interested in. # Testing this out with blue jays: key <- name_lookup(query='Cyanocitta cristata')$data$nubKey[1] # <-- Change what's in the quotation marks to the species, genus, or family you want to pull data on! # Great! Now that we have the key for blue jays, let's use that to pull the reported observations of blue jays! # Note a few variables you can change here...and make sure you name this object appropriately! df_2019_bluejay <- occ_data(taxonKey = key, # key is the key code you got but entering your species/genus/family in the prior line of code limit=10000, # <-- How many records do you want to get? Max is 100,000 - but that'll take a very long time. Let's start with 10,000. stateProvince = "NORTH CAROLINA", # You could look at another state if you want! If so, make sure to change your mapping code too year=2019)$data %>% # <-- Which year do you want to find data from? You will probably want to run this once for 2015 and once for 2019 as.data.frame # Take a look at what you downloaded! view(df_2019_bluejay) # 3. Map species data -------------------------------------------------------- # Now, let's see where all these blue jays have been spotted. # The first step is to tell R that this is not just a table, but an "sf object" - a set of geolocated observations. df_geo_2019_bluejay <- df_2019_bluejay %>% # <-- make sure df_year_species is the data frame you want to be working on! st_as_sf( # Convert to sf object coords=c("decimalLongitude", "decimalLatitude"), # Tell R which columns are the geological coordinates crs="WGS84") # Tell R what the Coordinate Reference System is - we won't talk about this today, but you can read a bit about it here: https://www.earthdatascience.org/courses/earth-analytics/spatial-data-r/intro-to-coordinate-reference-systems/ # Now, let's map it. map1 <- ggplot() + # These two lines will save map 1, which will be helpful later when you want to show >1 map side by side geom_sf(data = df_geo_2019_bluejay) # <-- make sure df_geo_year_species is the data frame you want to map map1 # Good start! But it would look a LOT better if we had a base map. We'll pull this "basemap" from the "maps" package. nc_basemap <- st_as_sf(maps::map("county", 'North Carolina', plot = FALSE, fill = TRUE)) # <-- If you decide to do your analysis on a different state, make sure to change that here too map2 <- ggplot() + geom_sf(data = nc_basemap) + # Plots the basemap of NC geom_sf(data = df_geo_2019_bluejay, shape = 3) # <-- make sure df_year_species is the one you want. # Shape = 3 sets each datapoint to be a cross. Different numbers will make different shapes, see here for the key: http://www.sthda.com/english/wiki/ggplot2-point-shapes # You can also change the color or size of the points! For example... shape = 3, color = "yellow", size = 0.5) map2 # That's better! # 4. Loading county boundaries ----------------------------------------------- # What if we want to look at patterns by county? # Let's pull in the county boundaries from the US Census. load(file = paste0(data_location, "Combined dataset.rda")) # We'll start with a file which contains only the county IDs and names, which we can plot. censusBounds <- combinedDataset %>% select(GEOID, NAME) # Let's see what that looks like. nc <- ggplot() + geom_sf(data = censusBounds) nc # Looks good! Let's now aggregate our species observations to the county level so we know how many sightings there were of our species for each county # 5. Aggregate & map species data by county ---------------------------------------- speciesByCounty_2019_bluejay <- censusBounds %>% # We start with our NC county outlines... st_intersects(df_geo_2019_bluejay) %>% # And then count the number of blue jays within each county n this line and the next lengths() %>% as.data.frame %>% setNames("BlueJay") # <-- This gives the column a name. When you change it, make sure to change it again in map 3 where you specify fill = BlueJay speciesByCounty_2019_bluejay # We now have the number of blue jay sightings for each county (there are 100 counties in NC) # Let's now "bind" (merge) that with the map file, censusBounds, so we know which county is which. boundsWithSpecies_2019_bluejay <- censusBounds %>% bind_cols(speciesByCounty_2019_bluejay) # Take a look! Which counties had the most sightings? view(boundsWithSpecies_2019_bluejay) # Let's map it! map3 <- ggplot() + geom_sf(data = boundsWithSpecies_2019_bluejay, aes(fill=BlueJay)) + # <-- Tells R to color in each county based on the # of sightings of species1 for that county labs(fill = "Blue Jays") # <-- Re-labels the key so it says "Blue Jays" instead of "species1" map3 # How does that compare to the previous map, of points? # Let's look at them side-by-side using grid.arrange(): grid.arrange(map2, map3, nrow = 2) # nrow lets you specify the number of rows, and ncol the number of columns # 6. Human population density ------------------------------------------------ # OK, so we've taken a look at where blue jays hang out. But where are all the people? # We've pulled population density information from the US Census's American Community Survey. # Units = people per square mile # That dataset, combinedData, also has species abundance for a few species in 2015 & 2019 # Let's take a look at it mapped map4 <- ggplot() + geom_sf(data = combinedDataset, aes(fill=density2015)) # As with map 3, this tells R to fill in each county based on pouplation density in 2015 map4 # Great! Now let's look at population density in 2019: map5 <- ggplot() + geom_sf(data = combinedDataset, aes(fill=density2019)) # We changed the fill to the density2019 column map5 grid.arrange(map4, map5, nrow = 2) # Note that ncol lets you specify the number of columns, and nrow the number of rows. # It might be nice to have CHANGE in density on one plot instead of having to map both years: # So let's make a new column in combinedDataset called densityChange: combinedDataset <- combinedDataset %>% mutate(densityChange = density2019-density2015) # Makes a new column: density in 2019 - density in 2015 #... and let's map it! map6 <- ggplot(combinedDataset) + geom_sf(data = combinedDataset, aes(fill=densityChange)) # We changed fill to densityChange map6 # We've already calculated a density_percent_change variable so that you can see which are growing fastest # This is densityChange / density2015 # A county with a density of 100 in 2015 and 110 in 2019 will have the same density_percent_change as a county with a density of 1000 in 2015 and 1100 in 2019 map7 <- ggplot() + geom_sf(data = combinedDataset, aes(fill=density_pct_change)) + # Changed fill to density_pct_change scale_fill_gradient(low="blue", high="red") # This time, let's change the colors a bit. map7 # Putting those all together: grid.arrange(map4, map5, map6, map7, nrow = 2) # 7. Overlay humans & wildlife ----------------------------------------------- # A fun extra map: you can overlay species abundance onto one of these maps! # And show again how to overlay species abundance! map8 <- ggplot() + geom_sf(data = combinedDataset, aes(fill=density2019)) + geom_sf(data = df_geo_2019_bluejay, shape = 3, color = "yellow") map8 # You're up! ------------------------------------------------------------------------- # Ok, it's your turn! # Work with your group to come up with a question and/or hypothesis # Then think of a prediction and how you're going to test it # You may want to divide and conquer - each person can pull data for 1-2 species, for example # Make sure to come back together to pool your data, interpret it, and make some fun tables and plots! # A few thoughts and tips: # 1. Analyzing change in species abundance: # You'll almost certainly want to repeat sections 2-5 for 2015 AND 2019 data for every species, genus, or family you're collecting data on # From those two datasets, you can put the county-level data you're interested in into a table in Excel # 2. There's so much data! # Yes! Think carefully about how to test your hypothesis or question. # You may want to focus in on 1-3 counties and look at abundance of lots of species. # You may want to focus in on 1-3 species and look at a bunch of counties - such as the counties that experienced the MOST and LEAST population change # 3. Making figures: # Have a blast making maps in R! # To make scatterplots (eg, change in human abundance vs change in wildlife abundance), we recommend using Excel because many of you already know how to do that! # 4. Think about what's WRONG with these data # There are some sytematic biases in these data. # What are they? # How do that affect your findings? # How could you correct for it, or is that impossible? # 5. What species should I pick? There's no wrong answer! But here are some thoughts: # Think about the species you associate with cities. And think about what species you DON'T see in cities # Instead of looking at the species level, you can look at genera, families, orders, or classes. # Do you think there may be differences between, say, mammals (class: Mammalia) and birds/reptiles (class: Reptilia) in how they are affected by population change? # And here are a few backyard birds to consider as well: # Blue jay: Cyanocitta cristata # Carolina wren: Thryothorus ludovicianus # Eastern bluebird: Sialia sialis # American robin: Turdus migratorius # Cardinal: Cardinalis cardinalis # Red-tailed hawk: Buteo jamaicensis # American goldfinch: Spinus tristis # House finch: Haemorhous mexicanus # House sparrow: Passer domesticus # Canada goose: Branta canadensis # Black vulture: Coragyps atratus # Turkey vulture: Cathartes aura # Additional variable: Light pollution --------------------------------------------------------- # This adds some complication, but we wanted to let you decide if you wanted to use it. # We'll need to load a new package to work with this type of "raster" file. library(stars) # Load the files. You're literally loading a light image of NC! raster2015 <- read_stars(paste0(data_location, "data_2015.tif")) # This part reads in the light image, which we have already cropped to NC. raster2019 <- read_stars(paste0(data_location, "data_2019.tif")) # This part reads in the light image, which we have already cropped to NC. # Let's take a look! plot(raster2015) plot(raster2019) # Now, let's get the average value in each county lumin2015 <- st_extract(raster2015, censusBounds) %>% st_as_sf() %>% st_set_geometry(NULL) %>% setNames("lumin2015") %>% bind_cols(censusBounds) lumin2019 <- st_extract(raster2019, censusBounds) %>% st_as_sf() %>% st_set_geometry(NULL) %>% setNames("lumin2019") %>% bind_cols(censusBounds) # And then calculate luminChange, first by merging the two datasets into one, then by subtracting 2019-2015 # We're also going to calculate a lumin_pct_change, as with density: lumin <- merge(lumin2015, lumin2019, by = c('GEOID', 'NAME', 'geometry')) %>% mutate(luminChange = lumin2019-lumin2015, lumin_pct_change = luminChange/lumin2015) # And we can plot luminChange and lumin_prct_change. map9 <- ggplot(lumin %>% st_as_sf()) + geom_sf(aes(fill=luminChange)) + # Fill is absolute change scale_fill_gradient(low="black", high="white") # Let's change the colors a bit. map10 <- ggplot(lumin %>% st_as_sf()) + geom_sf(aes(fill=lumin_pct_change)) + # Fill is percent change scale_fill_gradient(low="black", high="white") # Let's change the colors a bit. grid.arrange(map9, map10, ncol = 1) # This should look pretty similar to lumin2015, above.