# Baboon Data Expedition, 01 November 2021

# Warm-up: simple arithmetic and storing a value in an object.
2 + 2
2 + 3
r_is_cool <- 2
r_is_cool + 1


# SET UP ------------------------------------------------------------------

## Load packages to analyze and plot data
# Install ggplot2 only when it is not already available, so that re-running
# this script does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Put the baboon_data.csv and baboon_data_2.csv files in your Desktop
# In bottom right panel of RStudio, click
#   Files > Home > Desktop > More > Set As Working Directory

## Load first dataset
# We will look at bab to learn more about the raw data
bab <- read.csv("baboon_data.csv")

## Preview data
View(bab) # opens the data in a spreadsheet-style viewer
str(bab)  # prints the structure: column names, types, and first values

## This dataset contains 1 row per fecal sample.
# female            a female's unique ID (name).
# cycle_day         how many days before deturgescence the fecal sample was collected.
# estrogen          ...you type in the rest!
# swelling_size
# alpha_consort
# nonalpha_consort

## Load second dataset
# bab2 was created from bab.
# For each cycle day, we calculated the mean estrogen and swelling size
# as well as the sum of male consorts.
bab2 <- read.csv("baboon_data_2.csv")

## Preview data, just as we did before
View(bab2)

## This dataset contains 1 row per cycle_day
# cycle_day                         number of days before (negative) or after (positive) d-day
# n                                 number of female's samples that contributed to that row of data
#                                   (eg, 15 means there were 15 rows of data in bab that had cycle days of -22)
# swelling_size_mean                ...you type in the rest!
# swelling_size_se
# estrogen_mean
# estrogen_se
# alpha_consorts
# alpha_consortship_probability
# nonalpha_consorts
# nonalpha_consortship_probability


# KNOWING THE DATASET ------------------------------------------------------------

# This line of code below will calculate the total number of rows in the dataset:
nrow(bab)

# This next line of code below will calculate the number of unique values in the column "X".
# Replace "X" with the name of the correct column to calculate the number of unique females in the dataset.
# Make sure you don't put quotes around the name of the column!
length(unique(bab$X))

# --> Answer a few questions about the dataset on your worksheet (KNOWING THE DATASET)


# DATA VISUALIZATION PART ONE: FEMALE SWELLINGS AND ESTROGEN --------------------------------------------

# Now we can visualize how female sexual skin swelling sizes change
# over the course of her reproductive cycle.
# Run the code below:
ggplot(
  data = bab2,
  mapping = aes(
    x = cycle_day,                                  # what goes on the X-axis
    y = swelling_size_mean,                         # what goes on the Y-axis
    ymin = swelling_size_mean - swelling_size_se,   # bottom of the standard error bar
    ymax = swelling_size_mean + swelling_size_se    # top of the standard error bar
  )
) +
  geom_point() +      # draw one point per cycle day
  geom_errorbar() +   # draw the error bars (uses ymin and ymax)
  geom_line() +       # join the points with a line
  labs(
    x = "cycle day",      # x-axis label
    y = "swelling size"   # y-axis label
  )

# Save the plot as a PDF to your Desktop
# (ggsave() saves the most recent plot when no plot is given):
ggsave(
  "swelling_size_plot.pdf",
  width = 5.5,
  height = 3.5,
  units = "in"
)
# Now you can open and view the plot on your Desktop.

# Now, let's modify the above code to make a new plot!
# Instead of plotting MEAN SWELLING SIZE on the y-axis, now we want to plot MEAN ESTROGEN on the y-axis.
# Try modifying the code above so that the y-axis is "estrogen_mean" instead of "swelling_size_mean".
# Don't forget to change the error bars as well (the ymin and ymax lines)!
# You should also change the y-axis label (the y = ... entry inside labs()).
# Once your plot looks right, you can save it to your Desktop by running this line:
ggsave(
  "estrogen_plot.pdf",
  width = 5.5,
  height = 3.5,
  units = "in"
)

# --> Answer a few questions about these plots in your worksheet (DATA VISUALIZATION PART ONE)


# DATA VISUALIZATION PART TWO: MALE CONSORTSHIPS --------------------------------------------

# We can also visualize the mating success of alpha males on different female cycle days.
# Run the code below:
ggplot(
  data = bab2,
  mapping = aes(
    x = cycle_day,
    y = alpha_consortship_probability
  )
) +
  geom_bar(stat = "identity") +   # bar heights are taken directly from the y values
  labs(
    x = "cycle day",
    y = "probability of consorting with alpha male"
  )

# Save the plot as a PDF to your Desktop:
ggsave(
  "alpha_consorts_plot.pdf",
  width = 5.5,
  height = 3.5,
  units = "in"
)

# Now, let's modify the above code to make a new plot.
# Instead of plotting the probability of consorting with an ALPHA MALE on the y-axis,
# we want to plot the probability of consorting with a NON-ALPHA MALE on the y-axis.
# Try modifying the code above to accomplish this.
# Don't forget to change the y-axis label as well (the y = ... entry inside labs()).
# Once your plot looks right, you can save it to your Desktop:
ggsave(
  "nonalpha_consorts_plot.pdf",
  width = 5.5,
  height = 3.5,
  units = "in"
)

# --> Answer a few questions about these plots in your worksheet (DATA VISUALIZATION PART TWO)


# If you have extra time, here are some fun ways you can continue to look at this cool dataset:

# EXTRA: INDIVIDUAL VARIATION -------------------------------------------

# Everyone is different. Same with baboons! How much variability do you think there is in...
#     female baboon sexual swelling sizes?
#     female baboon estrogen concentrations?
# We can visualize this!

## Variation in female baboon sexual swelling size
# A simple question is: How variable is the maximum swelling size across females?
# To do this, we want to extract the maximum swelling size from each female WHEN CYCLE_DAY = -1.
# We need to specify cycle day because we know swelling size changes across the cycle;
# in our plots above, -4 was the day when mean swelling size was largest.
# NOTE(review): the text above mentions cycle days -1 and -4, while the code below
# filters on cycle_day == 1 and labels that day as d-day -- confirm which day is intended.

# 1. Here's one way to look at that:
ggplot(
  data = subset(bab, cycle_day == 1),   # uses the bab dataset (not bab2!) AND keeps only rows where cycle_day equals 1
  mapping = aes(x = swelling_size)      # swelling size goes on the x-axis
) +
  geom_histogram() +                    # draw a histogram
  labs(
    x = "swelling size on d-day (cycle day = 1)",
    y = "count"
  )                                     # axis labels
# What do you think? Does that seem like a lot of variability across females, or a little?
# Does variability increase or decrease if you change the cycle day?

# 2. A second way to look at this is to plot every female on the same plot,
#    with a line for each female, similar to the second plot you made.
ggplot(
  data = bab,   # notice we're using bab, not bab2! We need the raw data.
  mapping = aes(
    x = cycle_day,            # x-axis
    y = swelling_size,        # y-axis
    group = factor(female),   # one group per distinct value in the female column;
                              # factor() tells ggplot these values are categories, not numbers
    color = factor(female)    # same idea, but gives each female her own color
  )
) +
  geom_line() +               # connect the datapoints within each group
  labs(
    x = "cycle day",
    y = "swelling size"
  )                           # axis labels
# There's a lot going on here! What conclusions can you draw about variability in swelling size here?
# Can you think of ways to change the plot that would make this easier to look at?
# Try it, and/or ask a classmate or me for help!

# 3. A third way to look at this is to plot each female separately,
#    using the facet_wrap() function in ggplot.
ggplot(
  data = bab,   # notice we're using bab, not bab2! We need the raw data.
  mapping = aes(
    x = cycle_day,       # x-axis
    y = swelling_size    # y-axis
  )
) +
  geom_line() +                                  # connect the datapoints
  geom_vline(xintercept = 0, color = "red") +    # vertical red line at x = 0, to make panels easier to compare
  facet_wrap(~female) +                          # one panel per unique female
  labs(
    x = "cycle day",
    y = "swelling size"
  )                                              # axis labels
# Make sure to click Zoom so you can see this image much larger.
# This one will take a minute to look at...
# So, what's going on here? Is this more helpful than the prior visualization? Any take-aways?
# Side note: I love looking at individual-level data! I think it's fun to see what's going on
# on a baboon-by-baboon basis.

## Variation in female baboon estrogen concentrations
# You can switch estrogen out for swelling_size in the plots above to take a look!
# You'll see that estrogen is a much noisier measure than swelling size...why do you think that is?
# Can you still see any patterns in the data?