## First check for the required packages, install if needed, and load the libraries.
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
::install("sangerseqR")
BiocManager::install_github("ropensci/bold")
remotes::install_github("ropensci/taxize")
remotes
if (!require("pacman")) install.packages("pacman")
::p_load(maps, ggplot2, dplyr, countrycode, rgbif, data.table, raster, mapproj, sf, terra) pacman
Continent-scale barcode coverage of lodgepole pine and big sagebrush
This notebook will pull specimen records for two selected examples to highlight the effect of local sampling on global coverage.
NOTE: Make sure GDAL is installed on the system you are running the notebooks from. This may require setting the “PROJ_LIB” and “GDAL_HOME” environment variables for your system.
Figure 2A-B. Get species keys for maps of Pinus contorta and Artemisia tridentata occurrences.
# Using t.mean raster layer created above with worldclim: Change t.mean so that all cells have zero values
<- list.files("../data/wc2.1_10m_tavg/", ".tif", full.names=TRUE)
t.mean.files
<- list.files("../data/wc2.1_10m_tavg/", ".tif", full.names=TRUE)
t.mean.files <- terra::rast(t.mean.files)
t.mean <- terra::app(t.mean, fun = mean, na.rm = TRUE)
t.mean <- t.mean
t.mean.addvalues !is.na(terra::values(t.mean.addvalues))] <- 0
t.mean.addvalues[::plot(t.mean.addvalues) terra
str(t.mean.addvalues)
S4 class 'SpatRaster' [package "terra"]
#terra::plot(t.mean.addvalues)
t.mean.addvalues
class : SpatRaster
dimensions : 1080, 2160, 1 (nrow, ncol, nlyr)
resolution : 0.1666667, 0.1666667 (x, y)
extent : -180, 180, -90, 90 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source(s) : memory
name : mean
min value : 0
max value : 0
<- terra::ext(t.mean.addvalues) t.mean.addvalues_extent_use
# Get taxon keys from GBIF
<- name_suggest(q='Artemisia tridentata', rank='species')
key_artemisia_tridentata key_artemisia_tridentata
Records returned [2]
No. unique hierarchies [0]
Args [q=Artemisia tridentata, limit=100, rank=species, fields1=key,
fields2=canonicalName, fields3=rank]
# A tibble: 2 × 3
key canonicalName rank
<int> <chr> <chr>
1 9396703 Artemisia tridentata SPECIES
2 3121335 Artemisia tridentata SPECIES
<- name_suggest(q='Pinus contorta', rank='species')
key_pinus_contorta key_pinus_contorta
Records returned [4]
No. unique hierarchies [0]
Args [q=Pinus contorta, limit=100, rank=species, fields1=key,
fields2=canonicalName, fields3=rank]
# A tibble: 4 × 3
key canonicalName rank
<int> <chr> <chr>
1 5285750 Pinus contorta SPECIES
2 8405446 Pinus contorta SPECIES
3 7617264 Pinus contorta SPECIES
4 3908060 Lupinus pinus-contortae SPECIES
Figure 2A: Map of Pinus contorta GBIF records
# Use occ_search() to query gbif records
<- occ_search(taxonKey = 5285750, limit = 50000) # number found: 41,033
PINCON_occurrences_2 head(PINCON_occurrences_2, 2)
$meta
$meta$offset
[1] 42000
$meta$limit
[1] 50
$meta$endOfRecords
[1] FALSE
$meta$count
[1] 42063
$hierarchy
$hierarchy[[1]]
name key rank
1 Plantae 6 kingdom
2 Tracheophyta 7707728 phylum
3 Pinopsida 194 class
4 Pinales 640 order
5 Pinaceae 3925 family
6 Pinus 2684241 genus
7 Pinus contorta 5285750 species
# Count matches: 15336
<- data.frame(PINCON_occurrences_2$data) PINCON_occurrences_2_df
# Obtain coordinate data and project to desired CRS
<- data.frame(x_coords = PINCON_occurrences_2_df$decimalLongitude, y_coords = PINCON_occurrences_2_df$decimalLatitude)
PinusSP head(PinusSP, 2)
x_coords y_coords
1 -123.9814 49.15059
2 -117.5496 48.30912
<- terra::vect(PinusSP, geom=c("x_coords", "y_coords"), crs="epsg:4326") # WGS84
PinusSP head(PinusSP, 2)
data frame with 0 columns and 0 rows
# Check objects for plotting
head(t.mean.addvalues, 2)
mean
1 NA
2 NA
<- t.mean.addvalues
PinusSP_raster
# Extract cell numbers where coordinates match
<- terra::cellFromXY(PinusSP_raster, terra::crds(PinusSP))
matching_cells
# Set the values in PinusSP_raster to 1 where coordinates match
<- 1
PinusSP_raster[matching_cells] head(PinusSP_raster, 2)
mean
1 1
2 NA
::plot(PinusSP_raster) terra
<- terra::as.points(PinusSP_raster)
PinusSP_raster_dfpoints
# Convert to data frame
<- as.data.frame(PinusSP_raster_dfpoints, geom="XY")
PinusSP_raster_df head(PinusSP_raster_df, 2)
mean x y
1 1 -179.91667 89.91667
2 0 -38.91667 83.58333
# Plot using ggplot
<- PinusSP_raster_df[PinusSP_raster_df$mean > 0, ]
PinusSP_raster_df #PinusSP_raster_df
# ggplot wrapper for world map
<- map_data("world") #ggplot wrapper of map()
world_map <- subset(world_map, region == "Mexico" | region == "Canada" | region == "USA")
north_america_map head(north_america_map, 2)
long lat group order region subregion
14759 -59.78760 43.93960 245 14759 Canada Sable Island
14760 -59.92227 43.90391 245 14760 Canada Sable Island
#Yellowstone outline
<- sf::st_read("../data/YellowstonePark/YellowstonePark1995.shp") aoi_boundary_YNP
Reading layer `YellowstonePark1995' from data source
`/Users/tdivoll/Projects/Kartzinel/MolEco-MEC-24-1288/data/YellowstonePark/YellowstonePark1995.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 4 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: 469695.4 ymin: -12706.41 xmax: 573531.9 ymax: 96658.52
Projected CRS: NAD83 / Montana
#examine features aoi_boundary_YNP
Simple feature collection with 1 feature and 4 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: 469695.4 ymin: -12706.41 xmax: 573531.9 ymax: 96658.52
Projected CRS: NAD83 / Montana
AREA PERIMETER AB67_ AB67_ID geometry
1 8842796032 447129.5 2 649 POLYGON ((559728.7 88811.52...
st_crs(aoi_boundary_YNP) #check coordinate system
Coordinate Reference System:
User input: NAD83 / Montana
wkt:
PROJCRS["NAD83 / Montana",
BASEGEOGCRS["NAD83",
DATUM["North American Datum 1983",
ELLIPSOID["GRS 1980",6378137,298.257222101,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4269]],
CONVERSION["SPCS83 Montana zone (meter)",
METHOD["Lambert Conic Conformal (2SP)",
ID["EPSG",9802]],
PARAMETER["Latitude of false origin",44.25,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8821]],
PARAMETER["Longitude of false origin",-109.5,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8822]],
PARAMETER["Latitude of 1st standard parallel",49,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8823]],
PARAMETER["Latitude of 2nd standard parallel",45,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8824]],
PARAMETER["Easting at false origin",600000,
LENGTHUNIT["metre",1],
ID["EPSG",8826]],
PARAMETER["Northing at false origin",0,
LENGTHUNIT["metre",1],
ID["EPSG",8827]]],
CS[Cartesian,2],
AXIS["easting (X)",east,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["northing (Y)",north,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Engineering survey, topographic mapping."],
AREA["United States (USA) - Montana - counties of Beaverhead; Big Horn; Blaine; Broadwater; Carbon; Carter; Cascade; Chouteau; Custer; Daniels; Dawson; Deer Lodge; Fallon; Fergus; Flathead; Gallatin; Garfield; Glacier; Golden Valley; Granite; Hill; Jefferson; Judith Basin; Lake; Lewis and Clark; Liberty; Lincoln; Madison; McCone; Meagher; Mineral; Missoula; Musselshell; Park; Petroleum; Phillips; Pondera; Powder River; Powell; Prairie; Ravalli; Richland; Roosevelt; Rosebud; Sanders; Sheridan; Silver Bow; Stillwater; Sweet Grass; Teton; Toole; Treasure; Valley; Wheatland; Wibaux; Yellowstone."],
BBOX[44.35,-116.07,49.01,-104.04]],
ID["EPSG",32100]]
<- st_transform(aoi_boundary_YNP,CRS("+proj=longlat +datum=WGS84")) #choose a projection and datum that every spatial object we add to the map will be converted to before plotting
aoi_boundary_YNP_WGS84 st_crs(aoi_boundary_YNP_WGS84) #check coordinate system to ensure projection worked
Coordinate Reference System:
User input: GEOGCRS["unknown",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ID["EPSG",6326]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8901]],
CS[ellipsoidal,2],
AXIS["longitude",east,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433,
ID["EPSG",9122]]],
AXIS["latitude",north,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433,
ID["EPSG",9122]]]]
wkt:
GEOGCRS["unknown",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ID["EPSG",6326]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8901]],
CS[ellipsoidal,2],
AXIS["longitude",east,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433,
ID["EPSG",9122]]],
AXIS["latitude",north,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433,
ID["EPSG",9122]]]]
#re examine features aoi_boundary_YNP_WGS84
Simple feature collection with 1 feature and 4 fields
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: -111.1543 ymin: 44.13245 xmax: -109.8339 ymax: 45.10785
Geodetic CRS: GEOGCRS["unknown",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ID["EPSG",6326]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8901]],
CS[ellipsoidal,2],
AXIS["longitude",east,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433,
ID["EPSG",9122]]],
AXIS["latitude",north,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433,
ID["EPSG",9122]]]]
AREA PERIMETER AB67_ AB67_ID geometry
1 8842796032 447129.5 2 649 POLYGON ((-110.0112 45.0478...
#Plot
<- read.csv("../data/combtab.csv")
combtab <- subset(combtab, species_name == "Pinus contorta")
combtab_Pincon <- as.data.frame(aoi_boundary_YNP_WGS84, xy = TRUE)
aoi_boundary_YNP_WGS84_df <- ggplot() +
pinusmap2 theme_classic() +
geom_tile(data = PinusSP_raster_df, aes(x, y, fill = mean)) +
geom_map(data = north_america_map,
map = north_america_map,
aes(map_id = region), fill = "NA", color = "lightgray", size = 0.2) +
scale_fill_viridis_c(na.value = "white", option = "viridis", direction = 1, begin =0.6) +
xlab("") + ylab("") +
scale_y_continuous(breaks = seq(-15, 75, 15), limits = c(20, 70)) +
scale_x_continuous(breaks = seq(-160, -60, 40), limits = c(-165, -50)) +
theme(legend.position = "none") +
geom_point(data = combtab_Pincon,
aes(x = lon, y = lat), pch = 3, size = 3, stroke = 0.9, color = alpha("black", 0.9)) +
geom_sf(data = aoi_boundary_YNP_WGS84, fill = "transparent", lwd = 0.25, color = "blue")
pinusmap2
#ggsave("pinusmap20240624_1.pdf", pinusmap2, width = 20, height = 8, units = "cm")
Figure 2B: Map of Artemisia tridentata gbif records
# Use occ_search() to query GBIF records
<- occ_search(taxonKey = 9396703, limit = 20000) #16,738 found
ArtTri_occurrences_2 head(ArtTri_occurrences_2, 2)
$meta
$meta$offset
[1] 19800
$meta$limit
[1] 200
$meta$endOfRecords
[1] FALSE
$meta$count
[1] 20276
$hierarchy
$hierarchy[[1]]
name key rank
1 Plantae 6 kingdom
2 Tracheophyta 7707728 phylum
3 Magnoliopsida 220 class
4 Asterales 414 order
5 Asteraceae 3065 family
6 Artemisia 3120641 genus
7 Artemisia tridentata 9396703 species
# Count matches
<- data.frame(ArtTri_occurrences_2$data) ArtTri_occurrences_df
# Obtain coordinate data and project to desired CRS
<- data.frame(x_coords = ArtTri_occurrences_df$decimalLongitude, y_coords = ArtTri_occurrences_df$decimalLatitude)
ArtemisiaSP head(ArtemisiaSP, 2)
x_coords y_coords
1 -112.2500 41.05834
2 -112.2562 41.03242
<- subset(ArtemisiaSP, x_coords !="")
ArtemisiaSP
<- terra::vect(ArtemisiaSP, geom=c("x_coords", "y_coords"), crs="epsg:4326")
ArtemisiaSP
# Check objects for plotting
head(t.mean.addvalues, 2)
mean
1 NA
2 NA
<- t.mean.addvalues
ArtemisiaSP_raster
# Extract cell numbers where coordinates match
<- terra::cellFromXY(ArtemisiaSP_raster, terra::crds(ArtemisiaSP))
matching_cells
# Set the values in ArtemisiaSP_raster to 1 where coordinates match
<- 1
ArtemisiaSP_raster[matching_cells] head(ArtemisiaSP_raster, 2)
mean
1 NA
2 NA
::plot(ArtemisiaSP_raster) terra
<- terra::as.points(ArtemisiaSP_raster)
ArtemisiaSP_raster_dfpoints
# Convert to data frame
<- as.data.frame(ArtemisiaSP_raster_dfpoints, geom="XY")
ArtemisiaSP_raster_df head(ArtemisiaSP_raster_df, 2)
mean x y
1 0 -38.91667 83.58333
2 0 -38.75000 83.58333
# Plot using ggplot
<- subset(ArtemisiaSP_raster_df, mean > 0)
ArtemisiaSP_raster_df head(ArtemisiaSP_raster_df, 2)
mean x y
207738 1 -122.0833 52.08333
209104 1 -122.5833 51.91667
#Plot
<- subset(combtab, species_name == "Artemisia tridentata")
combtab_Arttri <- ggplot() +
Artemisiamap3 theme_classic() +
geom_tile(data = ArtemisiaSP_raster_df, aes(x, y, fill = mean)) +
geom_map(data = north_america_map, map = north_america_map, aes(map_id = region), fill = "NA", color = "lightgray", size = 0.2) +
scale_fill_viridis_c(na.value = "white", option = "inferno", direction = -1, begin = 0.5) +
xlab("") + ylab("") +
scale_y_continuous(breaks = seq(-15, 75, 15), limits = c(20, 70)) +
scale_x_continuous(breaks = seq(-160, -60, 40), limits = c(-165, -50)) +
theme(legend.position = "none") +
geom_point(data = combtab_Arttri, aes(x = lon, y = lat), pch = 8, size = 3, stroke = 0.9, color = alpha("black", 0.9)) + geom_sf(data = aoi_boundary_YNP_WGS84, fill = "transparent", lwd = 0.25, color = "blue")
Artemisiamap3
#ggsave("Artemisiamap4_20240626_1.pdf", Artemisiamap3, width = 20, height = 8, units = "cm")