Mapping global coverage from local sampling

Import the records once the download is finished; this could take a while on a slow connection.

tic() # start timing the import
# Download key generated in `Fetching plant occurrence records from GBIF`
records <- occ_download_get(key = "0066939-241126133413365")
Download file size: 1934.4 MB
file exists & overwrite=FALSE, not overwriting...
# Import only the columns that the rest of the analysis reads.
data <- occ_download_import(
  records,
  select = c(
    "scientificName", "species", "taxonKey", "speciesKey",
    "year", "decimalLongitude", "decimalLatitude", "countryCode"
  )
)
toc() # stop the timer and report the elapsed time
49.024 sec elapsed
beep(2) # signal that the import has finished (presumably beepr's sound alert — confirm)

Inspect the lat/long values.

# range() yields c(min, max); cat() prints both values after the label.
cat("Longitude extremes:", range(data$decimalLongitude))
Longitude extremes: -179.9753 179.9876
# range() yields c(min, max); cat() prints both values after the label.
cat("Latitude extremes:", range(data$decimalLatitude))
Latitude extremes: -55.04704 85.75

Filter out points at the edges of the flat map; they cause some stretching issues when building hex bins.

# Keep only records whose coordinates fall inside both bounds
# (latitude within [-85, 85] and longitude within [-175, 175]).
data2 <- filter(
  data,
  between(decimalLatitude, -85, 85),
  between(decimalLongitude, -175, 175)
)

Convert 18.5 Mil records into H3 hex shapes

We use the Uber H3 hexagon system for binning records into spatial areas to plot densities.

Convert df to sf

# Turn the longitude/latitude columns into point geometries in EPSG:4326.
data_sf <- data2 %>%
  st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)

Check # of species vs # of scientific names

# Print the number of distinct values in each identifier column, in this order:
# scientificName, species, taxonKey, speciesKey.
cat(vapply(
  c("scientificName", "species", "taxonKey", "speciesKey"),
  function(col) length(unique(data_sf[[col]])),
  integer(1)
))
3941 311 3941 311
# Collect the distinct species names into a one-column data frame and print it.
# No column name is supplied, so the header is the deparsed expression
# `unique(data_sf$species)`.
species <- as.data.frame(unique(data_sf$species))
species
          unique(data_sf$species)
1                  Holcus lanatus
2                Senecio vulgaris
3            Hypochaeris radicata
4            Achillea millefolium
5            Agrostis stolonifera
6              Juncus articulatus
7               Prunella vulgaris
8      Chamaenerion angustifolium
9           Deschampsia cespitosa
10              Spergularia rubra
11               Trifolium repens
12               Ranunculus acris
13          Ranunculus sceleratus
14          Eleocharis acicularis
15                  Festuca rubra
16           Phalaris arundinacea
17                Phleum pratense
18                  Sonchus asper
19           Eleocharis palustris
20                  Poa pratensis
21                   Rubus idaeus
22               Sonchus arvensis
23          Potamogeton gramineus
24              Cerastium arvense
25                Carex canescens
26                 Galium boreale
27             Juniperus communis
28              Tragopogon dubius
29                Bromus tectorum
30                 Erigeron acris
31                Thlaspi arvense
32         Campanula rotundifolia
33            Onopordum acanthium
34             Limosella aquatica
35          Asparagus officinalis
36             Veronica peregrina
37             Koeleria macrantha
38                  Poa palustris
39               Dianthus armeria
40            Solidago canadensis
41             Alyssum alyssoides
42            Lepidium virginicum
43               Myosurus minimus
44           Allium schoenoprasum
45                Euphorbia esula
46              Chondrilla juncea
47                 Bromus inermis
48           Potentilla norvegica
49            Xanthium strumarium
50                Agrostis scabra
51                   Carex limosa
52                     Poa alpina
53                 Phleum alpinum
54             Astragalus alpinus
55             Myosotis alpestris
56              Eritrichium nanum
57            Potentilla argentea
58                Alyssum simplex
59                  Oxyria digyna
60              Sagina saginoides
61            Anthoxanthum nitens
62              Bromus squarrosus
63            Dasiphora fruticosa
64            Persicaria amphibia
65                   Viola adunca
66           Collinsia parviflora
67               Agrostis exarata
68            Lepidium appelianum
69            Erythranthe lewisii
70       Ranunculus eschscholtzii
71          Hesperochiron pumilus
72          Maianthemum racemosum
73                   Actaea rubra
74       Lithophragma parviflorum
75                   Acer glabrum
76           Aconitum columbianum
77           Taraxacum officinale
78            Populus tremuloides
79            Fragaria virginiana
80             Iris missouriensis
81           Aquilegia flavescens
82              Sedum lanceolatum
83                 Mahonia repens
84        Polemonium pulcherrimum
85       Ceratocephala orthoceras
86       Spiranthes romanzoffiana
87           Ranunculus uncinatus
88             Veronica americana
89                 Allium cernuum
90           Platanthera dilatata
91        Ranunculus alismifolius
92                 Elymus glaucus
93                Viola nuttallii
94                Neottia cordata
95              Wyethia sagittata
96                  Linum lewisii
97         Geranium viscosissimum
98               Carex duriuscula
99            Agropyron cristatum
100              Phlox longifolia
101            Lysimachia ciliata
102              Bromus japonicus
103        Sporobolus cryptandrus
104             Festuca octoflora
105                Elymus smithii
106             Danthonia spicata
107          Sphaeralcea coccinea
108          Equisetum laevigatum
109             Tamarix chinensis
110              Bromus carinatus
111              Elymus elymoides
112           Hesperostipa comata
113             Plantago elongata
114        Polygonum ramosissimum
115       Castilleja linariifolia
116             Glyceria borealis
117      Calamagrostis canadensis
118         Geranium richardsonii
119       Pseudoroegneria spicata
120         Schoenoplectus acutus
121              Boechera stricta
122           Camelina microcarpa
123               Hordeum jubatum
124           Gayophytum diffusum
125           Potentilla hippiana
126           Polygonum douglasii
127         Eleocharis rostellata
128          Torreyochloa pallida
129             Collomia linearis
130              Elymus violaceus
131                Pinus contorta
132                Rumex fueginus
133  Schoenoplectus subterminalis
134        Androsace occidentalis
135            Descurainia incana
136       Delphinium nuttallianum
137          Eriogonum umbellatum
138     Androsace septentrionalis
139                Carex obtusata
140          Senecio triangularis
141         Beckmannia syzigachne
142      Taraxacum erythrospermum
143                    Poa glauca
144              Koeleria spicata
145        Taraxacum ceratophorum
146                Phlox gracilis
147         Scolochloa festucacea
148               Salvia nemorosa
149             Anemone multifida
150     Deschampsia danthonioides
151                   Poa secunda
152          Penstemon fruticosus
153                  Packera cana
154           Potentilla gordonii
155               Carex vallicola
156         Veronica wormskjoldii
157        Delphinium occidentale
158      Symphyotrichum ascendens
159             Lupinus argenteus
160   Leptosiphon septentrionalis
161       Puccinellia nuttalliana
162   Symphyotrichum bracteolatum
163          Erigeron caespitosus
164          Helianthus nuttallii
165            Carex pachystachya
166      Symphyotrichum foliaceum
167                Geum triflorum
168             Oxytropis sericea
169                Carex simulata
170             Mertensia ciliata
171          Festuca brachyphylla
172            Eremogone congesta
173             Penstemon deustus
174             Rumex paucifolius
175         Helianthella uniflora
176              Astragalus miser
177        Eriogonum heracleoides
178          Tetradymia canescens
179                  Carex rossii
180             Carex brunnescens
181            Erigeron glabellus
182        Calochortus eurycarpus
183           Astragalus agrestis
184                Carex petasata
185                  Poa cusickii
186          Lomatium triternatum
187       Chenopodiastrum simplex
188        Polemonium occidentale
189         Castilleja rhexifolia
190                Carex disperma
191              Erigeron pumilus
192        Antennaria microphylla
193               Agoseris glauca
194       Scrophularia lanceolata
195            Castilleja miniata
196        Plagiobothrys scouleri
197            Astragalus purshii
198            Taraxia subacaulis
199              Stenotus acaulis
200          Eremopyrum triticeum
201          Boechera retrofracta
202               Leymus cinereus
203         Micranthes rhomboidea
204              Phlox multiflora
205          Eriocoma lettermanii
206         Alyssum turkestanicum
207          Cherleria obtusiloba
208             Senecio crassulus
209            Carex chalciolepis
210               Carex elynoides
211         Clematis hirsutissima
212           Heterotheca villosa
213                     Poa arida
214               Draba albertina
215        Polygonum polygaloides
216          Lomatium macrocarpum
217             Iliamna rivularis
218            Carex praegracilis
219                Draba nemorosa
220          Danthonia intermedia
221          Penstemon attenuatus
222             Castilleja pilosa
223            Festuca idahoensis
224            Penstemon procerus
225               Veronica biloba
226         Epilobium hornemannii
227             Myosurus apetalus
228             Draba oligosperma
229                  Poa interior
230             Crepis modocensis
231            Orthocarpus luteus
232   Drymocallis pseudorupestris
233      Toxicoscordion venenosum
234              Eriogonum flavum
235              Carex raynoldsii
236          Erigeron ochroleucus
237             Draba crassifolia
238             Penstemon cyaneus
239              Elymus scribneri
240             Carex spectabilis
241        Chenopodium atrovirens
242         Saxifraga bronchialis
243         Solidago multiradiata
244                   Poa reflexa
245         Eriogonum caespitosum
246       Stephanomeria runcinata
247               Carex filifolia
248             Geum macrophyllum
249     Muhlenbergia richardsonis
250                Festuca kingii
251          Castilleja pulchella
252    Calamagrostis purpurascens
253           Erigeron corymbosus
254       Thalictrum sparsiflorum
255        Thalictrum occidentale
256         Epilobium lactiflorum
257                Glyceria elata
258                Carex parryana
259         Oreocarya spiculifera
260      Fritillaria atropurpurea
261               Lactuca biennis
262            Allium brevistylum
263             Boechera lemmonii
264        Muhlenbergia glomerata
265                 Draba globosa
266                 Poa leptocoma
267         Wyethia helianthoides
268              Carex haydeniana
269                Carex paysonis
270          Claytonia multiscapa
271              Opuntia fragilis
272             Carex leporinella
273               Ionactis alpina
274             Eriocoma nelsonii
275      Symphyotrichum campestre
276                  Carex hoodii
277            Taraxia breviflora
278              Carex neurophora
279             Arabis pycnocarpa
280           Ceanothus velutinus
281           Parnassia fimbriata
282                 Silene parryi
283           Heuchera cylindrica
284              Dryas octopetala
285        Scheuchzeria palustris
286        Synthyris wyomingensis
287      Telesonix heucheriformis
288           Heuchera parviflora
289                    Draba cana
290         Calamagrostis stricta
291            Polygonum achoreum
292                 Draba incerta
293               Carex tahoensis
294               Ventenata dubia
295 Pedicularis cystopteridifolia
296       Castilleja crista-galli
297        Conimitella williamsii
298                Bromus porteri
299           Hieracium praealtum
300               Filago arvensis
301          Matricaria discoidea
302           Blitum nuttallianum
303               Koeleria vaseyi
304                Draba praealta
305              Arabis nuttallii
306             Mimulus moschatus
307             Physaria reediana
308         Eriocoma richardsonii
309              Carex stenoptila
310         Ranunculus mongolicus
311              Castilleja nivea

Get CRS info.

# Print the CRS definition attached to `data_sf` (returned as a WKT string).
raster::crs(data_sf)
[1] "GEOGCRS[\"WGS 84\",\n    ENSEMBLE[\"World Geodetic System 1984 ensemble\",\n        MEMBER[\"World Geodetic System 1984 (Transit)\"],\n        MEMBER[\"World Geodetic System 1984 (G730)\"],\n        MEMBER[\"World Geodetic System 1984 (G873)\"],\n        MEMBER[\"World Geodetic System 1984 (G1150)\"],\n        MEMBER[\"World Geodetic System 1984 (G1674)\"],\n        MEMBER[\"World Geodetic System 1984 (G1762)\"],\n        MEMBER[\"World Geodetic System 1984 (G2139)\"],\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]],\n        ENSEMBLEACCURACY[2.0]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    USAGE[\n        SCOPE[\"Horizontal component of 3D system.\"],\n        AREA[\"World.\"],\n        BBOX[-90,-180,90,180]],\n    ID[\"EPSG\",4326]]"

This step prints the data as a simple feature collection in WGS 84 (EPSG:4326), where each lat/long pair appears as a point geometry.

# Print the point layer in EPSG:4326. The result is not assigned, so `data_sf`
# itself is left unchanged by this call.
st_transform(data_sf, 4326)
Simple feature collection with 20503896 features and 6 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -174.9518 ymin: -55.04704 xmax: 175 ymax: 84.75
Geodetic CRS:  WGS 84
# A tibble: 20,503,896 × 7
   scientificName          species         taxonKey speciesKey  year countryCode
 * <chr>                   <chr>              <int>      <int> <int> <chr>      
 1 Holcus lanatus L.       Holcus lanatus   2706164    2706164  2018 FR         
 2 Holcus lanatus L.       Holcus lanatus   2706164    2706164  2018 FR         
 3 Senecio vulgaris L.     Senecio vulgar…  3108983    3108983  1985 FR         
 4 Hypochaeris radicata L. Hypochaeris ra…  3093702    3093702  2019 FR         
 5 Achillea millefolium L. Achillea mille…  3120060    3120060  2015 FR         
 6 Agrostis stolonifera L. Agrostis stolo…  2706435    2706435  1994 FR         
 7 Achillea millefolium L. Achillea mille…  3120060    3120060  2000 FR         
 8 Senecio vulgaris L.     Senecio vulgar…  3108983    3108983  2009 FR         
 9 Juncus articulatus L.   Juncus articul…  2701261    2701261  1991 FR         
10 Prunella vulgaris L.    Prunella vulga…  5341297    5341297  1999 FR         
# ℹ 20,503,886 more rows
# ℹ 1 more variable: geometry <POINT [°]>

Toss records without a species assigned and randomly subsample per species.

# Drop records with no species assigned, then draw a random subsample within
# each species so every species keeps the same proportion of its records.
# NOTE: the draw is random; call set.seed() beforehand for a reproducible run.
data_sf <- data_sf %>%
  filter(species != "") %>%
  group_by(species) %>%
  # slice_sample(n = params$random_sample_n) # number of samples
  slice_sample(prop = params$random_sample_prop) %>% # proportion of samples
  # Remove the grouping so it does not leak into later steps; steps that need
  # per-species or per-hex groups call group_by() themselves.
  ungroup()

Make a point for the centroid of YNP (middle of Yellowstone Lake), to use for distance calculations.

# Single-point sf layer for the YNP centroid (longitude -110.40, latitude 44.45),
# built from the point's coordinate table.
YNP_centroid <- st_as_sf(
  as.data.frame(st_coordinates(st_point(c(-110.40, 44.45)))),
  coords = c("X", "Y"),
  crs = 4326
)

# State polygons from the `map()` database, converted to sf for plotting.
usa <- st_as_sf(map("state", plot = FALSE, fill = TRUE))

# Visual check: draw the centroid on top of the state outlines.
ggplot() +
  geom_sf(data = usa) +
  geom_sf(
    data = YNP_centroid,
    aes(geometry = geometry),
    shape = 19,
    color = "darkturquoise"
  )

This block will calculate the distances from the center of Yellowstone for each row in the dataset.

tic()
# Add a `dist` column: distance from each record's point to the YNP centroid,
# converted to kilometres.
data_sf <- mutate(
  data_sf,
  dist = set_units(st_distance(geometry, YNP_centroid), "km")
)
beep(2)
toc()
91.123 sec elapsed

Summarize the distances to get a count, average distance, and sd of distance for each species.

This will output a dataframe where each row is one of the 311 species with occurrence records in GBIF. The geometries are also merged into a multipoint, so a minimum convex polygon for a single species could be plotted easily.

tic()
# One row per species: record count plus min / max / mean / sd of the
# distance to the YNP centroid. `data_sf` is an sf object, so the summarised
# result also carries a combined geometry per species (MULTIPOINT in the
# summary printed below).
species_dist <- data_sf %>%
  group_by(species) %>%
  summarise(
    count = n(),
    min_dist = min(dist),
    max_dist = max(dist),
    mean_dist = mean(dist),
    sd_dist = sd(dist)
  )
beep(2)
# Print the distribution of each summary column across species.
summary(species_dist)
   species              count            min_dist           max_dist    
 Length:311         Min.   :     26   Min.   :   1.268   Min.   :  174  
 Class :character   1st Qu.:   1083   1st Qu.:  12.825   1st Qu.: 3099  
 Mode  :character   Median :   3751   Median :  31.953   Median : 8666  
                    Mean   :  65929   Mean   :  61.950   Mean   : 8562  
                    3rd Qu.:  15874   3rd Qu.:  53.920   3rd Qu.:12698  
                    Max.   :1392917   Max.   :5688.516   Max.   :19489  
   mean_dist         sd_dist                 geometry  
 Min.   : 108.5   Min.   :  30.38   MULTIPOINT   :311  
 1st Qu.: 710.5   1st Qu.: 328.71   epsg:4326    :  0  
 Median :1068.3   Median : 786.85   +proj=long...:  0  
 Mean   :2889.7   Mean   :1124.93                      
 3rd Qu.:6019.3   3rd Qu.:1576.01                      
 Max.   :8737.6   Max.   :5122.90                      
toc() # ends the timer started before the summarise step and prints the elapsed time
628.331 sec elapsed

Output a file with summary data about distances!

# Write the per-species distance table to CSV without the geometry column.
write.csv(
  st_drop_geometry(species_dist),
  file = "./distance_from_YNP_by_species.csv"
)

Use this to get a few CRS codes to try for projection if things look off.

# Print a table of candidate CRS codes for `data_sf`.
suggest_crs(data_sf)
# A tibble: 10 × 6
   crs_code crs_name                        crs_type crs_gcs crs_units crs_proj4
   <chr>    <chr>                           <chr>      <dbl> <chr>     <chr>    
 1 6931     WGS 84 / NSIDC EASE-Grid 2.0 N… project…    4326 m         +proj=la…
 2 3395     WGS 84 / World Mercator         project…    4326 m         +proj=me…
 3 3857     WGS 84 / Pseudo-Mercator        project…    4326 m         +proj=me…
 4 6933     WGS 84 / NSIDC EASE-Grid 2.0 G… project…    4326 m         +proj=ce…
 5 3832     WGS 84 / PDC Mercator           project…    4326 m         +proj=me…
 6 6932     WGS 84 / NSIDC EASE-Grid 2.0 S… project…    4326 m         +proj=la…
 7 8903     RGWF96 / UTM zone 1S            project…    8900 m         +proj=ut…
 8 3576     WGS 84 / North Pole LAEA Russia project…    4326 m         +proj=la…
 9 3575     WGS 84 / North Pole LAEA Europe project…    4326 m         +proj=la…
10 3574     WGS 84 / North Pole LAEA Atlan… project…    4326 m         +proj=la…

Pull geospatial hex locations for each point set.

# Add an `h3_index` column holding the H3 cell of every point, at the
# resolution given by `params$resolution`.
data_sf$h3_index <- geo_to_h3(data_sf, params$resolution) 

Group by hex bin and count species in each one.

# Sanity check: there should be far fewer distinct hex bins than rows
n_distinct(data_sf$h3_index)
[1] 7426
# One row per hex bin with the number of different species recorded in it.
data_sf2 <- summarise(
  group_by(data_sf, h3_index),
  distinct_species = length(unique(species))
)
beep(2)

Count the number of records in each hex bin.

# Tally occurrence records per hex bin. The per-record table `data_sf` is
# tabulated here because `data_sf2` already holds exactly one row per
# `h3_index`, so tabulating it would report a count of 1 for every bin.
hex_freq <- as.data.frame(table(data_sf$h3_index))

Get the coordinates of each hex bin.

# Look up the boundary geometry of every hex bin listed in `data_sf2`.
data_hex <- data_sf2$h3_index %>%
  h3_to_geo_boundary_sf()

Merge the bin coords with frequency data for plotting.

# Join the per-bin species counts to the hex boundaries on `h3_index`.
# Both inputs carry a geometry column, so the hex boundary ends up in
# `geometry.y` of the merged table.
plot_data <- merge(
  x = as.data.frame(data_sf2),
  y = data_hex,
  by = "h3_index"
)

Report out the number of hexagons, and the min and max number of species in any one hex.

# Print: number of hexagons, then the smallest and largest species count.
cat(length(unique(plot_data$h3_index)), range(plot_data$distinct_species))
7426 1 276

Build the map

# Country outlines used as the base layer.
worldmap <- ne_countries(scale = "large", returnclass = "sf")

# Hexagons are drawn over the countries and filled by their species count.
ggplot() +
  geom_sf(data = worldmap$geometry) +
  geom_sf(
    data = plot_data,
    aes(geometry = geometry.y, fill = distinct_species)
  ) +
  scale_fill_viridis_c(option = "turbo") +
  labs(title = "Count of species in each cell") +
  theme_classic() +
  theme(legend.title = element_blank())

ggsave("BOLD_GBIF_12400km2.png") # saves a copy of the most recent plot; comment out to skip
Saving 7 x 5 in image