Install required libraries (only if the libraries have not been installed before):
installed <- rownames(installed.packages())
required <- c("tidyverse",
"tidylog",
"here",
"readxl",
"sf",
"DBI",
"RSQLite",
"digest",
"testthat"
)
if (!all(required %in% installed)) {
install.packages(required[!required %in% installed])
}
Load libraries:
library(tidyverse) # To do data science
## Warning: package 'ggplot2' was built under R version 4.2.2
## Warning: package 'purrr' was built under R version 4.2.2
library(tidylog) # To provide feedback on dplyr functions
library(here) # To find files
library(readxl) # To read Excel files
library(sf) # To convert coordinate systems
## Warning: package 'sf' was built under R version 4.2.2
library(DBI) # To work with databases
library(RSQLite) # To work with SQLite databases in R
## Warning: package 'RSQLite' was built under R version 4.2.2
library(digest) # To create hashes
## Warning: package 'digest' was built under R version 4.2.2
Create a data frame occurrences
from the source
data:
occurrences <- readxl::read_excel(
path = here("data", "raw", "Plantenmonitoring RIPARIAS (Vlaanderen).xlsx"),
sheet = "Data_inventarisaties_opgekuist",
col_types = "text")
Preview data:
occurrences %>% head(n = 10)
Abundance values:
occurrences %>% count(abundantie)
The value O / Occasional / >5%O / Occasional / >5%
is of course a typo. We improve it:
occurrences$abundantie <- recode(
occurrences$abundantie,
"O / Occasional / >5%O / Occasional / >5%" = "O / Occasional / >5%"
)
Check improved values:
occurrences %>% count(abundantie)
Column soort
contains vernacular names in Dutch,
sometimes with the scientific name as prefix:
occurrences %>%
distinct(soort)
We add the scientific name where missing: -
roze waterlelie
: Nymphaea Γmarliacea -
brede waterpest
: Elodea canadensis -
smalle waterpest
: Elodea nuttallii
occurrences <-
occurrences %>%
mutate(soort = recode(
soort,
"roze waterlelie" = "roze waterlelie / Nymphaea Γmarliacea",
"brede waterpest" = "brede waterpest / Elodea canadensis",
"smalle waterpest" = "smalle waterpest / Elodea nuttallii"
)
)
Show distinct values of column soort
:
occurrences %>%
distinct(soort)
Extract vernacular names from column soort
:
occurrences <-
occurrences %>%
mutate(
vernacularName = stringr::str_extract(
string = .data$soort,
pattern = "(.*?)(?=\\/)")) %>%
mutate(vernacularName = stringr::str_trim(.data$vernacularName))
Extract scientific names from column soort
:
occurrences <-
occurrences %>%
mutate(
scientificName = stringr::str_extract(
string = .data$soort,
pattern = "(?<=\\/)(.*)")) %>%
mutate(scientificName = stringr::str_trim(scientificName))
Mapped values:
occurrences %>% distinct(scientificName, vernacularName)
Create a SQLite database with the source data, so it can be queried with SQL in the next steps:
message("Create in memory SQLite database...")
con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
# import occurrences
DBI::dbWriteTable(con, "occurrences", occurrences)
message("DONE")
Create Event core:
dwc_event_sql <- glue::glue_sql(
readr::read_file(here::here("sql", "dwc_event.sql")),
.con = con
)
dwc_event <- DBI::dbGetQuery(con, dwc_event_sql)
Create Occurrence extension:
message("Map occurrences to DwC...")
dwc_occurrence_sql <- glue::glue_sql(
readr::read_file(here::here("sql", "dwc_occurrence.sql")),
.con = con
)
dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql)
message("DONE")
Create Measurement Or Facts extension:
dwc_mof_sql <- glue::glue_sql(
readr::read_file(here::here("sql", "dwc_mof.sql")),
.con = con
)
dwc_mof <- DBI::dbGetQuery(con, dwc_mof_sql)
Disconnect from SQLite database:
DBI::dbDisconnect(con)
write_csv(
x = dwc_event,
file = here::here("data", "processed", "event.csv"),
na = ""
)
write_csv(
x = dwc_occurrence,
file = here::here("data", "processed", "occurrence.csv"),
na = ""
)
write_csv(
x = dwc_mof,
file = here::here("data", "processed", "mof.csv"),
na = ""
)
Load tests and run them to validate the DwC mapping:
source(here::here("tests", "test_dwc_event_occurrence_mof.R"))
## Warning: package 'testthat' was built under R version 4.2.2
## Test passed πΈ
## Test passed π
## Test passed π
## Test passed π
## Test passed π
## Test passed π
## Test passed π
## Test passed π
## Test passed πΈ
## Test passed π₯
## Test passed π
## Test passed π₯³
## Test passed πΈ
## Test passed πΈ
## Test passed π₯³
## Test passed π₯³
## Test passed π₯³
## Test passed π
## Test passed π₯³
## Test passed π
## Test passed π₯³
## Test passed π
## Test passed πΈ
## Test passed π
## Test passed π
## Test passed π
## Test passed π
## Test passed πΈ
## Test passed π₯
## Test passed π₯
## Test passed π
## Test passed π
## Test passed π
## Test passed π
## Test passed π