generated from WHOIGit/minimal-edi-package
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnes-lter-events-transect.Rmd
179 lines (154 loc) · 7.81 KB
/
nes-lter-events-transect.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
---
title: "NES-LTER compiled event log packaging workflow"
author: "Joe Futrelle"
date: "2022-06-14"
output: html_notebook
---
Libraries used
```{r}
# Two of the required packages (EMLassemblyline and ediutilities) are
# installed from GitHub; uncomment the lines below to install them.
# library(remotes)
# remotes::install_github("EDIorg/EMLassemblyline")
# remotes::install_github("WHOIGit/ediutilities")
library(EMLassemblyline)
library(ediutilities)
library(here)
library(lubridate)
library(stringr)
library(dplyr)
library(readr)
# URL prefix of the per-cruise event logs; "<cruise id>.csv" is appended to it
# when each cruise's log is downloaded.
base_api_url <- 'https://nes-lter-data.whoi.edu/api/events/'
```
Construct and clean raw data table
```{r}
# Download the event log of every cruise and stack them into one table.
#
# Defines: package_name, cruise_list, all_events and data_table (used by the
# rest of the notebook).
package_name <- 'nes-lter-events-transect'
cruise_list <- api_list_cruises()
print(cruise_list)

# NULL marks "nothing downloaded yet"; it is replaced by the first usable log.
all_events <- NULL
for (cruise_id in cruise_list) {
  cruise_id <- str_to_lower(cruise_id)
  url <- glue::glue('{base_api_url}{cruise_id}.csv')
  print(url)

  # A cruise whose log cannot be read is reported and skipped, not fatal.
  event <- tryCatch(
    read_csv(url),
    error = function(cond) {
      message("Could not read ", url, ": ", conditionMessage(cond))
      NULL
    }
  )

  # Test for NULL *before* calling nrow(): nrow(NULL) is NULL, and
  # `NULL > 0` is a zero-length logical that `&&` / `if` cannot evaluate,
  # so the original ordering aborted the loop on the first failed download.
  if (is.null(event) || nrow(event) == 0) {
    next
  }

  event$Cruise <- cruise_id
  # Reorder to make Cruise the first column
  event <- event %>% select(Cruise, everything())

  if (is.null(all_events)) {
    all_events <- event
  } else {
    # rbind.fill() pads columns missing from either side with NA
    all_events <- plyr::rbind.fill(all_events, event)
  }
}

# Fail here with a clear message rather than later on a non-data-frame value.
if (is.null(all_events)) {
  stop("No event logs could be downloaded from ", base_api_url, call. = FALSE)
}
data_table <- all_events
# Clean the concatenated event table: parse and sort timestamps, tidy the
# Cast / Station / Comment text, drop spill-over columns, attach project ids,
# remove OOI instruments and replace missing values with output placeholders.

# Parse timestamps and sort chronologically
data_table$dateTime8601 <- ymd_hms(data_table$dateTime8601)
data_table <- data_table %>% arrange(dateTime8601)

# Remove the first ".0" found in each cast value (e.g. "3.0" -> "3").
# NOTE(review): the pattern is unanchored, so it would also hit a ".0" in the
# middle of a value; confirm whether '\\.0$' is what is intended.
data_table$Cast <- str_replace(data_table$Cast, '\\.0', '')

# Drop the unnamed spill-over columns. any_of() skips names that are not
# present, so the notebook keeps working when a download contains fewer (or
# none) of them; a plain c(...) of names makes select() fail in that case.
data_table <- data_table %>%
  select(-any_of(c('Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10',
                   'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13')))

# Strip cast / name fragments that leaked into the Station field.
# NOTE(review): 'Cast*' matches "Cas" followed by any number of "t" - confirm.
data_table$Station <- str_replace(data_table$Station, 'Cast*\\n[0-9]+ ', '')
data_table$Station <- str_replace(data_table$Station, '\\n.*', '')
data_table$Station <- str_replace(data_table$Station, '\\nName:.*', '')

data_table <- data_table %>% rename(Message.ID = `Message ID`)

# The table is later written comma-separated without quoting, so commas inside
# comments are replaced by semicolons.
data_table$Comment <- str_replace_all(data_table$Comment, ',', ';')

# add project_id column
cpid <- read.csv(here('cruise_project_id.csv'))
data_table <- data_table %>% left_join(cpid, by = "Cruise")

# exclude OOI instruments
ooi_instruments <- str_split('CPM, CSM, Falcon ROV, Glider, Kraken ROV, REMUS, Slocum Glider, USBL', ', ')[[1]]
data_table <- data_table %>% filter(!Instrument %in% ooi_instruments)

# Missing text fields become empty strings
data_table$Station[is.na(data_table$Station)] <- ""
data_table$Cast[is.na(data_table$Cast)] <- ""
data_table$Comment[is.na(data_table$Comment)] <- ""

# Timestamps are written as text; a missing timestamp becomes an empty string
data_table$dateTime8601 <- as.character(data_table$dateTime8601)
data_table$dateTime8601[is.na(data_table$dateTime8601)] <- ""

# Missing ids and coordinates are written as the literal text "NaN".
# Assigning a string coerces a numeric column to character.
data_table$Message.ID[is.na(data_table$Message.ID)] <- "NaN"
data_table$Latitude[is.na(data_table$Latitude)] <- "NaN"
data_table$Longitude[is.na(data_table$Longitude)] <- "NaN"
# Regularize Instrument names
# Every raw spelling (left) is rewritten to its canonical name (right) in one
# lookup pass. Values that are not listed, including NA, are left unchanged.
data_table$Instrument <- local({
  canonical_names <- c(
    "Bongo" = "Bongo Net",
    "Attune" = "Attune Flow Cytometer",
    "Cytomeoter" = "Cytometer",
    "EK80" = "EK80 broadband",
    "Incubation" = "Incubation Grazing",
    "Incubation O2" = "Incubation Respiration O2",
    "RingNet" = "Ring Net",
    "SUNA V2" = "SUNAV2",
    # NOTE(review): the canonical name below ends with a space - confirm
    "Thermosalinograph SBE45" = "Thermosalinographs on underway impeller ",
    "trans10" = "Transmissometer 10cm",
    "Transmissometer 10" = "Transmissometer 10cm",
    "trans25" = "Transmissometer 25cm",
    "Underway diaphram pump" = "Underway Science seawater diaphragm pump",
    "Underway Science Seawater Diaphragm Pump" = "Underway Science seawater diaphragm pump",
    "SSW" = "Underway Science seawater diaphragm pump",
    "Underway Impeller" = "Underway Science seawater impeller",
    "IFCB_continuous" = "IFCB continuous",
    "RingNetIFCB Continuous" = "IFCB continuous",
    "IFCB Continuous" = "IFCB continuous",
    "IFCB 109" = "IFCB continuous"
  )

  instrument <- data_table$Instrument
  # Position of each value among the raw spellings; NA when it is not listed
  lookup_pos <- match(instrument, names(canonical_names))
  is_listed <- !is.na(lookup_pos)
  instrument[is_listed] <- unname(canonical_names[lookup_pos[is_listed]])
  instrument
})
# Fix IFCB 102 entries
# The steps below are cruise-specific corrections for rows whose Instrument is
# "IFCB 102". They run in order, and later steps see the results of earlier
# ones.
#
# ar24b - remove all IFCB 102 rows except those whose Action is 'bucket'.
# NOTE(review): filter() keeps only rows whose condition is TRUE, so a row for
# which the condition evaluates to NA (e.g. Action is missing) is dropped too.
data_table <- data_table %>%
filter(!(Cruise == "ar24b" & Instrument == "IFCB 102" & Action != "bucket"))
# ar24a - remove IFCB 102 rows with Action 'start' at Station 'LTER Station 1'
data_table <- data_table %>%
filter(!(Cruise == "ar24a" & Instrument == "IFCB 102" &
Action == "start" & Station == "LTER Station 1"))
# ar24a & ar24b - IFCB 102 with Action 'bucket': change Instrument to
# BucketSample, change Action to 'start', and append '; IFCB discrete' to the
# Comment.
# The mutate() arguments are evaluated in order, so the Action test sees the
# already-updated Instrument and the Comment test sees the already-updated
# Action.
# NOTE(review): as a consequence, any row of these two cruises that was
# already BucketSample with Action 'bucket' or 'start' is modified as well -
# confirm that this is intended.
data_table <- data_table %>%
mutate(
Instrument = ifelse(Cruise %in% c("ar24a", "ar24b") & Instrument == "IFCB 102" & Action == "bucket",
"BucketSample", Instrument),
Action = ifelse(Cruise %in% c("ar24a", "ar24b") & Instrument == "BucketSample" & Action == "bucket",
"start", Action),
Comment = ifelse(Cruise %in% c("ar24a", "ar24b") & Instrument == "BucketSample" & Action == "start",
paste0(Comment, "; IFCB discrete"), Comment)
)
# ar24a - IFCB 102 with Action 'underway': change Instrument to Underway
# Science seawater diaphragm pump and change Action to 'IFCB_discrete'.
# As above, the Action test relies on Instrument having been updated first.
data_table <- data_table %>%
mutate(
Instrument = ifelse(Cruise == "ar24a" & Instrument == "IFCB 102" & Action =="underway", "Underway Science seawater diaphragm pump", Instrument),
Action = ifelse(Cruise == "ar24a" & Instrument == "Underway Science seawater diaphragm pump" & Action == "underway",
"IFCB_discrete", Action)
)
# ar28a - relabel every IFCB 102 row as IFCB continuous
data_table <- data_table %>%
mutate(Instrument = ifelse(Cruise == "ar28a" & Instrument == "IFCB 102",
"IFCB continuous", Instrument))
# Write the cleaned table next to the project root as "<package_name>.csv":
# comma-separated, no quoting, no row names.
product_table_filename <- paste(package_name, 'csv', sep = '.')
write.table(
  data_table,
  file = here(product_table_filename),
  sep = ",",
  quote = FALSE,
  qmethod = "escape",
  row.names = FALSE
)
```
Read the Excel metadata template and generate text templates used by
EMLassemblyline
```{r}
# Generate the text templates from the Excel metadata template
excel_to_template(
  here('nes-lter-events-transect-info'),
  package_name,
  rights = 'CC0',
  file_type = ".md"
)

# Data Coverage
# Coordinates were written as text with "NaN" for missing values; turn them
# back into numbers (missing -> NA) before computing the bounding box.
lat <- as.numeric(ifelse(data_table$Latitude == "NaN", NA, data_table$Latitude))
lon <- as.numeric(ifelse(data_table$Longitude == "NaN", NA, data_table$Longitude))

# range() returns c(min, max); bounds are rounded to 5 decimal places
lat_bounds <- round(range(lat, na.rm = TRUE), 5)
lon_bounds <- round(range(lon, na.rm = TRUE), 5)
south <- lat_bounds[1]
north <- lat_bounds[2]
west <- lon_bounds[1]
east <- lon_bounds[2]
```
Generate the package and insert the parent project node into the resulting EML
```{r}
# Package identifier shared by make_eml() and project_insert()
pkg_id <- 'knb-lter-nes.20.2'

# Build the EML for the product table written earlier in this notebook
make_eml(
  here(),
  package.id = pkg_id,
  user.id = "NES",
  user.domain = "LTER",
  dataset.title = 'Event logs from Northeast U.S. Shelf Long Term Ecological Research (NES-LTER) Transect cruises, ongoing since 2017',
  data.table = product_table_filename,
  data.table.name = 'nes-lter-events-transect',
  data.table.description = 'Concatenated table of events recorded on seasonal NES-LTER Transect cruises',
  temporal.coverage = temporal_coverage(data_table$dateTime8601),
  geographic.description = "NES-LTER Transect",
  geographic.coordinates = c(north, east, south, west),
  maintenance.description = "ongoing"
)

# Insert the parent project node into the generated EML
project_insert(pkg_id, "parent_project_NESI-II_Rapid_OOI.txt")
```