-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathStoxBiotic.R
750 lines (610 loc) · 30.2 KB
/
StoxBiotic.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
#' Convert BioticData to StoxBioticData
#'
#' @inheritParams ModelData
#' @inheritParams general_arguments
#'
#' @return An object of StoX data type \code{\link{StoxBioticData}}.
#'
#' @seealso The definition of the \code{\link[=StoxBioticFormat]{StoxBiotic format}} and \code{\link{generalSamplingHierarhcy}}.
#'
#' @export
#'
StoxBiotic <- function(
BioticData
) {
# This can be used if we decide to introduce the UseHaulKeyAsStationKey:
# @param UseHaulKeyAsStationKey Logical: If TRUE the variable used as key in the Haul table (HaulKey) will also be used as key in the parent Station table (StationKey). This option can be used for biotic input data in cases where the station variable has been used for something different than a geographical grouping variable (to group hauls on the sam egeographical station) but e.g. to group one month of data from the Norwegian reference fleet.
# UseHaulKeyAsStationKey = FALSE
# Convert from BioticData to the general sampling hierarchy:
GeneralSamplingHierarchy <- BioticData2GeneralSamplingHierarchy(
BioticData,
NumberOfCores = 1L
#UseHaulKeyAsStationKey = UseHaulKeyAsStationKey
)
# Extract the StoxBiotic data and rbind across files:
StoxBioticData <- GeneralSamplingHierarchy2StoxBiotic(GeneralSamplingHierarchy, NumberOfCores = 1L)
# Warn if there are keys with missing values:
warningMissingKeys(
StoxData = StoxBioticData,
stoxDataFormat = "Biotic"
)
# Remove rows of duplicated keys:
#StoxBioticData <- removeRowsOfDuplicatedKeysFromStoxBioticData(StoxBioticData)
StoxBioticData <- removeRowsOfDuplicatedKeys(
StoxData = StoxBioticData,
stoxDataFormat = "Biotic"
)
# Order rows:
orderRowsByKeys(StoxBioticData)
# Ensure that the numeric values are rounded to the defined number of digits:
#setRstoxPrecisionLevel(StoxBioticData)
return(StoxBioticData)
}
# Function to convert each element (representing input files) a BioticData object to the general sampling hierarchy:
BioticData2GeneralSamplingHierarchy <- function(
BioticData,
NumberOfCores = 1L,
#AddToLowestTable = FALSE,
SplitTableAllocation = c("Default", "Lowest", "Highest")
#UseHaulKeyAsStationKey = FALSE
) {
# Make a copy, since we are modifying things by reference:
BioticDataCopy <- data.table::copy(BioticData)
# Run the first phase possibly on several cores:
lapplyOnCores(
BioticDataCopy,
FUN = StoxBiotic_firstPhase,
NumberOfCores = NumberOfCores,
#AddToLowestTable = AddToLowestTable
SplitTableAllocation = SplitTableAllocation
#UseHaulKeyAsStationKey = UseHaulKeyAsStationKey
)
}
# Function to convert rbind :
GeneralSamplingHierarchy2StoxBiotic <- function(GeneralSamplingHierarchy, NumberOfCores = integer()) {
# Run the second phase possibly on several cores:
StoxBioticData <- lapplyOnCores(
GeneralSamplingHierarchy,
FUN = StoxBiotic_secondPhase,
NumberOfCores = NumberOfCores
)
# Rbind for each StoxBiotic table:
StoxBioticData <- rbindlist_StoxFormat(StoxBioticData)
# Temporarily remove the Prey tables:
StoxBioticData <- StoxBioticData[!startsWith(names(StoxBioticData), "Prey")]
return(StoxBioticData)
}
# Function to rbind each table of the StoX format. The input is a list of either StoxBiotic or StoxAcoustic formats (not mixed in the same list):
rbindlist_StoxFormat <- function(x) {
# Skip elements with 0 rows:
x <- x[sapply(x, NROW) > 0]
if(!length(x)) {
return(x)
}
# Rbind for each StoX format table:
tableNames <- names(x[[1]])
x <- lapply(
tableNames,
function(name) data.table::rbindlist(lapply(x, "[[", name))
)
names(x) <- tableNames
return(x)
}
# Function to convert the data read from one type of biotic data into the general sampling hierarchy for biotic data defined by StoX:
#' @importFrom data.table .N setindexv
firstPhase <- function(
data,
datatype,
stoxBioticObject,
#AddToLowestTable = FALSE
SplitTableAllocation = c("Default", "Lowest", "Highest")
#UseHaulKeyAsStationKey = FALSE
) {
# Getting data for the datatype
#if(UseHaulKeyAsStationKey) {
# tableKey <- stoxBioticObject$tableKeyList_HaulAtStation[[datatype]]
#}
#else {
tableKey <- stoxBioticObject$tableKeyList[[datatype]]
#}
tableMap <- stoxBioticObject$tableMapList[[datatype]]
simpleTableMap <- tableMap[sapply(tableMap, function(x) length(x[[2]])) == 1]
originalParentTables <- stoxBioticObject$originalParentTables[[datatype]]
indageHeaders <- stoxBioticObject$indageHeadersList[[datatype]]
# 1. Merge: a) Cruise number with everything b) age reading and individual (specific to data source type)
if( datatype %in% c("nmdbioticv3", "nmdbioticv3.1") ) {
## If preferredagereading in indivdiual is NA, use 1 as the preferred reading
data$individual[,preferredagereading:= ifelse(is.na(preferredagereading), 1, preferredagereading)]
## Merge individual and age
data$individual <- merge(data$individual, data$agedetermination, by.x = c(indageHeaders, "preferredagereading"), by.y = c(indageHeaders, "agedeterminationid"), all.x = TRUE)
# Merge in the tag, but only if there is at most 1 tag per individual:
# Count the tags:
numberOfTagsPerIndividual <- data$tag[, .N, by = c("serialnumber", "catchsampleid", "specimenid")]$N
if(any(numberOfTagsPerIndividual > 1)) {
warning("Multiple tags detected for some individuals. tagid and tagtype added as NA.")
data$individual[, tagid := NA_character_]
data$individual[, tagtype := NA_character_]
}
else {
data$individual <- mergeByIntersect(data$individual, data$tag, all.x = TRUE)
}
# Store the names of the individual and prey tables:
individualNames <- names(data$individual)
preyNames <- names(data$prey)
## Merge individual and tag
#data$individual <- merge(data$individual, data$tag, by.x=indageHeaders, by.y=indageHeaders, all.x=TRUE)
## Cascading merge tables
toMerge <- c("mission", "fishstation", "catchsample", "individual", "prey")
data <- mergeDataTables(data, toMerge)
# For the individual and prey tables, what are not splitt by the complexMap later, keep only the original columns and the keys
}
else if(datatype %in% c("nmdbioticv1.1", "nmdbioticv1.4")) {
## Merge individual and age
indageHeaders <- intersect(names(data$agedetermination), names(data$individual))
data$individual[,preferredagereading:= 1]
data$individual <- merge(data$individual, data$agedetermination, by.x = c(indageHeaders, "preferredagereading"), by.y = c(indageHeaders, "no"), all.x=TRUE)
# Store the names of the individual and prey tables:
individualNames <- names(data$individual)
#preyNames <- names(data$prey)
## Cascading merge tables
toMerge <- c("mission", "fishstation", "catchsample", "individual")
# Use custom suffixes as it contains duplicate header name between tables
data <- mergeDataTables(data, toMerge)
}
else if(datatype == "icesBiotic") {
# Merge Cruise and Survey name
data[["Cruise"]][, Survey := tail(data[["Survey"]], 1)]
# Apply translations defined in the table 'vocabulary':
if(length(data$vocabulary)) {
tablesToTranslate <- setdiff(names(data), c("metadata", "vocabulary"))
tablesToTranslate <- tablesToTranslate[sapply(data[tablesToTranslate], nrow) > 0]
vocabulary <- findVariablesMathcinigVocabulary(
vocabulary = data$vocabulary,
data = data[tablesToTranslate]
)
# Uniqueify since some columns (keys) are present in several tables:
vocabulary <- unique(vocabulary)
translateVariables(
data = data[tablesToTranslate],
TranslationDefinition = "FunctionInput",
Translation = vocabulary,
translate.keys = TRUE
)
}
## Cascading merge tables
toMerge <- c("Cruise", "Haul", "Catch")
# Use custom suffixes as it contains duplicate header name between tables
data <- mergeDataTables(data, toMerge)
# Merge Biology manually as there is no obvious key relation with Catch table
toAdd <- c("WeightUnit", "LengthCode", "LengthClass")
setnames(data$Biology, toAdd, paste0(toAdd, ".Biology"))
# Set intersection column
byVars <- intersect(names(data$Catch), names(data$Biology))
# Get row count
nrowC <- nrow(data$Biology)
# Match keys of Catch and Biotic and obtain the indices in the Biology table for each row of the Catch table:
keyColumns <- intersect(names(data$Catch), names(data$Biology))
keysCatchPasted <- data$Catch[, do.call(paste0, .SD), .SDcols = keyColumns]
keysBiologyPasted <- data$Biology[, do.call(paste0, .SD), .SDcols = keyColumns]
BiologyIndex <- split(seq_along(keysBiologyPasted), match(keysBiologyPasted, keysCatchPasted))
catchIndicesInBiology <- as.list(double(length(keysCatchPasted)))
catchIndicesInBiology[as.numeric(names(BiologyIndex))] <- BiologyIndex
# All individuals stored in the Biology table have weight measured:
data$Biology[, WeightMeasurement := TRUE]
# Add the number of sampled individuals in the Biology table for each combination of the keys of the Catch table:
data$Biology[, NumberOfSampledIndividuals := .N, by = c(byVars, "LengthCode.Biology", "LengthClass.Biology")]
# Merge this into the Catch table:
byVarX <- c(byVars, "LengthCode", "LengthClass")
byVarY <- c(byVars, "LengthCode.Biology", "LengthClass.Biology")
data$Catch <- merge(
data$Catch,
unique(data$Biology[, c(byVarY, "NumberOfSampledIndividuals"), with=FALSE]),
by.x = byVarX,
by.y = byVarY,
all.x = TRUE,
sort = FALSE
)
# Translate NAs to 0, and calculate also the number of individuals to generate from average weigth:
data$Catch[is.na(NumberOfSampledIndividuals), NumberOfSampledIndividuals := 0]
data$Catch[, NumberOfIndividualsToGenerate := NumberAtLength - NumberOfSampledIndividuals]
# Get the indices of the rows of the Catch table for which we should generate individuals:
atGenerateIndividuals <- which(
data$Catch$NumberAtLength > data$Catch$NumberOfSampledIndividuals &
!is.na(data$Catch$LengthCode)
)
# Generate the individuals with average weights:
generatedBiology <- lapply(
atGenerateIndividuals,
specialMerge,
Catch = data$Catch,
byVars = byVars
)
# Combine results and merge add to the Biology table:
generatedBiology <- rbindlist(generatedBiology, use.names = TRUE)
data$Biology <- rbind(
data$Biology,
generatedBiology, # Place the generated individuals last.
fill = TRUE
)
# Merge in also the StationName from Haul to Biology, as it is not a key in the ICESBiotic:
data$Biology <- mergeByIntersect(data$Biology, unique(data$Catch[, c(byVars, "StationName"), with = FALSE]))
# Order by the keys:
data.table::setorderv(data$Biology, byVars)
# Sanity check (old Biology row number must be the same with the merged product, _if there is no WeightMeasurement == FALSE_)
if(nrowC != nrow(data$Biology[WeightMeasurement == TRUE,])) {
stop("Error in merging.")
}
### # Keep only the original columns and the "StationName", which is not a key in the ICESBiotic:
### columnsToKeep <- c(
### names(data$Biology),
### "StationName"
### )
### data$Biology <- data$Biology[, .SD, .SDcols = columnsToKeep]
# Fix Individual ID for those coming from Catch:
FishIDBy <- sapply(tableKey, "[[", 1)
FishIDBy <- FishIDBy[names(FishIDBy) != "Individual"]
# Does not work, as FishID is integer
#data$Biology[is.na(FishID), FishID := paste0("FromCatch_", seq_len(.N)), by = FishIDBy]
data$Biology[, maxFishID := max(0, FishID, na.rm = TRUE), by = FishIDBy]
data$Biology[is.na(FishID), FishID := seq_len(.N) + maxFishID, by = FishIDBy]
data$Biology[, maxFishID := NULL]
# Fix the SampleNumber
#colAgg <- setdiff(colnames(data$Catch), c("NumberAtLength", "WeightAtLength", "LengthCode", "LengthClass", "LengthType"))
data$Catch[, SubsampledNumber:=ifelse(!is.na(NumberAtLength), sum(NumberAtLength), SubsampledNumber), by=byVars]
# Purge duplicate samples
#data$Catch <- data$Catch[!duplicated(data$Catch[, ..byVars])]
data$Catch <- unique(data$Catch, by = byVars)
}
else {
warning("StoX: Invalid data input format ", datatype, ". Only NMD Biotic ver 1.1, 1.4 and >= 3, and ices Biotic formats that are supported for now.")
return(NULL)
}
# 2. Making keys
createKey <- function(x) {
if(length(x) == 1) {
as.character(x[[1]])
}
else {
do.call(paste, c(x, sep="/"))
}
}
for(curr in names(data)) {
tmpKeys <- c()
for(key in tableKey) {
if(all(key[[1]] %in% names(data[[curr]]))) {
#data[[curr]][, key[[2]] := do.call(paste, c(.SD, sep="/")), .SDcols = key[[1]]]
data[[curr]][, key[[2]] := createKey(.SD), .SDcols = key[[1]]]
tmpKeys <- c(tmpKeys, key[[2]])
}
}
setindexv(data[[curr]], tmpKeys)
}
# Keep only the original names and the new keys of the individual and prey tables:
if(datatype %in% c("nmdbioticv1.1", "nmdbioticv1.4", "nmdbioticv3", "nmdbioticv3.1")) {
getKeyNames <- function(x) {
names(x)[endsWith(names(x), "Key")]
}
data$individual <- subset(data$individual, select = c(individualNames, getKeyNames(data$individual)))
if(datatype %in% c("nmdbioticv3", "nmdbioticv3.1")) {
data$prey <- subset(data$prey, select = c(preyNames, getKeyNames(data$prey)))
}
}
# Special warning if there are duplicate station for NMDBiotic:
CruiseStationKeys <- c("cruise", "station")
if("fishstation" %in% names(data) && any(duplicated(data$fishstation, by = CruiseStationKeys))) {
numStations <- NROW(unique(data$fishstation, by = CruiseStationKeys))
duplicatedCruiseStationKeys <- duplicated(data$fishstation, by = CruiseStationKeys) | duplicated(data$fishstation, by = CruiseStationKeys, fromLast = TRUE)
stationsWithMoreThanOneSerialnumber <- unique(subset(data$fishstation, duplicatedCruiseStationKeys, select = CruiseStationKeys))
stationsWithMoreThanOneSerialnumber <- sort(stationsWithMoreThanOneSerialnumber[, do.call(paste, c(.SD, list(sep = "/")))])
numDup <- length(stationsWithMoreThanOneSerialnumber)
warning("StoX: There are more than one 'serialnumber' (HaulKey in StoxBioticData) for ", numDup, " out of ", numStations," 'station' (StationKey in StoxBioticData) in the NMDBiotic data. This implies that the first value of CatchPlatform, DateTime, Longitude, Latitude and BottomDepth is used for all the hauls of each of these stations.\nNote about biotic assignment: In DefineBioticAssignment() it is currently only possible to asssign all hauls of a station in the map (manual assignment). If certain Hauls should be exclcuded, use FilterStoxBiotic().\nMore than one serialnumber for the following cruise/station (of the fishstation table of the BioticData):", printErrorIDs(stationsWithMoreThanOneSerialnumber))
}
# 3. One to one mapping and keys
firstPhaseTables <- list()
# Add the tables which should simply be renamed, and not split into several tables:
for(map in simpleTableMap) {
firstPhaseTables[[map[[2]]]] <- data[[map[[1]]]]
}
# 4. "COMPLEX" mapping (for tables fishstation and catchsample of NMDBiotic):
complexMap <- getComplexMap(
datatype = datatype,
stoxBioticObject = stoxBioticObject,
#AddToLowestTable = AddToLowestTable
SplitTableAllocation = SplitTableAllocation
)
# Get the different origin tables, and loop through these:
levels <- unlist(unique(complexMap[, "level"]))
for(origin in levels) {
# Get the possible destination tables, and loop through these:
destinations <- unlist(unique(complexMap[level == origin, "target"]))
for (dest in destinations) {
# List the columns to copy to the destination table:
colList <- unlist(complexMap[target==dest & level==origin, "variable"])
# Allow missing columns, specifically agedeterminationid, as this was used for merging (and is thus missing in the data):
colList <- intersect(colList, names(data[[origin]]))
if(!is.null(firstPhaseTables[[dest]])) stop("Error")
# Copy the columns:
firstPhaseTables[[dest]] <- data[[origin]][, ..colList]
# Uniquify, as splitting a table may produce dupicates, e.g. for multiple samples of the same species. For multiple serialnumber for the same station see the use of removeRowsOfDuplicatedKeys() in StoxBiotic():
firstPhaseTables[[dest]] <- unique(firstPhaseTables[[dest]])
}
}
## Set keys for complex mapping
for (dest in unlist(unique(complexMap[, "target"]))) {
tmpKeys <- c()
for(key in tableKey) {
if(any(key[[2]] %in% names(firstPhaseTables[[dest]]))) {
tmpKeys <- c(tmpKeys, key[[2]])
}
}
setindexv(firstPhaseTables[[dest]], tmpKeys)
}
# Remove the original keys of the higher levels of the original data:
#originalKeys <- lapply(tableKey, "[[", 1)
#originalKeysAggregatedFromHigherLevels <- c(list(NULL), lapply(seq_len(length(originalKeys) - 1), function(x) unlist(originalKeys[sequence(x)])))
#names(originalKeysAggregatedFromHigherLevels) <- names(tableKey)
# Changed to keeping keys from the table used as origin of a StoxBiotic table, to support the SplitTableAllocation of AddToStoxBiotic():
# Get the keys of the higher tables that should be removed:
keysOfOriginalParentTables <- lapply(names(originalParentTables), function(name) unlist(lapply(originalParentTables[[name]], function(tableName) tableKey[[tableName]][[1]])))
names(keysOfOriginalParentTables) <- names(originalParentTables)
for(curr in names(firstPhaseTables)) {
toKeep <- setdiff(names(firstPhaseTables[[curr]]), keysOfOriginalParentTables[[curr]])
firstPhaseTables[[curr]] <- subset(firstPhaseTables[[curr]], select = toKeep)
}
return(firstPhaseTables)
}
getComplexMap <- function(
datatype,
stoxBioticObject,
#AddToLowestTable = FALSE
SplitTableAllocation = c("Default", "Lowest", "Highest")
) {
#if(AddToLowestTable) {
# stoxBioticObject$complexMaps_lowestTable[[datatype]]
#}
#else {
# stoxBioticObject$complexMaps[[datatype]]
#}
# Read the appropriate mapping data:
SplitTableAllocation <- match_arg_informative(SplitTableAllocation)
if(SplitTableAllocation == "Default") {
stoxBioticObject$complexMaps[[datatype]]
}
else if(SplitTableAllocation == "Lowest") {
stoxBioticObject$complexMaps_lowestTable[[datatype]]
}
else if(SplitTableAllocation == "Highest") {
stoxBioticObject$complexMaps_highestTable[[datatype]]
}
}
# Function for merging the appropriate Catch row with the corresponding Biology record
# Also check for missing biology
# This function generates individuals from the "NumberAtLength", "WeightAtLength", "LengthCode", "LengthClass", "LengthType":
specialMerge <- function(catchRowIndex, Catch, byVars) {
# For convenience extract the current row and the LengthCode:
thisCatch <- Catch[catchRowIndex, ]
# Create NumberOfIndividualsToGenerate identical individuals:
averageWeigth <- rep(
thisCatch$WeightAtLength / thisCatch$NumberAtLength,
thisCatch$NumberOfIndividualsToGenerate
)
generatedBiologyOneCatch <- data.table::data.table(
thisCatch[, byVars, with = FALSE],
FishID = NA,
LengthCode = thisCatch$LengthCode,
LengthClass = thisCatch$LengthClass,
WeightMeasurement = FALSE,
IndividualWeight = averageWeigth,
WeightUnit = thisCatch$WeightUnit,
LengthCode.Biology = thisCatch$LengthCode,
LengthClass.Biology = thisCatch$LengthClass,
WeightUnit.Biology = thisCatch$WeightUnit
)
return(generatedBiologyOneCatch)
}
# Function to get the StoxBiotic on one file:
StoxBiotic_firstPhase <- function(
BioticData,
#AddToLowestTable = FALSE
SplitTableAllocation = c("Default", "Lowest", "Highest")
#UseHaulKeyAsStationKey = FALSE
) {
# Get data type:
datatype <- unlist(BioticData[["metadata"]][1, "useXsd"])
if(!exists("stoxBioticObject")) {
data(stoxBioticObject, package="RstoxData", envir = environment())
}
# Do first phase
first <- firstPhase(
BioticData,
datatype,
stoxBioticObject,
#AddToLowestTable = AddToLowestTable
SplitTableAllocation = SplitTableAllocation
#UseHaulKeyAsStationKey = UseHaulKeyAsStationKey
)
# Add the metadata:
first$metadata <- BioticData$metadata
return(first)
}
# Function to convert from the general sampling hierarchy to the StoxBiotic format for each file:
#' @importFrom data.table indices
secondPhase <- function(data, datatype, stoxBioticObject) {
# Getting conversion function for datatype, used in the loop using convertTable below (applying the conversions in "stox-biotic-final-phase.csv"):
convertWeightRes <- stoxBioticObject$convertWeightRes[[datatype]]
convertLenRes <- stoxBioticObject$convertLenRes[[datatype]]
convertLen <- stoxBioticObject$convertLen[[datatype]]
convertWt <- stoxBioticObject$convertWt[[datatype]]
getCatchFractionWeight <- stoxBioticObject$getCatchFractionWeight[[datatype]]
getSampleWeight <- stoxBioticObject$getSampleWeight[[datatype]]
getIndividualRoundWeight <- stoxBioticObject$getIndividualRoundWeight[[datatype]]
getIndividualTotalLength <- stoxBioticObject$getIndividualTotalLength[[datatype]]
getPreyCatchFractionWeight <- stoxBioticObject$getPreyCatchFractionWeight[[datatype]]
getBottomDepth <- stoxBioticObject$getBottomDepth[[datatype]]
getDateTime <- stoxBioticObject$getDateTime[[datatype]]
# Try to stop data.table warnings (https://github.com/Rdatatable/data.table/issues/2988)
.. <- function (x, env = parent.frame()) {
stopifnot(inherits(x, "character"))
stopifnot(length(x) == 1)
get(x, envir = parent.env(env))
}
columns <- c("variable", "level", datatype, "class")
if(!datatype %in% names(stoxBioticObject$convertTable)) {
stop("The input format ", datatype, " is not yet supprted in RstoxData (")
}
convertTable <- stoxBioticObject$convertTable[, ..columns]
# Data placeholder
secondPhaseTables <- list()
# Borrow variables for use in the conversion defined by convertTable:
borrowVariables <- stoxBioticObject$borrowVariables[[datatype]]
if(length(borrowVariables)) {
for(ind in seq_along(borrowVariables)) {
data[[borrowVariables[[ind]]$target]] <- mergeByStoxKeys(
x = data[[borrowVariables[[ind]]$target]],
y = data[[borrowVariables[[ind]]$source]],
StoxDataType = "StoxBiotic",
toMergeFromY = borrowVariables[[ind]]$variable,
all.x = TRUE,
sort = FALSE
)
}
}
# Apply the conversions, such as pasting keys, converting to POSIX, etc.:
for (i in unique(convertTable[, level])) {
if(NROW(data[[i]])) {
# Process conversion table
for(j in 1:nrow(convertTable[level==i,])) {
k <- convertTable[level==i,][j,]
data[[i]][, (unlist(k[,"variable"])) := eval(parse(text=k[, get(..("datatype"))]))]
}
# Get key for transfer
sourceColumns <- unlist(indices(data[[i]], vectors = TRUE))
sourceColumns <- c(sourceColumns, unlist(convertTable[level==i, "variable"]))
secondPhaseTables[[i]] <- data[[i]][, ..sourceColumns]
}
else {
secondPhaseTables[[i]] <- createEmptyDataTable(convertTable[level == i,]$variable, convertTable[level == i,]$class)
}
}
# Optionally select only the variables given by the convertTable:
# Remove duplicated rows from SpeciesCategory
# This has been moved to **** and made general for all tables. The rule is to remove dows of duplicated keys.
#secondPhaseTables[["SpeciesCategory"]] <- unique(secondPhaseTables[["SpeciesCategory"]])
# secondPhaseTables <- lapply(secondPhaseTables, unique)
return(secondPhaseTables)
}
# Function to get the StoxBiotic on one file:
StoxBiotic_secondPhase <- function(BioticData) {
# Get data type:
datatype <- unlist(BioticData[["metadata"]][1, "useXsd"])
if(!exists("stoxBioticObject"))
data(stoxBioticObject, package="RstoxData", envir = environment())
# Do second phase
second <- secondPhase(BioticData, datatype, stoxBioticObject)
return(second)
}
#' Merge StoxBioticData
#'
#' @param StoxBioticData A list of StoX biotic data (StoX data type \code{\link{StoxBioticData}}).
#' @param TargetTable The name of the table up until which to merge (the default "Individual" implies merging all tables)
#'
#' @return An object of StoX data type \code{\link{MergeStoxBioticData}}.
#'
#' @importFrom data.table setattr
#' @export
#'
MergeStoxBiotic <- function(
StoxBioticData,
TargetTable = "Individual"
) {
# Get the tables to merge:
StoxBioticDataTableNames <- names(StoxBioticData)
if(! TargetTable %in% StoxBioticDataTableNames) {
stop("TargetTable must be one of ", paste(StoxBioticDataTableNames, collapse = ", "))
}
tableNames <- StoxBioticDataTableNames[seq_len(which(StoxBioticDataTableNames == TargetTable))]
# Get the variable names of the different tables, to add as attributes to the merged data:
stoxDataVariableNames <- lapply(StoxBioticData, names)
stoxDataVariableNames <- stoxDataVariableNames[tableNames]
# Merge:
MergeStoxBioticData <- mergeDataTables(StoxBioticData, tableNames = tableNames, output.only.last = TRUE, all = TRUE)
# Move all keys to the start of the table:
data.table::setcolorder(
MergeStoxBioticData,
intersect(
names(MergeStoxBioticData),
getRstoxDataDefinitions("StoxBioticKeys")
)
)
# Add the variable names as attributes:
setattr(
MergeStoxBioticData,
"stoxDataVariableNames",
stoxDataVariableNames
)
return(MergeStoxBioticData)
}
#' Add variables to StoxBioticData from BioticData
#'
#' @inheritParams ModelData
#' @param VariableNames A character vector with names of the variables to add from the \code{BioticData}.
#' @param SplitTableAllocation A string indicating how to split tables of the BioticData into tables of the StoxBiotic format. See Details.
#'
#' @details The default \code{SplitTableAllocation} allocates variables to the StoxBiotic tables according to the mapping defined in \cr\cr
#' \code{RstoxData::stoxBioticObject$complexMaps}. \cr\cr
#' When \code{SplitTableAllocation} is "Lowest" or "Highest" variables are allocated to the lowest or highest table, respectively, as defined in \cr\cr
#' \code{RstoxData::stoxBioticObject$complexMaps_lowestTable} \cr\cr
#' and \cr\cr
#' \code{RstoxData::stoxBioticObject$complexMaps_highestTable}, \cr\cr
#' respectively. In NMDBiotic the tables that are split are fishstation -> Station/Haul (where the first table is the highest and the second is the lowest in the \code{\link[=generalSamplingHierarhcy]{General sampling hierarchy}}) and catchsample -> SpeciesCategory/Sample. In ICESBiotic the tables that are split are Haul -> Station/Haul and Catch -> SpeciesCategory/Sample for NMDBiotic.
#'
#' @return An object of StoX data type \code{\link{StoxBioticData}}.
#'
#' @export
#'
AddToStoxBiotic <- function(
StoxBioticData,
BioticData,
VariableNames = character(),
#AddToLowestTable = FALSE,
SplitTableAllocation = c("Default", "Lowest", "Highest")
) {
StoxBioticData <- AddToStoxData(
StoxData = StoxBioticData,
RawData = BioticData,
VariableNames = VariableNames,
#AddToLowestTable = AddToLowestTable,
SplitTableAllocation = SplitTableAllocation,
NumberOfCores = 1L,
StoxDataFormat = "Biotic"
)
# Remove rows of duplicated keys:
StoxBioticData <- removeRowsOfDuplicatedKeys(
StoxData = StoxBioticData,
stoxDataFormat = "Biotic"
)
return(StoxBioticData)
}
# This can be used later if TowDuration is needed:
# TowDuration,Haul,"as.numeric(difftime(as.POSIXct(paste0(stationstopdate, stationstoptime), format='%Y-%m-%dT%H:%M:%OSZ', tz='GMT'), as.POSIXct(paste0(stationstartdate, stationstarttime), format='%Y-%m-%dZ%H:%M:%OSZ', tz='GMT'), units = 'mins'))","as.numeric(difftime(as.POSIXct(paste0(stationstopdate, stationstoptime), format='%Y-%m-%dZ%H:%M:%OSZ', tz='GMT'), as.POSIXct(paste0(stationstartdate, stationstarttime), format='%Y-%m-%dZ%H:%M:%OSZ', tz='GMT'), units = 'mins'))","as.numeric(difftime(as.POSIXct(paste(stopdate.fishstation, stoptime), format='%d/%m/%Y %H:%M:%S', tz='GMT'), as.POSIXct(paste(startdate.fishstation, starttime), format='%d/%m/%Y %H:%M:%S', tz='GMT'), units = 'mins'))","as.numeric(difftime(as.POSIXct(paste(stopdate.fishstation, stoptime), format='%d/%m/%Y %H:%M:%S', tz='GMT'), as.POSIXct(paste(startdate.fishstation, starttime), format='%d/%m/%Y %H:%M:%S', tz='GMT'), units = 'mins'))","Duration",
# EffectiveTowDuration,Haul,TowDuration,TowDuration,TowDuration,TowDuration,TowDuration,
#' Divide EffectiveTowDistance by fishingdepthcount for NMDBiotic data
#'
#' This function is specific for use with \code{\link{StoxBioticData}} generated from files of the format NMDBiotic3.0 or higher, where the variable fishingdepthcount can be used to indicate that the trawl was hauled at multiple depths. Use this function with causion, as it assumes that the target species is ONLY located in one of the depths at which the trawl was hauled, and that the trawl was hauled for equal distance at each fishing depth.
#'
#' @inheritParams ModelData
#'
#' @return An object of StoX data type \code{\link{StoxBioticData}}.
#'
#' @export
#'
CompensateEffectiveTowDistanceForFishingDepthCount <- function(StoxBioticData) {
# This function requires fishingdepthcount to be present (added using AddToStoxBiotic()):
if(!"fishingdepthcount" %in% names(StoxBioticData$Haul)) {
stop("fishingdepthcount must be present in StoxBioticData in order to compensate for multiple fishing depths. Add this using AddToStoxBiotic(). This is only present in NMDBiotic v3 and later.")
}
# Make a copy and change EffectiveTowDistance:
StoxBioticDataCopy <- data.table::copy(StoxBioticData)
StoxBioticDataCopy$Haul[!is.na(fishingdepthcount), EffectiveTowDistance := EffectiveTowDistance / fishingdepthcount]
return(StoxBioticDataCopy)
}