Skip to content

Commit 937af25

Browse files
arnejharnejh
arnejh
authored and
arnejh
committed
Version 2.1.1-9001, see NEWS.md.
1 parent ef7c6d4 commit 937af25

File tree

7 files changed

+174
-183
lines changed

7 files changed

+174
-183
lines changed

DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: RstoxData
2-
Version: 2.1.0
3-
Date: 2024-11-06
2+
Version: 2.1.1-9001
3+
Date: 2024-11-27
44
Title: Tools to Read and Manipulate Fisheries Data
55
Authors@R: c(
66
person(given = "Edvin",

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# RstoxData v2.1.1_9001 (2024-11-25)
2+
* Removed rscript_args from mapplyOnCores(), since this caused the Renviron to be ignored in the sub processes, resulting in errors with finding RstoxFramework in the case of multiple libraries on Linux and macOS.
3+
* Fixed the 29 cm bug again, as it was not properly fixed in RstoxData v2.1.0. Refactored how precision is set both when reading data and in the ICESBiotic() and ICESAcoustic().
4+
* Introduced the EchoType as a column in the Data table of ICESBiotic() (and WriteICESBiotic()).
5+
6+
17
# RstoxData v2.1.0 (2024-11-04)
28
* Final release for StoX 4.1.0.
39
* Fixed bug in DefineTranslation, where the ConditionalVariableNames was showing as a single string and not a vector.

R/Definitions.R

+18
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,24 @@ initiateRstoxData <- function(){
257257
lengthCode_unit_table[, rank := seq_len(.N)]
258258

259259

260+
# Define conversion functions from xml types:
261+
conversionFunctionName <- list(
262+
"xsd:ID" = "as.character",
263+
"xsd:float" = "as.double",
264+
"xs:string" = "as.character",
265+
"xsd:string" = "as.character",
266+
#"xsd:int" = "integer",
267+
"xsd:int" = "asIntegerAfterRound",
268+
"xs:long" = "asIntegerAfterRound",
269+
#"xs:integer" = "integer",
270+
"xs:integer" = "asIntegerAfterRound",
271+
"xs:decimal" = "as.double",
272+
"xs:date" = "as.character",
273+
"xs:time" = "as.character",
274+
"xs:double" = "as.double"
275+
)
276+
277+
260278
#### Assign to RstoxDataEnv and return the definitions: ####
261279
definitionsNames <- ls()
262280
definitions <- lapply(definitionsNames, get, pos = environment())

R/StoxExport.R

+4-110
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ checkAndCreateICESAcousticCSV <- function(ICESAcousticDataOne) {
274274
checkICESAcousticDefinitions (ICESAcousticDataOne)
275275

276276
# Set classes of the variables, especially taking care of NAs. The class of the variables is used later to format the output from WriteICESAcoustic, where NA double type is stored as empty sting to support these beingg empty fields in the written file:
277-
setClassICESAcoustic(ICESAcousticDataOne)
277+
lapply(names(ICESAcousticDataOne), setClass_OneTable, ICESAcousticDataOne, RstoxData::xsdObjects$icesAcoustic.xsd)
278278

279279

280280
#### Rename columns to start with the table name:
@@ -595,7 +595,7 @@ BioticDataToICESBioticOne <- function(
595595
ICESBioticDataOne <- ICESBioticDataOne[hierarchicalTables]
596596

597597
# Set classes of the variables, especially taking care of NAs. The class of the variables is used later to format the output from WriteICESBiotic, where NA double type is stored as empty sting to support these being empty fields in the written file:
598-
setClassICESBiotic(ICESBioticDataOne)
598+
lapply(names(ICESBioticDataOne), setClass_OneTable, ICESBioticDataOne, RstoxData::xsdObjects$icesBiotic.xsd)
599599

600600

601601
# Create a table of the original and new column names, but remove keys:
@@ -748,6 +748,8 @@ BioticData_NMDToICESBioticOne <- function(
748748
# We must filter records with aphia == NA
749749
catchRaw <- catchRaw[!is.na(aphia)]
750750

751+
752+
751753
Catch <- catchRaw[, .(
752754
LocalID = cruise,
753755
Gear = gear,
@@ -862,9 +864,6 @@ BioticData_NMDToICESBioticOne <- function(
862864
}
863865

864866

865-
866-
867-
868867
ICESBioticCSV <- list(
869868
Cruise = Cruise,
870869
Haul = Haul,
@@ -878,117 +877,12 @@ BioticData_NMDToICESBioticOne <- function(
878877

879878

880879

881-
setClassICESBiotic <- function(data, tables = names(data), xsd = RstoxData::xsdObjects$icesBiotic.xsd) {
882-
setClassICES(data, xsd, tables = names(data))
883-
}
884-
885-
setClassICESAcoustic <- function(data, tables = names(data), xsd = RstoxData::xsdObjects$icesAcoustic.xsd) {
886-
setClassICES(data, xsd, tables = names(data))
887-
}
888880

889881

890882

891-
setClassICES <- function(data, xsd, tables = names(data)) {
892-
# Get the classes per table:
893-
classes <- mapply(
894-
structure,
895-
lapply(
896-
xsd$tableTypes[tables],
897-
translateSimple,
898-
old = c(
899-
"xsd:float",
900-
"xsd:int",
901-
"xsd:string",
902-
"xs:string",
903-
"xsd:ID"
904-
),
905-
new = c(
906-
"numeric",
907-
"integer",
908-
"character",
909-
"character",
910-
"character"
911-
)
912-
),
913-
names = xsd$tableHeaders[tables],
914-
SIMPLIFY = FALSE
915-
)
916-
classes <- lapply(classes, as.list)
917-
918-
for(table in tables) {
919-
data[[table]] <- data[[table]][, lapply(names(.SD), changeClassOfNonNA, classes = classes[[table]], data = data[[table]])]
920-
}
921-
}
922883

923-
translateSimple <- function(x, old, new) {
924-
if(length(old) != length(new)) {
925-
stop("old and new need to be of equal length.")
926-
}
927-
for(ind in seq_along(old)) {
928-
x <- replace(x, x == old[ind], new[ind])
929-
}
930-
return(x)
931-
}
932884

933885

934-
changeClassOfNonNA <- function(name, classes, data) {
935-
if(name %in% names(data) && name %in% names(classes) && firstClass(data[[name]]) != classes[[name]]) {
936-
thisClass <- classes[[name]]
937-
if(all(is.na(data[[name]]))) {
938-
NAToInsert <- getNAByType(thisClass)
939-
data[, c(name) := ..NAToInsert]
940-
}
941-
else {
942-
# Removed this by a special function that converts to integer after rounding to avoid problems like trunc(0.29 * 100) == 28:
943-
# I.e., convertion from float to integer performs in the same way as trunc(), which has problems with floating numbers. For the fish lengths that are relevant we have the problem for the following values:
944-
# int <- seq_len(150)
945-
# d <- data.table::data.table(int = int, equalToFloat = trunc(int / 100 * 100) == int)
946-
# subset(d, !equalToFloat)
947-
#data[, c(name) := get(paste("as", thisClass, sep = "."))(get(name))]
948-
# int equalToFloat
949-
# <int> <lgcl>
950-
# 1: 29 FALSE
951-
# 2: 57 FALSE
952-
# 3: 58 FALSE
953-
# 4: 113 FALSE
954-
# 5: 114 FALSE
955-
# 6: 115 FALSE
956-
# 7: 116 FALSE
957-
# So the problem is particularly for fish of length 29 cm, which were truncated to 28 cm when submitting to ICES:
958-
data[, c(name) := get(getConversionFunction(thisClass))(get(name))]
959-
}
960-
}
961-
}
962-
963-
964-
getConversionFunction <- function(class) {
965-
atInteger <- class %in% "integer"
966-
out <- paste("as", class, sep = ".")
967-
out[atInteger] <- "asIntegerAfterRound"
968-
return(out)
969-
}
970-
971-
972-
asIntegerAfterRound <- function(x, prec = .Machine$double.eps) {
973-
# This operation requires that the input can be represented as numeric, so we test that first by observing whether the number of missing values increases:
974-
x_numeric <- as.numeric(x)
975-
numberOfNAs <- sum(is.na(x))
976-
numberOfNAs_numeric <- sum(is.na(x_numeric))
977-
if(numberOfNAs_numeric > numberOfNAs) {
978-
warning("StoX: NAs introduced when trying to convert to integer.")
979-
return(as.integer)
980-
}
981-
982-
# Convert to integer:
983-
x_integer <- as.integer(x)
984-
# Find values which differ to the integer value by less than the input precision, and round these off before converting to integer to avoid occasional shifts in integer value due to floating point representation (e.g. as.integer(0.29 * 100) == 28):
985-
atSmallDiff <- which(abs(x_numeric - x_integer) <= prec)
986-
987-
# Convert to integer, but for values that differ to the integer value by more than
988-
x_integer[atSmallDiff] <- as.integer(round(x_numeric[atSmallDiff]))
989-
990-
return(x_integer)
991-
}
992886

993887
#' Write ICESBiotic to CSV fille
994888
#'

R/Utilities.R

+123-1
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,10 @@ mapplyOnCores <- function(FUN, NumberOfCores = 1L, ..., MoreArgs = NULL, SIMPLIF
319319

320320
# On Windows run special args to speed up:
321321
#if(get_os() == "win") {
322-
cl <- parallel::makeCluster(NumberOfCores, rscript_args = c("--no-init-file", "--no-site-file", "--no-environ"))
322+
323+
# Removed the rscript_args, because it changes the envoronment compared to the partent environment:
324+
#cl <- parallel::makeCluster(NumberOfCores, rscript_args = c("--no-init-file", "--no-site-file", "--no-environ"))
325+
cl <- parallel::makeCluster(NumberOfCores)
323326
out <- parallel::clusterMap(cl, FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = SIMPLIFY)
324327
parallel::stopCluster(cl)
325328
#}
@@ -1052,3 +1055,122 @@ do.call_robust <- function(what, args, quote = FALSE, envir = parent.frame(), ke
10521055
}
10531056

10541057

1058+
1059+
1060+
1061+
1062+
1063+
1064+
1065+
1066+
1067+
1068+
1069+
1070+
1071+
1072+
1073+
1074+
1075+
1076+
1077+
1078+
1079+
1080+
1081+
1082+
1083+
# Process column names and types
1084+
setNames_OneTable <- function(tableName, data, xsd) {
1085+
1086+
# For convenience extract the tableHeader of the current table:
1087+
tableHeader <- xsd$tableHeaders[[tableName]]
1088+
1089+
# There are duplicated column names in NMDBiotic 1, 1.1 and 1.4. we suffix the table name to those fields:
1090+
if(anyDuplicated(tableHeader)) {
1091+
dup <- duplicated(tableHeader)
1092+
tableHeader[dup] <- paste(tableHeader[dup], tableName, sep = ".")
1093+
}
1094+
1095+
# Handle empty data. This is only relevant for NMDBiotic and NMDAcoustic, which both have levels with no data (i.e. "missions" and "distance_list", respectively):
1096+
if(!length(data[[tableName]])) {
1097+
data[[tableName]] <- matrix(data = "", nrow = 0, ncol = length(tableHeader))
1098+
}
1099+
1100+
# Convert to data.table
1101+
output <- data.table(data[[tableName]])
1102+
1103+
# Set column names
1104+
setnames(output, tableHeader)
1105+
1106+
return(output)
1107+
}
1108+
1109+
1110+
1111+
1112+
1113+
# Set types of the columns of the table named 'tableName' of 'data'. Note that this only considers the columns with names present in the xsd$tableHeader. For ICES formats the keys are not included in the tableHeader, but all keys are character
1114+
setClass_OneTable <- function(tableName, data, xsd) {
1115+
1116+
# Known atomic data types
1117+
conversionFunctionName <- getRstoxDataDefinitions("conversionFunctionName")
1118+
1119+
# Set column types (only double and integer for now)
1120+
tableHeader <- xsd$tableHeader[[tableName]]
1121+
tableType <- xsd$tableTypes[[tableName]]
1122+
if(length(tableType) > 0) {
1123+
for(i in seq_along(tableHeader)) {
1124+
# Map the types
1125+
doConv <- eval(
1126+
parse(
1127+
text = conversionFunctionName[[tableType[i]]]
1128+
)
1129+
)
1130+
1131+
# Throw a proper warning when conversion fails:
1132+
tryCatch(
1133+
data[[tableName]][, tableHeader[i] := doConv(data [[tableName]] [[tableHeader[i]]] ) ],
1134+
error = function(e) {
1135+
e
1136+
},
1137+
warning = function(w) {
1138+
modifiedWarning <- paste0("The following variable could not converted to numeric as per the format definition and were set to NA: ", names(data[[tableName]])[i])
1139+
warning(modifiedWarning)
1140+
}
1141+
)
1142+
}
1143+
}
1144+
1145+
invisible(tableName)
1146+
}
1147+
1148+
1149+
asIntegerAfterRound <- function(x, prec = sqrt(.Machine$double.eps)) {
1150+
# This operation requires that the input can be represented as numeric, so we test that first by observing whether the number of missing values increases:
1151+
x_numeric <- as.numeric(x)
1152+
x_integer <- as.integer(x)
1153+
1154+
# Detect whether the input is not fully convertible to integer, which we assume is the case if there are mote missing values in the x_numeric, in which case we simply return the x_integer:
1155+
numberOfNAs <- sum(is.na(x))
1156+
numberOfNAs_numeric <- sum(is.na(x_numeric))
1157+
if(numberOfNAs_numeric > numberOfNAs) {
1158+
warning("StoX: NAs introduced when trying to convert to integer.")
1159+
return(x_integer)
1160+
}
1161+
1162+
# Convert to integer:
1163+
x_integer <- as.integer(x)
1164+
x_rounded <- round(x_numeric)
1165+
# Find values which differ to the integer value by less than the input precision, and round these off before converting to integer to avoid occasional shifts in integer value due to floating point representation (e.g. as.integer(0.29 * 100) == 28):
1166+
diff <- x_numeric - x_rounded
1167+
atSmallDiff <- which(diff < 0 & -diff <= prec)
1168+
1169+
# Convert to integer after rounding for values that differ to the integer value by less than the prec:
1170+
x_integer[atSmallDiff] <- as.integer(x_rounded[atSmallDiff])
1171+
1172+
return(x_integer)
1173+
}
1174+
1175+
1176+

0 commit comments

Comments
 (0)