Skip to content

Commit

Permalink
steppingstone: see oce-issues for tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dankelley committed Oct 14, 2024
1 parent 454cd50 commit 1adb01e
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ export(
handleFlags,
handleFlagsInternal,
imagep,
inferUnits,
initializeFlagScheme,
initializeFlagSchemeInternal,
initializeFlags,
Expand Down
75 changes: 75 additions & 0 deletions R/units.R
Original file line number Diff line number Diff line change
Expand Up @@ -226,3 +226,78 @@ as.unit <- function(u, default = list(unit = expression(), scale = "")) {
}
}
}

#' Infer units and scales from character strings
#'
#' Each element of `strings` is compared against the regular expressions
#' stored in the first column of `dictionary`. Before the comparison, each
#' dictionary entry is anchored with `^` and `$`, so that it must match
#' the whole string, and any `#` character in it is replaced by `[0-9]`,
#' so that it matches a single digit.
#'
#' @param strings vector of character values.
#'
#' @param dictionary either a data frame or a string indicating a csv
#' file that holds a data frame. If a string, and if it ends in `".csv"`,
#' then that file is used, but if it has another ending, it is taken
#' as an indicator of a built-in file that is provided with
#' the package, named `units_`, followed by the supplied `dictionary`
#' value, and ending with `.csv`. In either case, the data frame must
#' have at least three columns, which hold, in order: a regular expression
#' for the text to be matched, the text of an R expression for the unit,
#' and a scale.
#'
#' @param ignore.case logical value indicating whether to ignore the
#' case in `strings`. This defaults to TRUE, so that e.g. `"deg C"`
#' and `"deg c"` can both be matched easily. A case-sensitive match is
#' always tried first, and the case-insensitive match is only used if
#' that fails, so that e.g. `"S/m"` and `"s/m"` can still be distinguished
#' if the dictionary holds both.
#'
#' @param debug a value that is passed to [oceDebug()] to control the
#' printing of debugging information.
#'
#' @return a list with one element per element of `strings`, named
#' with the values of `strings`. If the unit is recognized, the element
#' is a list with the first item, named `unit`, being an [expression()]
#' and the second, named `scale`, being an optional descriptor of the unit.
#' If the unit is not recognized, `unit` is `"unit?"` and `scale`
#' is `"scale?"`.
#'
#' @author Dan Kelley
inferUnits <- function(strings, dictionary = "default", ignore.case = TRUE, debug = 0) {
    oceDebug(debug, "inferUnits(..., dictionary=",
        if (is.data.frame(dictionary)) {
            "[data frame]"
        } else if (is.character(dictionary)) {
            paste0("\"", dictionary, "\"")
        },
        ") START\n",
        sep = "", unindent = 1
    )
    if (is.character(dictionary)) {
        if (length(dictionary) != 1L) {
            stop("if dictionary is a character value, it must be of length 1")
        }
        # The dot is escaped so that only a true ".csv" suffix is taken to
        # name a user-supplied file.
        if (!grepl("\\.csv$", dictionary)) {
            dictionaryName <- dictionary
            dictionary <- system.file("extdata", paste0("units_", dictionaryName, ".csv"), package = "oce")
            # system.file() returns "" if the file does not exist
            if (!nzchar(dictionary)) {
                stop("there is no built-in dictionary named \"", dictionaryName, "\"")
            }
            oceDebug(debug, "using internal dictionary \"", dictionary, "\"\n")
        }
        dictionary <- read.csv(dictionary, header = FALSE)
    }
    if (!is.data.frame(dictionary)) {
        stop("dictionary must be either a data frame or a character string")
    }
    if (ncol(dictionary) < 3L) {
        stop("dictionary must have at least 3 columns (pattern, unit and scale)")
    }
    # Columns are accessed by position, so that a user-supplied data frame
    # need not use the V1, V2, ... names that read.csv() creates.
    patterns <- if (nrow(dictionary) > 0L) {
        gsub("#", "[0-9]", paste0("^", as.character(dictionary[[1]]), "$"))
    } else {
        character(0)
    }
    units <- as.character(dictionary[[2]])
    scales <- dictionary[[3]]
    # Indices of the dictionary entries that match a single string
    matchingEntries <- function(string, ignoreCase) {
        which(vapply(patterns,
            function(p) grepl(p, string, ignore.case = ignoreCase),
            logical(1),
            USE.NAMES = FALSE
        ))
    }
    n <- length(strings)
    rval <- vector("list", n)
    for (i in seq_len(n)) {
        # Try an exact-case match first, so entries that differ only in
        # case (e.g. "S/m" and "s/m") are not confused with each other.
        w <- matchingEntries(strings[i], FALSE)
        if (length(w) == 0L && ignore.case) {
            w <- matchingEntries(strings[i], TRUE)
        }
        if (length(w) > 1L) {
            warning("unit matches more than 1 entry in the dictionary. Indices are: ", paste(w, collapse = " "))
            w <- w[1]
        }
        if (length(w) == 1L) {
            rval[[i]] <- list(
                unit = as.expression(parse(text = units[w])),
                scale = scales[w]
            )
        } else {
            rval[[i]] <- list(unit = "unit?", scale = "scale?")
        }
    }
    names(rval) <- strings
    rval
}
106 changes: 106 additions & 0 deletions inst/extdata/units_default.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"MicroEinstein[s]{0,1} m-2 s-1",mu*Einstein/m^2/s,
"count[s]{0,1}",count,
"dbar[s]{0,1}",dbar,
"decibar[s]{0,1}",dbar,
"degree[s]{0,1} decimal minute[s]{0,1}", degreeDecimalMinute,
"degree[s]{0,1}",degree,
"degree[s]{0,1}.C",degree*C,
"degree[s]{0,1}.celsius",degree*C,
"degree[s]{0,1}.east",degree*E,
"degree[s]{0,1}.north",degree*N,
"degree[s]{0,1}.south",degree*S,
"degree[s]{0,1}.west",degree*W,
"degree[s]{0,1}_C",degree*C,
"degree[s]{0,1}_east",degree*East,
"degree[s]{0,1}_north",degree*North,
"pH unit[s]{0,1}",pH,
%,percent,
,,
/m/sr,m^-1*sr^-1,
1,,
1/m,1/m,
NTU,NTU,
PSS-78,PSS-78,
PSU,,PSS-78
S m-1,S/m,
S m\^-1,S/m,
S/m,S/m
W kg-1,W/kg
W kg\^-1,W/kg
W/kg,W/kg
ah,A*hr,
amp,A,
amp-hrs,A*hr,
bar,bar,
bool,,
boolean,,
byte,,
cc,cm^3,
celsius,degree*C,
db,dbar,
dbar,dbar,
deg,degree,
degc,degree*C,
deg C,degree*C,
enum,,
inHg,inch*Hg,
inch,inch,
ipts[ -]68,degree*C,IPTS-68
its[ -]90,degree*C,ITS-90
.*[kK]g m-3,kg/m^3,
.*[kK]g m\^-3,kg/m^3,
.*[kK]g/m\^3,kg/m^3,
m s-1,m/s,
m s\^-2,m/s^2
m,m,
m-1,1/m,
m/s,m/s,
m/s\^2,m/s^2
mS cm-1,mS/cm,
mS cm\^-1,mS/cm,
mS/cm,mS/cm,
mV,mV,
mg m-3,mg/m^3,
mg/m\^3,mg/m^3,
micromol l-1,mu*mol/l,
micromol l\^-1,mu*mol/l,
micromol/l,mu*mol/l,
micromole kg-1,mu*mol/kg,
micromole kg\^-1,mu*mol/kg,
micromole/kg,mu*mol/kg,
ml [lL]-1,ml/l,
ml [lL]\^-1,ml/l,
ml/[lL],ml/l,
muS cm-1,mu*S/cm,
muS cm\^-1,mu*S/cm,
muS/cm,mu*S/cm,
nodim,,
ppb,ppb,
pss-78,,PSS-78
psu,,PSS-78
rad,rad,
s m-1,s/m,
s m\^-1,s/m,
s,s,
s/m,s/m,
sec,s,
seconds since 1970-01-01T00:00:00Z,s,since 1970-01-01
seconds since 1990-01-01,s,since 1990-01-01
uEinsteins s-1 m-2,mu*Einstein/m^2/s^1,
uEinsteins s\^-1 m\^-2,mu*Einstein/m^2/s^1,
uEinsteins/s/m\^2,mu*Einstein/m^2/s^1,
ug/kg,mu*g/kg,
ug/l,mu*g/l,
umol kg-1,mu*mol/kg,
umol l-1,mu*mol/l,
umol l\^-1,mu*mol/l,
umol/kg,mu*mol/kg,
umol/l,mu*mol/l,
unixtimestamp,s,since 1970-01-01
uw cm\^-2 nm\^-1,mu*W/cm^2/nm,
uw cm-2 nm-1,mu*W/cm^2/nm,
uw/cm\^2/nm,mu*W/cm^2/nm,
volt,V,
volts,V,
watt,W,
watts,W,
29 changes: 29 additions & 0 deletions man/inferUnits.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1adb01e

Please sign in to comment.