Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3992765
experimenting with direct parsing of XML
colinpmillar Nov 8, 2016
6e07dbc
Simplify gsub(..., regmatches, gregexpr) to gsub(...)
Nov 8, 2016
84a93cf
Merge pull request #11 from arnima-github/direct_xml_parsing
colinpmillar Nov 8, 2016
0390ea1
Merge pull request #12 from ices-tools-prod/master
colinpmillar Nov 9, 2016
3518408
Rearrange comments
Nov 12, 2016
c62328b
Merge pull request #13 from ices-tools-prod/master
colinpmillar Nov 28, 2016
7479d96
Merge remote-tracking branch 'refs/remotes/origin/master' into direct…
colinpmillar Dec 8, 2016
24d997c
Merge branch 'direct_xml_parsing' of https://github.com/ices-tools-pr…
colinpmillar Dec 8, 2016
1819a82
Merge remote-tracking branch 'refs/remotes/origin/direct_xml_parsing'…
colinpmillar Dec 8, 2016
ed429f9
update NEWS
colinpmillar Dec 8, 2016
a887834
update travis
colinpmillar Dec 8, 2016
e30fefc
updates to reading and parsing
colinpmillar Dec 9, 2016
25b8ff3
comment changes [ci skip]
colinpmillar Dec 9, 2016
4beb892
Simplify once
Dec 10, 2016
760fa59
Comments and whitespace
Dec 10, 2016
81580bc
return a more "tidy" nested list
slarge Dec 15, 2016
5146248
Merge pull request #14 from ices-tools-prod/slarge-patch-1
colinpmillar Jan 10, 2017
b924b08
Merge pull request #16 from ices-tools-prod/arni
colinpmillar Jan 10, 2017
5d66a5f
Merge branch 'master' into development
colinpmillar Jan 10, 2017
e52a01c
Merge remote-tracking branch 'refs/remotes/origin/master' into develo…
colinpmillar Jan 10, 2017
9005975
Merge branch 'development' of https://github.com/ices-tools-prod/ices…
colinpmillar Jan 10, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,23 @@ warnings_are_errors: false

os:
- linux
- osx

language: r
r:
- oldrel
- release
- devel

sudo: false

repos:
CRAN: https://cloud.r-project.org

r_packages:
- icesVocab
- testthat
- rmarkdown
- covr

cache: packages

Expand All @@ -25,10 +29,6 @@ notifications:
on_success: change
on_failure: change

r_github_packages:
- jimhester/covr
- ices-tools-prod/icesVocab

after_success:
- Rscript -e 'library(covr);codecov()'

9 changes: 3 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
Package: icesDatras
Version: 1.1-1
Date: 2016-08-17
Version: 1.2-0
Date: 2016-12-09
Title: DATRAS Trawl Database Web Services
Authors@R: c(person("Colin", "Millar", role=c("aut","cre"), email="colin.millar@ices.dk"),
person("Einar", "Hjorleifsson", role="aut"),
person("Scott", "Large", role="aut"),
person("Arni", "Magnusson", role="aut"))
Imports: RCurl,
utils,
XML
Imports: utils
Suggests: icesVocab,
testthat
Description: R interface to access the web services of the ICES (International
Expand Down
8 changes: 1 addition & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,5 @@ export(getHLdata)
export(getSurveyList)
export(getSurveyYearList)
export(getSurveyYearQuarterList)
importFrom(RCurl,basicTextGatherer)
importFrom(RCurl,curlPerform)
importFrom(XML,getChildrenStrings)
importFrom(XML,removeNodes)
importFrom(XML,xmlParse)
importFrom(XML,xmlRoot)
importFrom(XML,xmlSize)
importFrom(utils,capture.output)
importFrom(utils,download.file)
6 changes: 6 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
--------------------------------------------------------------------------------
icesDatras 1.2-0 (2016-12-xx)
--------------------------------------------------------------------------------
o


--------------------------------------------------------------------------------
icesDatras 1.1-1 (2016-09-14)
--------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions R/getCAdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ getCAdata <- function(survey, year, quarter) {
# check quarter
if (!checkSurveyYearQuarterOK(survey, year, quarter, checksurvey = FALSE, checkyear = FALSE)) return(FALSE)

# read XML string and parse to data frame
# read url and parse to data frame
url <-
sprintf(
"https://datras.ices.dk/WebServices/DATRASWebService.asmx/getCAdata?survey=%s&year=%i&quarter=%i",
survey, year, quarter)
out <- curlDatras(url)
out <- readDatras(url)
out <- parseDatras(out)

out
Expand Down
3 changes: 1 addition & 2 deletions R/getCatchWgt.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
#' getCatchWgt(survey = "ROCKALL", years = 2002, quarters = 3, aphia = 126437)
#'
#' # look up specific species
#' # aphia <- icesVocab::findAphia(c("cod", "haddock"))
#' aphia <- c(126436, 126437)
#' aphia <- icesVocab::findAphia(c("cod", "haddock"))
#' cwt <- getCatchWgt(survey = "ROCKALL", years = 2002, quarters = 3, aphia = aphia)
#'
#' @export
Expand Down
4 changes: 2 additions & 2 deletions R/getDATRAS.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ getDATRAS <- function(record = "HH", survey, years, quarters) {
# read XML string and parse to data frame
out <- lapply(url,
function(x) {
out <- curlDatras(x)
parseDatras(out)
x <- readDatras(x)
parseDatras(x)
})
out <- do.call(rbind, out)

Expand Down
4 changes: 2 additions & 2 deletions R/getDatrasDataOverview.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ getDatrasDataOverview <- function(surveys = NULL) {
out <- sapply(as.character(getSurveyYearList(s)),
function(y) getSurveyYearQuarterList(s, as.integer(y)),
simplify = FALSE)
out <- sapply(out, function(x) as.integer(1:4 %in% x)) # hard wire 4 quarters
row.names(out) <- 1:4
out <- t(sapply(out, function(x) as.integer(1:4 %in% x))) # hard wire 4 quarters
colnames(out) <- paste0("Q", 1:4)
class(out) <- "datrasoverview"
out
},
Expand Down
4 changes: 2 additions & 2 deletions R/getHHdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ getHHdata <- function(survey, year, quarter) {
# check quarter
if (!checkSurveyYearQuarterOK(survey, year, quarter, checksurvey = FALSE, checkyear = FALSE)) return(FALSE)

# read XML string and parse to data frame
# read url and parse to data frame
url <-
sprintf(
"https://datras.ices.dk/WebServices/DATRASWebService.asmx/getHHdata?survey=%s&year=%i&quarter=%i",
survey, year, quarter)
out <- curlDatras(url)
out <- readDatras(url)
out <- parseDatras(out)

out
Expand Down
4 changes: 2 additions & 2 deletions R/getHLdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ getHLdata <- function(survey, year, quarter) {
# check quarter
if (!checkSurveyYearQuarterOK(survey, year, quarter, checksurvey = FALSE, checkyear = FALSE)) return(FALSE)

# read XML string and parse to data frame
# read url and parse to data frame
url <-
sprintf(
"https://datras.ices.dk/WebServices/DATRASWebService.asmx/getHLdata?survey=%s&year=%i&quarter=%i",
survey, year, quarter)
out <- curlDatras(url)
out <- readDatras(url)
out <- parseDatras(out)

# return
Expand Down
4 changes: 2 additions & 2 deletions R/getSurveyList.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ getSurveyList <- function() {
# check web services are running
if (!checkDatrasWebserviceOK()) return (FALSE)

# read XML string and parse to data frame
# read url and parse to data frame
url <- "https://datras.ices.dk/WebServices/DATRASWebService.asmx/getSurveyList"
out <- curlDatras(url)
out <- readDatras(url)
out <- parseDatras(out)

out$Survey
Expand Down
4 changes: 2 additions & 2 deletions R/getSurveyYearList.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ getSurveyYearList <- function(survey) {
# check web services are running
if (!checkDatrasWebserviceOK()) return (FALSE)

# read XML string and parse to data frame
# read url and parse to data frame
url <-
sprintf(
"https://datras.ices.dk/WebServices/DATRASWebService.asmx/getSurveyYearList?survey=%s",
survey)
out <- curlDatras(url)
out <- readDatras(url)
out <- parseDatras(out)

out$Year
Expand Down
4 changes: 2 additions & 2 deletions R/getSurveyYearQuarterList.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ getSurveyYearQuarterList <- function(survey, year) {
# check web services are running
if (!checkDatrasWebserviceOK()) return (FALSE)

# read XML string and parse to data frame
# read url and parse to data frame
url <-
sprintf(
"https://datras.ices.dk/WebServices/DATRASWebService.asmx/getSurveyYearQuarterList?survey=%s&year=%i",
survey, year)
out <- curlDatras(url)
out <- readDatras(url)
out <- parseDatras(out)

out$Quarter
Expand Down
2 changes: 1 addition & 1 deletion R/icesDatras-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' \code{\link{getDatrasDataOverview}} \tab surveys, years, and quarters
#' }
#'
#' @author Colin Millar, Einar Hjorleifsson, Scott Large, and Arni Magnusson.
#' @author Colin Millar, Scott Large, and Arni Magnusson.
#'
#' @references
#' ICES DATRAS web services: \url{http://datras.ices.dk/}.
Expand Down
121 changes: 75 additions & 46 deletions R/utilities.R
Original file line number Diff line number Diff line change
@@ -1,74 +1,91 @@
#' @importFrom RCurl basicTextGatherer
#' @importFrom RCurl curlPerform
curlDatras <- function(url) {
# read only XML table and return as string
reader <- basicTextGatherer()
curlPerform(url = url,
httpheader = c('Content-Type' = "text/xml; charset=utf-8", SOAPAction = ""),
writefunction = reader$update,
verbose = FALSE)
# return
reader$value()

#' @importFrom utils download.file
readDatras <- function(url) {
# try downloading first:
# create file name
tmp <- tempfile()
on.exit(unlink(tmp))

# download file
ret <-
if (os.type("windows")) {
download.file(url, destfile = tmp, quiet = TRUE)
} else if (os.type("unix") & Sys.which("wget") != "") {
download.file(url, destfile = tmp, quiet = TRUE, method = "wget")
} else if (os.type("unix") & Sys.which("curl") != "") {
download.file(url, destfile = tmp, quiet = TRUE, method = "curl")
} else {
127
}

# check return value
if (ret == 0) {
# scan lines
scan(tmp, what = "", sep = "\n", quiet = TRUE)
} else {
message("Unable to download file so using slower method url().\n",
"Try setting an appropriate value via\n\t",
"options(download.file.method = ...)\n",
"see ?download.file for more information.")
# connect to url
con <- url(url)
on.exit(close(con))

# scan lines
scan(con, what = "", sep = "\n", quiet = TRUE)
}
}


#' @importFrom XML xmlParse
#' @importFrom XML xmlRoot
#' @importFrom XML xmlSize
#' @importFrom XML getChildrenStrings
#' @importFrom XML removeNodes
#' @importFrom utils capture.output
parseDatras <- function(x) {
# parse XML string to data frame
capture.output(x <- xmlParse(x))
# capture.output is used to suppress the output message from xmlns:
# "xmlns: URI ices.dk.local/DATRAS is not absolute"

# work with root node
x <- xmlRoot(x)
parseDatras <- function(x) {
# parse using line and column separators
type <- gsub(" *<ArrayOf(.*?) .*", "\\1", x[2])
starts <- grep(paste0("<", type, ">"), x)
ends <- grep(paste0("</", type, ">"), x)
ncol <- unique(ends[1] - starts[1]) - 1
# drop everything we don't need
x <- x[-c(1, 2, starts, ends, length(x))]

# exit if no data is being returned
if (xmlSize(x) == 0) return(NULL)
nc <- length(getChildrenStrings(x[[1]]))

# read XML values into matrix, then convert to data frame
x <- replicate(xmlSize(x), {
# remove top record after reading to optimize speed and memory
out <- getChildrenStrings(x[[1]]) # peek
removeNodes(x[[1]]) # pop
out
})
if (nc == 1) x <- matrix(x, 1, length(x), dimnames = list(names(x[1])))
x <- as.data.frame(t(x), stringsAsFactors = FALSE)
if (length(x) == 0) return(NULL)

# simplifying at this point greatly speeds up trimws, worth simplifying twice
# simplify all columns except StatRec (so "45e6" does not become 45000000)
x[names(x) != "StatRec"] <- simplify(x[names(x) != "StatRec"])
# match content of first <tag>
names_x <- gsub(" *<(.*?)>.*", "\\1", x[1:ncol])

# delete all <tags>
x <- gsub(" *<.*?>", "", x)
# trim white space
x <- trimws(x)

# convert to data frame
dim(x) <- c(ncol, length(x)/ncol)
row.names(x) <- names_x
x <- as.data.frame(t(x), stringsAsFactors = FALSE)

# return data frame now if empty
if (nrow(x) == 0) return(x)

# clean trailing white space from text columns
charcol <- which(sapply(x, is.character))
x[charcol] <- lapply(x[charcol], trimws)

# DATRAS uses -9 and "" to indicate NA
x[x == -9] <- NA
x[x == ""] <- NA
# simplify again, as ""->NA may enable us to coerce char->num/int

# simplify all columns except StatRec (so "45e6" does not become 45000000)
x[names(x) != "StatRec"] <- simplify(x[names(x) != "StatRec"])

# return
x
}



# TODO - combine the check into readDatras - and do it at the download.file stage...
checkDatrasWebserviceOK <- function() {
# return TRUE if web service is active, FALSE otherwise
out <- curlDatras("https://datras.ices.dk/WebServices/DATRASWebService.asmx")
out <- readDatras("https://datras.ices.dk/WebServices/DATRASWebService.asmx/getSurveyList")

# check server is not down by inspecting XML response for internal server error message
if (grepl("Internal Server Error", out)) {
if (grepl("Internal Server Error", out[1])) {
warning("Web service failure: the server seems to be down, please try again later.")
FALSE
} else {
Expand Down Expand Up @@ -119,3 +136,15 @@ simplify <- function(x) {
}
x
}


# returns TRUE if correct operating system is passed as an argument
os.type <- function (type = c("unix", "windows", "other"))
{
type <- match.arg(type)
if (type %in% c("windows", "unix")) {
.Platform$OS.type == type
} else {
TRUE
}
}
3 changes: 1 addition & 2 deletions man/getCatchWgt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/icesDatras-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ library(testthat)

## test package
test_check('icesDatras')