Skip to content

Commit cf4e1a3

Browse files
Andreas Blätte
authored and committed
corpus_properties() and corpus_property() robust PolMine#69
1 parent ac8a3b7 commit cf4e1a3

File tree

6 files changed

+60
-25
lines changed

6 files changed

+60
-25
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ LinkingTo: Rcpp
5454
Biarch: true
5555
URL: https://github.com/PolMine/RcppCWB
5656
BugReports: https://github.com/PolMine/RcppCWB/issues
57-
RoxygenNote: 7.1.2
57+
RoxygenNote: 7.2.2
5858
Roxygen: list(markdown = TRUE)
5959
Collate:
6060
'RcppCWB_package.R'

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* C++ code replaces `sprintf()` with `snprintf()` to address security issue.
44
* Package now depends on Rcpp v1.0.10, which replaces one remaining `sprintf()`
55
#70.
6+
* `corpus_properties()` and `corpus_property()` no longer crash if the corpus
7+
is not loaded or not present #69.
68

79

810
# RcppCWB 0.5.4

R/cl.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,8 @@ corpus_s_attributes <- function(corpus, registry = Sys.getenv("CORPUS_REGISTRY")
467467
}
468468

469469
#' @details `corpus_properties()` returns a `character` vector with the corpus
470-
#' properties defined in the registry file.
470+
#' properties defined in the registry file. If the corpus cannot be located,
471+
#' `NA` is returned.
471472
#' @rdname registry_info
472473
#' @examples
473474
#' corpus_properties("REUTERS", registry = get_tmp_registry())
@@ -478,7 +479,8 @@ corpus_properties <- function(corpus, registry = Sys.getenv("CORPUS_REGISTRY")){
478479
}
479480

480481
#' @details `corpus_property()` returns the value of a corpus property defined
481-
#' in the registry file, or `NA` if the property requested is undefined.
482+
#' in the registry file, or `NA` if the corpus does not exist, is not loaded
483+
#' or if the property requested is undefined.
482484
#' @param property A corpus property defined in the registry file.
483485
#' @rdname registry_info
484486
#' @examples

man/registry_info.Rd

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cl.cpp

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -672,22 +672,31 @@ Rcpp::StringVector corpus_properties(SEXP corpus, SEXP registry){
672672
char* registry_dir = strdup(Rcpp::as<std::string>(registry).c_str());
673673
c = cl_new_corpus(registry_dir, corpus_id);
674674

675-
p = cl_first_corpus_property(c);
676-
677-
678675
n = 0;
679-
while (p != NULL){
680-
p = cl_next_corpus_property(p);
681-
n++;
676+
if (c){
677+
p = cl_first_corpus_property(c);
678+
679+
680+
while (p != NULL){
681+
p = cl_next_corpus_property(p);
682+
n++;
683+
}
684+
} else {
685+
n = 1;
682686
}
687+
683688
Rcpp::StringVector properties(n);
684689

685-
p = cl_first_corpus_property(c);
686-
i = 0;
687-
while (p != NULL){
688-
properties(i) = cl_strdup(p->property);
689-
p = cl_next_corpus_property(p);
690-
i++;
690+
if (c){
691+
p = cl_first_corpus_property(c);
692+
i = 0;
693+
while (p != NULL){
694+
properties(i) = cl_strdup(p->property);
695+
p = cl_next_corpus_property(p);
696+
i++;
697+
}
698+
} else {
699+
properties(0) = NA_STRING;
691700
}
692701

693702
return(properties);
@@ -701,18 +710,24 @@ Rcpp::StringVector corpus_property(SEXP corpus, SEXP registry, SEXP property){
701710
Corpus * c;
702711
char* corpus_id = strdup(Rcpp::as<std::string>(corpus).c_str());
703712
char* registry_dir = strdup(Rcpp::as<std::string>(registry).c_str());
713+
char* prop = strdup(Rcpp::as<std::string>(property).c_str());
714+
704715
c = cl_new_corpus(registry_dir, corpus_id);
705716

706-
char* prop = strdup(Rcpp::as<std::string>(property).c_str());
707-
708-
CorpusProperty p = cl_first_corpus_property(c);
717+
if (c){
718+
CorpusProperty p = cl_first_corpus_property(c);
709719

710-
while (p != NULL && strcmp(prop, p->property)) p = cl_next_corpus_property(p);
711-
712-
if (p != NULL)
713-
result(0) = p->value;
714-
else
720+
while (p != NULL && strcmp(prop, p->property))
721+
p = cl_next_corpus_property(p);
722+
723+
if (p != NULL)
724+
result(0) = p->value;
725+
else
726+
result(0) = NA_STRING;
727+
} else {
715728
result(0) = NA_STRING;
729+
}
730+
716731

717732
return(result);
718733
}

tests/testthat/test_registry_info.R

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ test_that(
6767
{
6868
props <- corpus_properties(corpus = "REUTERS", registry = get_tmp_registry())
6969
expect_identical(props, c("language", "charset"))
70+
71+
fail <- corpus_properties(
72+
corpus = "NOTEXISTING",
73+
registry = Sys.getenv("CORPUS_REGISTRY")
74+
)
75+
expect_true(is.na(fail))
7076
}
7177
)
7278

@@ -94,6 +100,14 @@ test_that(
94100
property = "foo"
95101
)
96102
expect_identical(na, NA_character_)
103+
104+
na <- corpus_property(
105+
"NOTEXISTING",
106+
registry = get_tmp_registry(),
107+
property = "foo"
108+
)
109+
expect_identical(na, NA_character_)
110+
97111
}
98112
)
99113

0 commit comments

Comments
 (0)