Skip to content

Commit 6ec55a0

Browse files
Andreas BlätteAndreas Blätte
authored and committed
Merge branch 'dev'
2 parents db57832 + df76929 commit 6ec55a0

File tree

1,213 files changed

+1201
-450087
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,213 files changed

+1201
-450087
lines changed

.github/workflows/R-CMD-check.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ on:
66
- main
77
- master
88
- dev
9-
- win
9+
- pcre2
1010
pull_request:
1111
branches:
1212
- main

DESCRIPTION

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
Package: RcppCWB
22
Type: Package
33
Title: 'Rcpp' Bindings for the 'Corpus Workbench' ('CWB')
4-
Version: 0.5.5
5-
Date: 2023-01-24
4+
Version: 0.6.0
5+
Date: 2023-03-21
66
Author: Andreas Blaette [aut, cre],
77
Bernard Desgraupes [aut],
88
Sylvain Loiseau [aut],
99
Oliver Christ [ctb],
1010
Bruno Maximilian Schulze [ctb],
11-
Stefanie Evert [ctb],
11+
Stephanie Evert [ctb],
1212
Arne Fitschen [ctb],
1313
Jeroen Ooms [ctb],
1414
Marius Bertram [ctb],
@@ -19,7 +19,7 @@ Authors@R: c(
1919
person("Sylvain", "Loiseau", role = "aut"),
2020
person("Oliver", "Christ", role = "ctb"),
2121
person("Bruno Maximilian", "Schulze", role = "ctb"),
22-
person("Stefanie", "Evert", role = "ctb"),
22+
person("Stephanie", "Evert", role = "ctb"),
2323
person("Arne", "Fitschen", role = "ctb"),
2424
person("Jeroen", "Ooms", role = "ctb"),
2525
person("Marius", "Bertram", role = "ctb"),
@@ -29,7 +29,7 @@ Maintainer: Andreas Blaette <andreas.blaette@uni-due.de>
2929
Description: 'Rcpp' Bindings for the C code of the 'Corpus Workbench' ('CWB'), an indexing and query
3030
engine to efficiently analyze large corpora (<https://cwb.sourceforge.io>). 'RcppCWB' is licensed
3131
under the GNU GPL-3, in line with the GPL-3 license of the 'CWB' (<https://www.r-project.org/Licenses/GPL-3>).
32-
The 'CWB' relies on 'pcre' (BSD license, see <http://www.pcre.org/licence.txt>)
32+
The 'CWB' relies on 'pcre2' (BSD license, see <http://www.pcre.org/licence.txt>)
3333
and 'GLib' (LGPL license, see <https://www.gnu.org/licenses/lgpl-3.0.en.html>).
3434
See the file LICENSE.note for further information. The package includes modified code of the
3535
'rcqp' package (GPL-2, see <https://cran.r-project.org/package=rcqp>). The original work of the authors
@@ -40,7 +40,7 @@ License: GPL-3
4040
Encoding: UTF-8
4141
Copyright: For the copyrights for the 'Corpus Workbench' (CWB) and acknowledgement of authorship, see file COPYRIGHTS.
4242
NeedsCompilation: yes
43-
SystemRequirements: GNU make, pcre (>= 7 < 10), GLib (>= 2.0.0). On Windows, no prior installations are necessary,
43+
SystemRequirements: GNU make, pcre2 (>= 10.00), GLib (>= 2.0.0). On Windows, no prior installations are necessary,
4444
as pre-built (i.e. cross-compiled) binaries of required libraries are downloaded from a GitHub repository
4545
(<https://github.com/PolMine/libcl>) during installation. On macOS, static libraries of GLib are downloaded
4646
(<https://github.com/PolMine/libglib>) if GLib is not present.
@@ -49,12 +49,14 @@ Imports:
4949
fs
5050
Suggests:
5151
knitr,
52-
testthat
52+
testthat,
53+
rmarkdown
5354
LinkingTo: Rcpp
5455
Biarch: true
56+
VignetteBuilder: knitr
5557
URL: https://github.com/PolMine/RcppCWB
5658
BugReports: https://github.com/PolMine/RcppCWB/issues
57-
RoxygenNote: 7.2.2
59+
RoxygenNote: 7.2.3
5860
Roxygen: list(markdown = TRUE)
5961
Collate:
6062
'RcppCWB_package.R'

NEWS.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
# RcppCWB 0.5.5.9001-.9003
2+
3+
* Rcpp wrappers for Corpus Library (CL) functions are exposed directly and
4+
can be used in C++ functions imported using `Rcpp::sourceCpp()` or
5+
`Rcpp::cppFunction()`.
6+
* Dependency PCRE has been updated to PCRE2 #68.
7+
* The README suggested installing the development version of RcppCWB using the
8+
snippet `devtools::install_github("PolMine/RcppCWB")`. The missing `ref = "dev"`
9+
has been inserted.
10+
* `cwb_encode()` crashed if arguments `data_dir` and `vrt_dir` include a tilde.
11+
Tilde expansion is now applied to these arguments to avoid this #73.
12+
* A new vignette explains how to write C++ inline functions.
13+
114
# RcppCWB 0.5.5
215

316
* C++ code replaces `sprintf()` with `snprintf()` to address security issue.

R/RcppExports.R

Lines changed: 111 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -41,32 +41,60 @@
4141
.Call(`_RcppCWB_cwb_version`)
4242
}
4343

44-
.p_attr_default <- function() {
44+
#' Get default p-attribute
45+
#'
46+
#' Usually the default p-attribute will be "word". Use this function to avoid
47+
#' a hard-coded solution. Extracts the default attribute defined in the CWB
48+
#' source code.
49+
#'
50+
#' @rdname p_attr_default
51+
#' @return A length-one `character` vector.
52+
#' @export
53+
p_attr_default <- function() {
4554
.Call(`_RcppCWB_p_attr_default`)
4655
}
4756

48-
.s_attr <- function(corpus, s_attribute, registry) {
49-
.Call(`_RcppCWB__s_attr`, corpus, s_attribute, registry)
57+
#' @param corpus ID of a CWB corpus (length-one `character` vector).
58+
#' @param s_attribute A structural attribute (length-one `character` vector).
59+
#' @param registry Registry directory.
60+
#' @rdname cl_rework
61+
#' @export
62+
s_attr <- function(corpus, s_attribute, registry) {
63+
.Call(`_RcppCWB_s_attr`, corpus, s_attribute, registry)
5064
}
5165

52-
.p_attr <- function(corpus, p_attribute, registry) {
53-
.Call(`_RcppCWB__p_attr`, corpus, p_attribute, registry)
66+
#' @param p_attribute A positional attribute (length-one `character` vector).
67+
#' @rdname cl_rework
68+
#' @export
69+
p_attr <- function(corpus, p_attribute, registry) {
70+
.Call(`_RcppCWB_p_attr`, corpus, p_attribute, registry)
5471
}
5572

56-
.cl_attribute_size <- function(corpus, attribute, attribute_type, registry) {
57-
.Call(`_RcppCWB__cl_attribute_size`, corpus, attribute, attribute_type, registry)
73+
#' @param attribute Either a positional, or a structural attribute.
74+
#' @param attribute_type Either "p" (positional attribute) or "s" (structural attribute).
75+
#' @rdname cl_functions
76+
attribute_size <- function(corpus, attribute, attribute_type, registry) {
77+
.Call(`_RcppCWB_attribute_size`, corpus, attribute, attribute_type, registry)
5878
}
5979

60-
.p_attr_size <- function(p_attr) {
61-
.Call(`_RcppCWB__p_attr_size`, p_attr)
80+
#' @param p_attr An `externalptr` referencing a p-attribute.
81+
#' @rdname cl_rework
82+
#' @export
83+
p_attr_size <- function(p_attr) {
84+
.Call(`_RcppCWB_p_attr_size`, p_attr)
6285
}
6386

64-
.s_attr_size <- function(s_attr) {
65-
.Call(`_RcppCWB__s_attr_size`, s_attr)
87+
#' @param s_attr An `externalptr` referencing an s-attribute.
88+
#' @rdname cl_rework
89+
#' @export
90+
s_attr_size <- function(s_attr) {
91+
.Call(`_RcppCWB_s_attr_size`, s_attr)
6692
}
6793

68-
.p_attr_lexicon_size <- function(p_attr) {
69-
.Call(`_RcppCWB__lexicon_size`, p_attr)
94+
#' @rdname cl_rework
95+
#' @export
96+
p_attr_lexicon_size <- function(p_attr) {
97+
.Call(`_RcppCWB_p_attr_lexicon_size`, p_attr)
7098
}
7199

72100
.cl_lexicon_size <- function(corpus, p_attribute, registry) {
@@ -77,92 +105,132 @@
77105
.Call(`_RcppCWB__cl_cpos2struc`, corpus, s_attribute, cpos, registry)
78106
}
79107

80-
.cpos_to_struc <- function(s_attr, cpos) {
81-
.Call(`_RcppCWB__cpos_to_struc`, s_attr, cpos)
108+
#' @param cpos An `integer` vector of corpus positions.
109+
#' @rdname cl_rework
110+
#' @export
111+
cpos_to_struc <- function(s_attr, cpos) {
112+
.Call(`_RcppCWB_cpos_to_struc`, s_attr, cpos)
82113
}
83114

84-
.cl_cpos2str <- function(corpus, p_attribute, registry, cpos) {
85-
.Call(`_RcppCWB__cl_cpos2str`, corpus, p_attribute, registry, cpos)
115+
#' Rcpp wrappers for CWB Corpus Library functions
116+
#'
117+
#' @param corpus The ID of a CWB corpus.
118+
#' @param p_attribute A positional attribute.
119+
#' @param registry Path to the corpus registry.
120+
#' @param cpos An integer vector of corpus positions.
121+
#' @rdname cl_functions
122+
cpos2str <- function(corpus, p_attribute, registry, cpos) {
123+
.Call(`_RcppCWB_cpos2str`, corpus, p_attribute, registry, cpos)
86124
}
87125

88-
.cpos_to_str <- function(p_attr, cpos) {
89-
.Call(`_RcppCWB__cpos_to_str`, p_attr, cpos)
126+
#' @rdname cl_rework
127+
#' @export
128+
cpos_to_str <- function(p_attr, cpos) {
129+
.Call(`_RcppCWB_cpos_to_str`, p_attr, cpos)
90130
}
91131

92-
.cl_cpos2id <- function(corpus, p_attribute, registry, cpos) {
93-
.Call(`_RcppCWB__cl_cpos2id`, corpus, p_attribute, registry, cpos)
132+
#' @rdname cl_functions
133+
cpos2id <- function(corpus, p_attribute, registry, cpos) {
134+
.Call(`_RcppCWB_cpos2id`, corpus, p_attribute, registry, cpos)
94135
}
95136

96-
.cpos_to_id <- function(p_attr, cpos) {
97-
.Call(`_RcppCWB__cpos_to_id`, p_attr, cpos)
137+
#' @rdname cl_rework
138+
#' @export
139+
cpos_to_id <- function(p_attr, cpos) {
140+
.Call(`_RcppCWB_cpos_to_id`, p_attr, cpos)
98141
}
99142

100-
.cl_struc2cpos <- function(corpus, s_attribute, registry, struc) {
101-
.Call(`_RcppCWB__cl_struc2cpos`, corpus, s_attribute, registry, struc)
143+
#' @param s_attribute A structural attribute.
144+
#' @param struc An integer value with struc.
145+
#' @rdname cl_functions
146+
struc2cpos <- function(corpus, s_attribute, registry, struc) {
147+
.Call(`_RcppCWB_struc2cpos`, corpus, s_attribute, registry, struc)
102148
}
103149

104-
.struc_to_cpos <- function(s_attr, struc) {
105-
.Call(`_RcppCWB__struc_to_cpos`, s_attr, struc)
150+
#' @param struc A length-one `integer` vector with a struc.
151+
#' @rdname cl_rework
152+
#' @export
153+
struc_to_cpos <- function(s_attr, struc) {
154+
.Call(`_RcppCWB_struc_to_cpos`, s_attr, struc)
106155
}
107156

108-
.cl_id2str <- function(corpus, p_attribute, registry, id) {
109-
.Call(`_RcppCWB__cl_id2str`, corpus, p_attribute, registry, id)
157+
#' @param id An `integer` vector with token ids.
158+
#' @rdname cl_functions
159+
id2str <- function(corpus, p_attribute, registry, id) {
160+
.Call(`_RcppCWB_id2str`, corpus, p_attribute, registry, id)
110161
}
111162

112163
.cl_struc2str <- function(corpus, s_attribute, struc, registry) {
113164
.Call(`_RcppCWB__cl_struc2str`, corpus, s_attribute, struc, registry)
114165
}
115166

116-
.struc_to_str <- function(s_attr, struc) {
117-
.Call(`_RcppCWB__struc_to_str`, s_attr, struc)
167+
#' @rdname cl_rework
168+
#' @export
169+
struc_to_str <- function(s_attr, struc) {
170+
.Call(`_RcppCWB_struc_to_str`, s_attr, struc)
118171
}
119172

120173
.cl_regex2id <- function(corpus, p_attribute, regex, registry) {
121174
.Call(`_RcppCWB__cl_regex2id`, corpus, p_attribute, regex, registry)
122175
}
123176

124-
.regex_to_id <- function(p_attr, regex) {
125-
.Call(`_RcppCWB__regex_to_id`, p_attr, regex)
177+
#' @param regex A regular expression.
178+
#' @rdname cl_rework
179+
#' @export
180+
regex_to_id <- function(p_attr, regex) {
181+
.Call(`_RcppCWB_regex_to_id`, p_attr, regex)
126182
}
127183

128184
.cl_str2id <- function(corpus, p_attribute, str, registry) {
129185
.Call(`_RcppCWB__cl_str2id`, corpus, p_attribute, str, registry)
130186
}
131187

132-
.str_to_id <- function(p_attr, str) {
133-
.Call(`_RcppCWB__str_to_id`, p_attr, str)
188+
#' @param str A `character` vector.
189+
#' @rdname cl_rework
190+
#' @export
191+
str_to_id <- function(p_attr, str) {
192+
.Call(`_RcppCWB_str_to_id`, p_attr, str)
134193
}
135194

136195
.cl_id2freq <- function(corpus, p_attribute, id, registry) {
137196
.Call(`_RcppCWB__cl_id2freq`, corpus, p_attribute, id, registry)
138197
}
139198

140-
.id_to_freq <- function(p_attr, id) {
141-
.Call(`_RcppCWB__id_to_freq`, p_attr, id)
199+
#' @param id An `integer` vector with token ids.
200+
#' @rdname cl_rework
201+
#' @export
202+
id_to_freq <- function(p_attr, id) {
203+
.Call(`_RcppCWB_id_to_freq`, p_attr, id)
142204
}
143205

144206
.cl_id2cpos <- function(corpus, p_attribute, id, registry) {
145207
.Call(`_RcppCWB__cl_id2cpos`, corpus, p_attribute, id, registry)
146208
}
147209

148-
.id_to_cpos <- function(p_attr, id) {
149-
.Call(`_RcppCWB__id_to_cpos`, p_attr, id)
210+
#' @rdname cl_rework
211+
#' @export
212+
id_to_cpos <- function(p_attr, id) {
213+
.Call(`_RcppCWB_id_to_cpos`, p_attr, id)
150214
}
151215

152216
.cl_cpos2lbound <- function(corpus, s_attribute, cpos, registry) {
153217
.Call(`_RcppCWB__cl_cpos2lbound`, corpus, s_attribute, cpos, registry)
154218
}
155219

156-
.cpos_to_lbound <- function(s_attr, cpos) {
157-
.Call(`_RcppCWB__cpos_to_lbound`, s_attr, cpos)
220+
#' @rdname cl_rework
221+
#' @export
222+
cpos_to_lbound <- function(s_attr, cpos) {
223+
.Call(`_RcppCWB_cpos_to_lbound`, s_attr, cpos)
158224
}
159225

160226
.cl_cpos2rbound <- function(corpus, s_attribute, cpos, registry) {
161227
.Call(`_RcppCWB__cl_cpos2rbound`, corpus, s_attribute, cpos, registry)
162228
}
163229

164-
.cpos_to_rbound <- function(s_attr, cpos) {
165-
.Call(`_RcppCWB__cpos_to_rbound`, s_attr, cpos)
230+
#' @rdname cl_rework
231+
#' @export
232+
cpos_to_rbound <- function(s_attr, cpos) {
233+
.Call(`_RcppCWB_cpos_to_rbound`, s_attr, cpos)
166234
}
167235

168236
.cl_find_corpus <- function(corpus, registry) {

0 commit comments

Comments
 (0)