Skip to content

Commit

Permalink
Handle hyphens in dataset() (#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
mortenpi authored Nov 23, 2022
1 parent bd8444e commit 2b31808
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 38 deletions.
26 changes: 11 additions & 15 deletions src/DataSet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,20 +51,17 @@ separated with forward slashes. Examples:
username/data
organization/project/data
"""
function is_valid_dataset_name(name::AbstractString)
    # DataSet names disallow most punctuation for now, as it may be needed as
    # delimiters in data-related syntax (eg, for the data REPL).
    #
    # The pattern is written with the `x` (extended) flag, so the whitespace
    # and line breaks inside it are insignificant. A name is a leading letter
    # followed by any mix of letters, digits, `-`, `_`, or a `/` that is
    # immediately followed by a letter (so no empty or non-alphabetic-leading
    # path components, and no trailing `/`).
    #
    # The end anchor is `\z`, not `$`: `$` also matches just before a trailing
    # newline, which would let a name such as "a\n" pass validation.
    dataset_name_pattern = r"
        ^
        [[:alpha:]]
        (?:
            [-[:alnum:]_]     |
            / (?=[[:alpha:]])
        )*
        \z
        "x
    return occursin(dataset_name_pattern, name)
end
is_valid_dataset_name(name::AbstractString) = occursin(DATASET_NAME_REGEX, name)
# DataSet names disallow most punctuation for now, as it may be needed as
# delimiters in data-related syntax (eg, for the data REPL).
#
# Unanchored pattern source for a dataset name, written for the regex `x`
# (extended) flag so that it can be embedded in larger patterns: a leading
# letter followed by any mix of letters, digits, `-`, `_`, or a `/` that is
# immediately followed by a letter.
const DATASET_NAME_REGEX_STRING = raw"""
[[:alpha:]]
(?:
    [-[:alnum:]_]     |
    / (?=[[:alpha:]])
)*
"""
# Fully anchored form used for validation. The end anchor is `\z` rather than
# `$` because `$` also matches just before a trailing newline, which would let
# a name such as "a\n" pass validation.
const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\\z", "x")

function make_valid_dataset_name(name)
if !is_valid_dataset_name(name)
Expand Down Expand Up @@ -191,4 +188,3 @@ function Base.open(as_type, dataset::DataSet)
@! ResourceContexts.detach_context_cleanup(result)
end
end

22 changes: 13 additions & 9 deletions src/data_project.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,20 @@ function _unescapeuri(str)
return String(take!(out))
end

# Parse as a suffix of URI syntax
# name/of/dataset?param1=value1&param2=value2#fragment
#
# Capture groups: 1 = dataset name, 2 = query (text after `?`, up to any `#`),
# 3 = fragment (text after `#`). The pattern uses the `x` (extended) flag, so
# whitespace and the trailing `# ...` notes inside it are ignored by the regex
# engine.
#
# The end anchor is `\z` rather than `$`: `$` also matches just before a
# trailing newline, so a spec such as "foo\n" or "foo#f\n" would otherwise be
# accepted with the newline silently dropped.
const DATASET_SPEC_REGEX = Regex(
    """
    ^
    ($(DATASET_NAME_REGEX_STRING))
    (?:\\?([^#]*))?  # query    - a=b&c=d
    (?:\\#(.*))?     # fragment - ...
    \\z
    """,
    "x",
)
function _split_dataspec(spec::AbstractString)
# Parse as a suffix of URI syntax
# name/of/dataset?param1=value1&param2=value2#fragment
m = match(r"
^
((?:[[:alpha:]][[:alnum:]_]*/?)+) # name - a/b/c
(?:\?([^#]*))? # query - a=b&c=d
(?:\#(.*))? # fragment - ...
$"x,
spec)
m = match(DATASET_SPEC_REGEX, spec)
if isnothing(m)
return nothing, nothing, nothing
end
Expand Down
43 changes: 29 additions & 14 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,20 @@ end

#-------------------------------------------------------------------------------
@testset "Data set names" begin
# Names that must be accepted by the validator
for accepted in ("a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c")
    @test DataSets.is_valid_dataset_name(accepted)
end
# Names that must be rejected by the validator
for rejected in ("1", "a b", "a.b", "a/b/", "a//b", "/a/b")
    @test !DataSets.is_valid_dataset_name(rejected)
end
@testset "Valid name: $name" for name in [
    "a_b",
    "a-b",
    "a1",
    "δεδομένα",
    "a/b",
    "a/b/c",
    "a-",
    "b_",
]
    # A valid name must pass validation and must also parse as a dataspec
    # consisting of the name alone, with no query and no fragment.
    @test DataSets.is_valid_dataset_name(name)
    @test DataSets._split_dataspec(name) == (name, nothing, nothing)
end

@testset "Invalid name: $name" for name in [
    "1",
    "a b",
    "a.b",
    "a/b/",
    "a//b",
    "/a/b",
    "a/-",
    "a/1",
    "a/ _/b",
]
    # An invalid name must fail validation and must not parse as a dataspec:
    # the parser signals failure with an all-`nothing` triple.
    @test !DataSets.is_valid_dataset_name(name)
    @test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
end

# Error message for invalid names
@test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `_` or `/`.") DataSets.check_dataset_name("a?b")

Expand All @@ -107,6 +107,21 @@ end
end

@testset "URL-like dataspec parsing" begin
# Valid dataspecs
# Each comparison is wrapped in `@test`; a bare `==` expression is evaluated
# and discarded, so it can never fail the test suite.
@test DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f")
@test DataSets._split_dataspec("foo#f") == ("foo", nothing, "f")
@test DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
# A trailing hyphen is part of a valid name ("a-" is accepted by the
# "Valid name" tests), so this spec parses rather than being rejected.
@test DataSets._split_dataspec("foo-?x=1") == ("foo-", ["x" => "1"], nothing)
# Invalid dataspecs
@test DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing)
@test DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing)

proj = DataSets.load_project("Data.toml")

@test !haskey(dataset(proj, "a_text_file"), "dataspec")
Expand Down

0 comments on commit 2b31808

Please sign in to comment.