Skip to content

add unicode regex for paths and fix path encoding #1420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions cwltool/command_line_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import shutil
import threading
import urllib
import urllib.parse
from functools import cmp_to_key, partial
from typing import (
Any,
Expand Down Expand Up @@ -83,7 +84,9 @@
if TYPE_CHECKING:
from .provenance_profile import ProvenanceProfile # pylint: disable=unused-import

# Strict allow-list pattern: one or more characters drawn from Unicode word
# characters (``\w`` on a str pattern), ".", "+", "-", the Miscellaneous
# Symbols block (U+2600-U+26FF) and the Emoticons block (U+1F600-U+1F64F).
# The end anchor is ``\Z`` rather than ``$`` because ``$`` also matches just
# before a trailing newline, which would let "name\n" pass the strict check.
ACCEPTLIST_EN_STRICT_RE = re.compile(
    r"^[\w.+\-\u2600-\u26FF\U0001f600-\U0001f64f]+\Z"
)  # accept unicode word characters and emojis
ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*")  # Accept anything
# Pattern in effect by default: the strict one.
ACCEPTLIST_RE = ACCEPTLIST_EN_STRICT_RE
DEFAULT_CONTAINER_MSG = """
Expand Down Expand Up @@ -1178,7 +1181,10 @@ def collect_output(
{
"location": g,
"path": fs_access.join(
builder.outdir, g[len(prefix[0]) + 1 :]
builder.outdir,
urllib.parse.unquote(
g[len(prefix[0]) + 1 :]
),
),
"basename": os.path.basename(g),
"nameroot": os.path.splitext(
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ schema-salad>=7.1,<8
prov==1.5.1
bagit==1.7.0
mypy-extensions
psutil
psutil<5.8.0
typing-extensions
coloredlogs
pydot>=1.4.1
Expand Down
97 changes: 97 additions & 0 deletions tests/test_path_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
import pytest
from pathlib import Path

from cwltool.main import main

from .util import needs_docker

# CWL v1.0 CommandLineTool shared by the tests in this module: it runs ``cat``
# on the ``input`` File, redirects stdout to the file named by the ``output``
# string parameter, and collects that file through a glob on the same name.
# The nesting under ``inputs``/``outputs`` is significant YAML structure.
script = """
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
inputs:
  - id: input
    type: File
    inputBinding:
      position: 0
  - id: output
    type: string
outputs:
  - id: output
    type: File
    outputBinding:
      glob: "$(inputs.output)"
stdout: "$(inputs.output)"
baseCommand: [cat]
"""


@needs_docker
def test_spaces_in_input_files(tmp_path: Path) -> None:
    """Run the tool on an input file whose name contains spaces.

    The run is expected to exit with status 1 under the default path
    checks and with status 0 once ``--relax-path-checks`` is prepended.
    """
    tool_file = tmp_path / "script"
    spaced_input = tmp_path / "test with spaces"
    spaced_input.touch()
    tool_file.write_text(script)

    base_args = ["--debug"]
    base_args += ["--outdir", str(tmp_path / "outdir")]
    base_args.append(str(tool_file))
    base_args += ["--input", str(spaced_input)]
    base_args += ["--output", "test.txt"]

    # Default (strict) checks first; the relaxed run only happens if this holds.
    strict_status = main(base_args)
    assert strict_status == 1

    relaxed_status = main(["--relax-path-checks", *base_args])
    assert relaxed_status == 0


@needs_docker
@pytest.mark.parametrize(
    "filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"]
)
def test_unicode_in_input_files(tmp_path: Path, filename: str) -> None:
    """Run the tool on an input file named ``filename``.

    Each parametrized name is expected to be accepted, i.e. the run exits
    with status 0 without any relaxed-path-check flag.
    """
    tool_file = tmp_path / "script"
    unicode_input = tmp_path / filename
    unicode_input.touch()
    tool_file.write_text(script)

    cli_args = ["--debug"]
    cli_args += ["--outdir", str(tmp_path / "outdir")]
    cli_args.append(str(tool_file))
    cli_args += ["--input", str(unicode_input)]
    cli_args += ["--output", "test.txt"]

    exit_status = main(cli_args)
    assert exit_status == 0


@needs_docker
@pytest.mark.parametrize(
    "filename", ["測試", "그래프", "график", "𒁃", "☕😍", "امتحان", "abc+DEFGZ.z_12345-"]
)
def test_unicode_in_output_files(tmp_path: Path, filename: str) -> None:
    """Run the tool with ``filename`` as the requested output name.

    The input is a plain file called ``test``; the parametrized name is
    passed as ``--output`` and the run is expected to exit with status 0.
    """
    tool_file = tmp_path / "script"
    plain_input = tmp_path / "test"
    plain_input.touch()
    tool_file.write_text(script)

    cli_args = ["--debug"]
    cli_args += ["--outdir", str(tmp_path / "outdir")]
    cli_args.append(str(tool_file))
    cli_args += ["--input", str(plain_input)]
    cli_args += ["--output", filename]

    exit_status = main(cli_args)
    assert exit_status == 0
43 changes: 0 additions & 43 deletions tests/test_relax_path_checks.py

This file was deleted.

2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ deps =
py{36,37,38,39}-mypy: mypy==0.800

setenv =
py{36,37,38,39}-unit: LC_ALL = C
py{36,37,38,39}-unit: LC_ALL = C.UTF-8

commands =
py{36,37,38,39}-unit: python3 -m pip install -U pip setuptools wheel
Expand Down