Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Current (in progress)

- Nothing yet
- Protect custom type testers against None values [#66](https://github.com/etalab/csvapi/pull/66)

## 1.0.3 (2020-03-04)

Expand Down
4 changes: 4 additions & 0 deletions csvapi/type_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def __init__(self, **kwargs):
super(Time, self).__init__(**kwargs)

def cast(self, d):
if d is None:
return d
Comment on lines +28 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed? The next line casts to a string before trying to match the regex.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests fail w/o this so I guess so :-)

if re.match(r"^(?:[01]\d|2[0-3]|\d):[0-5]\d$", str(d)):
return Text().cast(d)
raise CastError('Can not parse value "%s" as time.' % d)
Expand All @@ -36,6 +38,8 @@ def __init__(self):
super(SirenSiret, self).__init__()

def cast(self, d):
if d is None:
return d
if is_valid_siret(d) or is_valid_siren(d):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions don't crash for non-string arguments?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the tests cover this. And I'd say every value come as a string since it's a CSV and we're trying to guess types.

return Text().cast(d)
raise CastError('Can not parse value "%s" as a SIREN or SIRET.' % d)
Expand Down
35 changes: 35 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,22 @@ def csv_siren_siret():
"""


@pytest.fixture
def csv_custom_types_double_cr():
"""
This is clearly an invalid file (double CR)
but it tests an interesting case: None values in
columns detected as custom types.

In this case we'd rather display empty lines and None
values than break.
"""
return """id<sep>siren<sep>siret<sep>time\r\r
a<sep>13002526a5<sep>13002526500013<sep>12:30\r\r
b<sep>522816651<sep>52281665100056<sep>15:50\r\r
"""


def random_url():
return f"https://example.com/{uuid.uuid4()}.csv"

Expand Down Expand Up @@ -169,6 +185,25 @@ async def test_apify_siren_siret_format(rmock, csv_siren_siret, client):
]


async def test_apify_custom_types_double_cr(rmock, csv_custom_types_double_cr, client):
content = csv_custom_types_double_cr.replace('<sep>', ';').encode('utf-8')
url = random_url()
rmock.get(url, body=content)
await client.get(f"/apify?url={url}")
res = await client.get(f"/api/{get_hash(url)}")
assert res.status_code == 200
jsonres = await res.json
assert jsonres['columns'] == ['rowid', 'id', 'siren', 'siret', 'time']
assert jsonres['total'] == 5
assert jsonres['rows'] == [
[1, None, None, None, None],
[2, 'a', '13002526a5', '13002526500013', '12:30'],
[3, None, None, None, None],
[4, 'b', '522816651', '52281665100056', '15:50'],
[5, None, None, None, None]
]


@pytest.mark.parametrize('separator', [';', ',', '\t'])
@pytest.mark.parametrize('encoding', ['utf-8', 'iso-8859-15', 'iso-8859-1'])
async def test_api(client, rmock, csv, separator, encoding):
Expand Down