Skip to content

feat: support geo_values (single geo_type) #237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions integrations/server/test_covidcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,107 @@ def test_location_wildcard(self):
'message': 'success',
})

def test_geo_value(self):
"""test different variants of geo types: single, *, multi."""

# insert dummy data
self.cur.execute('''
insert into covidcast values
(0, 'src', 'sig', 'day', 'county', 20200414, '11111',
123, 10, 11, 12, 456, 13, 20200414, 0, 1, False),
(0, 'src', 'sig', 'day', 'county', 20200414, '22222',
123, 20, 21, 22, 456, 23, 20200414, 0, 1, False),
(0, 'src', 'sig', 'day', 'county', 20200414, '33333',
123, 30, 31, 32, 456, 33, 20200414, 0, 1, False),
(0, 'src', 'sig', 'day', 'msa', 20200414, '11111',
123, 40, 41, 42, 456, 43, 20200414, 0, 1, False),
(0, 'src', 'sig', 'day', 'msa', 20200414, '22222',
123, 50, 51, 52, 456, 53, 20200414, 0, 1, False),
(0, 'src', 'sig', 'day', 'msa', 20200414, '33333',
123, 60, 61, 62, 456, 634, 20200414, 0, 1, False)
''')
self.cnx.commit()

def fetch(geo_value):
# make the request
params = {
'source': 'covidcast',
'data_source': 'src',
'signal': 'sig',
'time_type': 'day',
'geo_type': 'county',
'time_values': 20200414,
}
if isinstance(geo_value, list):
params['geo_values'] = ','.join(geo_value)
else:
params['geo_value'] = geo_value
response = requests.get(BASE_URL, params=params)
response.raise_for_status()
response = response.json()

return response

counties = [{
'time_value': 20200414,
'geo_value': '11111',
'value': 10,
'stderr': 11,
'sample_size': 12,
'direction': 13,
'issue': 20200414,
'lag': 0,
'signal': 'sig',
}, {
'time_value': 20200414,
'geo_value': '22222',
'value': 20,
'stderr': 21,
'sample_size': 22,
'direction': 23,
'issue': 20200414,
'lag': 0,
'signal': 'sig',
}, {
'time_value': 20200414,
'geo_value': '33333',
'value': 30,
'stderr': 31,
'sample_size': 32,
'direction': 33,
'issue': 20200414,
'lag': 0,
'signal': 'sig',
}]

# test fetch all
r = fetch('*')
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], counties)
# test fetch a specific region
r = fetch('11111')
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[0]])
# test fetch a specific yet not existing region
r = fetch('55555')
self.assertEqual(r['message'], 'no results')
# test fetch a multiple regions
r = fetch(['11111', '22222'])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[0], counties[1]])
# test fetch a multiple regions in another variant
r = fetch(['11111', '33333'])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[0], counties[2]])
# test fetch a multiple regions but one is not existing
r = fetch(['11111', '55555'])
self.assertEqual(r['message'], 'success')
self.assertEqual(r['epidata'], [counties[0]])
Comment on lines +407 to +409
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably the right choice (silently omit bogus geo values from the response) but we should revisit it when we figure out deletion encoding. One of the goals of deletion encoding is to start giving better feedback about "you asked for a bogus county" vs "that information was withheld for privacy reasons", and an error on parts of a query might have to be treated carefully.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it behaves the same way for all kind of parameters that are not existing, e.g., if you specify a wrong data source you just get no results back but it doesn't throw an error. That is the current nature of just generating a where clause

# test fetch a multiple regions but specify no region
r = fetch([])
self.assertEqual(r['message'], 'no results')


def test_location_timeline(self):
"""Select a timeline for a particular location."""

Expand Down
23 changes: 14 additions & 9 deletions src/server/api.php
Original file line number Diff line number Diff line change
Expand Up @@ -929,27 +929,26 @@ function get_dengue_nowcast($locations, $epiweeks) {
// $time_type (required): temporal resolution (e.g. day, week)
// $geo_type (required): spatial resolution (e.g. county, msa, state)
// $time_values (required): array of time values/ranges
// $geo_value (required): location identifier or `*` as a wildcard for all
// $geo_values (required): string, array of string, or `*` as a wildcard for all
// locations (specific to `$geo_type`)
// $issues (optional): array of time values/ranges
// overrides $lag
// default: most recent issue
// $lag (optional): number of time units between each time value and its issue
// overridden by $issues
// default: most recent issue
function get_covidcast($source, $signals, $time_type, $geo_type, $time_values, $geo_value, $as_of, $issues, $lag) {
function get_covidcast($source, $signals, $time_type, $geo_type, $time_values, $geo_values, $as_of, $issues, $lag) {
// required for `mysqli_real_escape_string`
global $dbh;
$source = mysqli_real_escape_string($dbh, $source);
$time_type = mysqli_real_escape_string($dbh, $time_type);
$geo_type = mysqli_real_escape_string($dbh, $geo_type);
$geo_value = mysqli_real_escape_string($dbh, $geo_value);
// basic query info
$table = '`covidcast` t';
$fields = "t.`signal`, t.`time_value`, t.`geo_value`, t.`value`, t.`stderr`, t.`sample_size`, t.`direction`, t.`issue`, t.`lag`";
$order = "t.`signal` ASC, t.`time_value` ASC, t.`geo_value` ASC, t.`issue` ASC";
// data type of each field
$fields_string = array('geo_value','signal');
$fields_string = array('geo_value', 'signal');
$fields_int = array('time_value', 'direction', 'issue', 'lag');
$fields_float = array('value', 'stderr', 'sample_size');
// build the source, signal, time, and location (type and id) filters
Expand All @@ -959,12 +958,16 @@ function get_covidcast($source, $signals, $time_type, $geo_type, $time_values, $
$condition_geo_type = "t.`geo_type` = '{$geo_type}'";
$condition_time_value = filter_integers('t.`time_value`', $time_values);

if ($geo_value === '*') {
if ($geo_values === '*') {
// the wildcard query should return data for all locations in `geo_type`
$condition_geo_value = 'TRUE';
} else if (is_array($geo_values)) {
// return data for multiple location
$condition_geo_value = filter_strings('t.`geo_value`', $geo_values);
} else {
// return data for a particular location
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a '*' is present in the list will it be interpreted literally (matching geo_values that are '*')?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

atm. yes, there is the clear distinction see PR comment about the variants

$condition_geo_value = "t.`geo_value` = '{$geo_value}'";
$geo_escaped_value = mysqli_real_escape_string($dbh, $geo_values);
$condition_geo_value = "t.`geo_value` = '{$geo_escaped_value}'";
}
$conditions = "({$condition_source}) AND ({$condition_signal}) AND ({$condition_time_type}) AND ({$condition_geo_type}) AND ({$condition_time_value}) AND ({$condition_geo_value})";

Expand Down Expand Up @@ -1518,22 +1521,24 @@ function meta_delphi() {
}
}
} else if($source === 'covidcast') {
if(require_all($data, array('data_source', 'time_type', 'geo_type', 'time_values', 'geo_value'))
&& require_any($data, array('signal', 'signals'))) {
if(require_all($data, array('data_source', 'time_type', 'geo_type', 'time_values'))
&& require_any($data, array('signal', 'signals'))
&& require_any($data, array('geo_value', 'geo_values'))) {
// parse the request
$time_values = extract_dates($_REQUEST['time_values']);
$as_of = isset($_REQUEST['as_of']) ? parse_date($_REQUEST['as_of']) : null;
$issues = isset($_REQUEST['issues']) ? extract_dates($_REQUEST['issues']) : null;
$lag = isset($_REQUEST['lag']) ? intval($_REQUEST['lag']) : null;
$signals = extract_values(isset($_REQUEST['signals']) ? $_REQUEST['signals'] : $_REQUEST['signal'], 'string');
$geo_values = isset($_REQUEST['geo_value']) ? $_REQUEST['geo_value'] : extract_values($_REQUEST['geo_values'], 'string');
// get the data
$epidata = get_covidcast(
$_REQUEST['data_source'],
$signals,
$_REQUEST['time_type'],
$_REQUEST['geo_type'],
$time_values,
$_REQUEST['geo_value'],
$geo_values,
$as_of,
$issues,
$lag);
Expand Down