Skip to content

Fix ILIKE parameters quoted params #2375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 34 additions & 18 deletions qiita_db/processing_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,42 +141,59 @@ def create(cls, user, parameters, force=False):
If force is True the job is gonna be created even if another job
exists with the same parameters
"""
with qdb.sql_connection.TRN:
TTRN = qdb.sql_connection.TRN
with TTRN:
command = parameters.command

# check if a job with the same parameters already exists
sql = """SELECT processing_job_id, email, processing_job_status
sql = """SELECT processing_job_id, email, processing_job_status,
COUNT(aopj.artifact_id)
FROM qiita.processing_job
LEFT JOIN qiita.processing_job_status
USING (processing_job_status_id)
LEFT JOIN qiita.artifact_output_processing_job aopj
USING (processing_job_id)
WHERE command_id = %s AND processing_job_status IN (
'success', 'waiting', 'running') """
'success', 'waiting', 'running') {0}
GROUP BY processing_job_id, email,
processing_job_status"""

# we need to use ILIKE because of booleans as they can be
# false or False
_format = "command_parameters->>'%s' ILIKE '%s'"
params = []
for k, v in viewitems(parameters.values):
# this is necessary in case we have an Iterable as a value
# but that is not unicode or string
if isinstance(v, Iterable) and not isinstance(v, (str,
unicode)):
for vv in v:
params.append(_format % (k, vv))
params.extend([k, str(vv)])
else:
params.append(_format % (k, v))
params = " AND ".join(params)

sql = sql + ' AND ' if params else sql
qdb.sql_connection.TRN.add(sql + params, [command.id])
existing_jobs = qdb.sql_connection.TRN.execute_fetchindex()
params.extend([k, str(v)])

if params:
# divided by 2 as we have key-value pairs
len_params = len(params)/2
sql = sql.format(' AND ' + ' AND '.join(
["command_parameters->>%s ILIKE %s"] * len_params))
params = [command.id] + params
TTRN.add(sql, params)
else:
# the sql variable expects the list of parameters but if there
# is no param we need to replace the {0} with an empty string
TTRN.add(sql.format(""), [command.id])

# checking that if the job status is success, it has children
# [2] status, [3] children count
existing_jobs = [r for r in TTRN.execute_fetchindex()
if r[2] != 'success' or r[3] > 0]
if existing_jobs and not force:
raise ValueError(
'Cannot create job because the parameters are the same as '
'jobs that are queued, running or already have '
'succeeded:\n%s' % '\n'.join(
["%s: %s" % (jid, status)
for jid, _, status in existing_jobs]))
for jid, _, status, _ in existing_jobs]))

sql = """INSERT INTO qiita.processing_job
(email, command_id, command_parameters,
Expand All @@ -187,8 +204,8 @@ def create(cls, user, parameters, force=False):
"in_construction", "processing_job_status")
sql_args = [user.id, command.id,
parameters.dump(), status]
qdb.sql_connection.TRN.add(sql, sql_args)
job_id = qdb.sql_connection.TRN.execute_fetchlast()
TTRN.add(sql, sql_args)
job_id = TTRN.execute_fetchlast()

# Link the job with the input artifacts
sql = """INSERT INTO qiita.artifact_processing_job
Expand All @@ -202,17 +219,16 @@ def create(cls, user, parameters, force=False):
# still doesn't exists because the current job is part
# of a workflow, so we can't link
if not isinstance(artifact_info, list):
qdb.sql_connection.TRN.add(
sql, [artifact_info, job_id])
TTRN.add(sql, [artifact_info, job_id])
else:
pending[artifact_info[0]][pname] = artifact_info[1]
if pending:
sql = """UPDATE qiita.processing_job
SET pending = %s
WHERE processing_job_id = %s"""
qdb.sql_connection.TRN.add(sql, [dumps(pending), job_id])
TTRN.add(sql, [dumps(pending), job_id])

qdb.sql_connection.TRN.execute()
TTRN.execute()

return cls(job_id)

Expand Down
43 changes: 35 additions & 8 deletions qiita_db/test/test_processing_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,14 +258,19 @@ def test_create(self):
self.assertEqual(obs.step, None)
self.assertTrue(obs in qdb.artifact.Artifact(1).jobs())

def test_create_duplicated(self):
job = _create_job()
job._set_status('running')
with self.assertRaisesRegexp(ValueError, 'Cannot create job because '
'the parameters are the same as jobs '
'that are queued, running or already '
'have succeeded:'):
_create_job(False)
# test with paramters with '
exp_command = qdb.software.Command(1)
exp_params.values["a tests with '"] = 'this is a tests with "'
exp_params.values['a tests with "'] = "this is a tests with '"
obs = qdb.processing_job.ProcessingJob.create(
exp_user, exp_params)
self.assertEqual(obs.user, exp_user)
self.assertEqual(obs.command, exp_command)
self.assertEqual(obs.status, 'in_construction')
self.assertEqual(obs.log, None)
self.assertEqual(obs.heartbeat, None)
self.assertEqual(obs.step, None)
self.assertTrue(obs in qdb.artifact.Artifact(1).jobs())

def test_set_status(self):
job = _create_job()
Expand Down Expand Up @@ -948,5 +953,27 @@ def test_remove_error(self):
tester.remove(tester.graph.edges()[0][0])


@qiita_test_checker()
class ProcessingJobDuplicated(TestCase):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a specific reason why you've created a different class here? That triggers a new reset of the DB, it will be great if we can avoid a reset of DB unless strictly necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we are setting all the jobs to error the other tests are failing. I think we have 2 options: A. reset back but it seemed ugly to me or B. have it as a different class so the reset happens, this seemed better to me

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nah, it's okay - I just wanted to make sure that there was an actual reason to create the class.

def test_create_duplicated(self):
job = _create_job()
job._set_status('success')
with self.assertRaisesRegexp(ValueError, 'Cannot create job because '
'the parameters are the same as jobs '
'that are queued, running or already '
'have succeeded:') as context:
_create_job(False)

# If it failed it's because we have jobs in non finished status so
# setting them as error. This is basically testing that the duplicated
# job creation allows to create if all jobs are error and if success
# that the job doesn't have children
for jobs in context.exception.message.split('\n')[1:]:
jid, status = jobs.split(': ')
if status != 'success':
qdb.processing_job.ProcessingJob(jid)._set_status('error')
_create_job(False)


if __name__ == '__main__':
main()