diff --git a/securedrop/alembic/versions/b58139cfdc8c_add_checksum_columns.py b/securedrop/alembic/versions/b58139cfdc8c_add_checksum_columns.py
new file mode 100644
index 00000000000..ba2789f0f4f
--- /dev/null
+++ b/securedrop/alembic/versions/b58139cfdc8c_add_checksum_columns.py
@@ -0,0 +1,84 @@
+"""add checksum columns
+
+Revision ID: b58139cfdc8c
+Revises: f2833ac34bb6
+Create Date: 2019-04-02 10:45:05.178481
+
+"""
+import os
+from alembic import op
+import sqlalchemy as sa
+
+# raise the errors if we're not in production
+raise_errors = os.environ.get('SECUREDROP_ENV', 'prod') != 'prod'
+
+try:
+    from journalist_app import create_app
+    from models import Submission, Reply
+    from sdconfig import config
+    from store import queued_add_checksum_for_file
+    from worker import rq_worker_queue
+except Exception:
+    if raise_errors:
+        raise
+
+# revision identifiers, used by Alembic.
+revision = 'b58139cfdc8c'
+down_revision = 'f2833ac34bb6'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    with op.batch_alter_table('replies', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('checksum', sa.String(length=255), nullable=True))
+
+    with op.batch_alter_table('submissions', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('checksum', sa.String(length=255), nullable=True))
+
+    try:
+        app = create_app(config)
+
+        # we need an app context for the rq worker extension to work properly
+        with app.app_context():
+            conn = op.get_bind()
+            query = sa.text('''SELECT submissions.id, sources.filesystem_id, submissions.filename
+                               FROM submissions
+                               INNER JOIN sources
+                               ON submissions.source_id = sources.id
+                            ''')
+            for (sub_id, filesystem_id, filename) in conn.execute(query):
+                full_path = app.storage.path(filesystem_id, filename)
+                rq_worker_queue.enqueue(
+                    queued_add_checksum_for_file,
+                    Submission,
+                    int(sub_id),
+                    full_path,
+                    app.config['SQLALCHEMY_DATABASE_URI'],
+                )
+
+            query = sa.text('''SELECT replies.id, sources.filesystem_id, replies.filename
+                               FROM replies
+                               INNER JOIN sources
+                               ON replies.source_id = sources.id
+                            ''')
+            for (rep_id, filesystem_id, filename) in conn.execute(query):
+                full_path = app.storage.path(filesystem_id, filename)
+                rq_worker_queue.enqueue(
+                    queued_add_checksum_for_file,
+                    Reply,
+                    int(rep_id),
+                    full_path,
+                    app.config['SQLALCHEMY_DATABASE_URI'],
+                )
+    except Exception:
+        if raise_errors:
+            raise
+
+
+def downgrade():
+    with op.batch_alter_table('submissions', schema=None) as batch_op:
+        batch_op.drop_column('checksum')
+
+    with op.batch_alter_table('replies', schema=None) as batch_op:
+        batch_op.drop_column('checksum')
diff --git a/securedrop/tests/migrations/migration_b58139cfdc8c.py b/securedrop/tests/migrations/migration_b58139cfdc8c.py
new file mode 100644
index 00000000000..73871ab9b65
--- /dev/null
+++ b/securedrop/tests/migrations/migration_b58139cfdc8c.py
@@ -0,0 +1,199 @@
+# -*- coding: utf-8 -*-
+import io
+import os
+import random
+import uuid
+
+from os import path
+from sqlalchemy import text
+from sqlalchemy.exc import NoSuchColumnError
+
+from db import db
+from journalist_app import create_app
+from .helpers import random_chars, random_datetime
+
+random.seed('ᕕ( ᐛ )ᕗ')
+
+DATA = b'wat'
+DATA_CHECKSUM = 'sha256:f00a787f7492a95e165b470702f4fe9373583fbdc025b2c8bdf0262cc48fcff4'
+
+
+class Helper:
+
+    def __init__(self):
+        self.journalist_id = None
+        self.source_id = None
+        self._counter = 0
+
+    @property
+    def counter(self):
+        self._counter += 1
+        return self._counter
+
+    def create_journalist(self):
+        if self.journalist_id is not None:
+            raise RuntimeError('Journalist already created')
+
+        params = {
+            'uuid': str(uuid.uuid4()),
+            'username': random_chars(50),
+        }
+        sql = '''INSERT INTO journalists (uuid, username)
+                 VALUES (:uuid, :username)
+              '''
+        self.journalist_id = db.engine.execute(text(sql), **params).lastrowid
+
+    def create_source(self):
+        if self.source_id is not None:
+            raise RuntimeError('Source already created')
+
+        self.source_filesystem_id = 'aliruhglaiurhgliaurg-{}'.format(self.counter)
+        params = {
+            'filesystem_id': self.source_filesystem_id,
+            'uuid': str(uuid.uuid4()),
+            'journalist_designation': random_chars(50),
+            'flagged': False,
+            'last_updated': random_datetime(nullable=True),
+            'pending': False,
+            'interaction_count': 0,
+        }
+        sql = '''INSERT INTO sources (filesystem_id, uuid, journalist_designation, flagged,
+                     last_updated, pending, interaction_count)
+                 VALUES (:filesystem_id, :uuid, :journalist_designation, :flagged, :last_updated,
+                     :pending, :interaction_count)
+              '''
+        self.source_id = db.engine.execute(text(sql), **params).lastrowid
+
+    def create_submission(self, checksum=False):
+        filename = str(uuid.uuid4())
+        params = {
+            'uuid': str(uuid.uuid4()),
+            'source_id': self.source_id,
+            'filename': filename,
+            'size': random.randint(10, 1000),
+            'downloaded': False,
+        }
+
+        if checksum:
+            params['checksum'] = \
+                'sha256:f00a787f7492a95e165b470702f4fe9373583fbdc025b2c8bdf0262cc48fcff4'
+            sql = '''INSERT INTO submissions (uuid, source_id, filename, size, downloaded, checksum)
+                     VALUES (:uuid, :source_id, :filename, :size, :downloaded, :checksum)
+                  '''
+        else:
+            sql = '''INSERT INTO submissions (uuid, source_id, filename, size, downloaded)
+                     VALUES (:uuid, :source_id, :filename, :size, :downloaded)
+                  '''
+
+        return (db.engine.execute(text(sql), **params).lastrowid, filename)
+
+    def create_reply(self, checksum=False):
+        filename = str(uuid.uuid4())
+        params = {
+            'uuid': str(uuid.uuid4()),
+            'source_id': self.source_id,
+            'journalist_id': self.journalist_id,
+            'filename': filename,
+            'size': random.randint(10, 1000),
+            'deleted_by_source': False,
+        }
+
+        if checksum:
+            params['checksum'] = \
+                'sha256:f00a787f7492a95e165b470702f4fe9373583fbdc025b2c8bdf0262cc48fcff4'
+            sql = '''INSERT INTO replies (uuid, source_id, journalist_id, filename, size,
+                         deleted_by_source, checksum)
+                     VALUES (:uuid, :source_id, :journalist_id, :filename, :size,
+                         :deleted_by_source, :checksum)
+                  '''
+        else:
+            sql = '''INSERT INTO replies (uuid, source_id, journalist_id, filename, size,
+                         deleted_by_source)
+                     VALUES (:uuid, :source_id, :journalist_id, :filename, :size,
+                         :deleted_by_source)
+                  '''
+
+        return (db.engine.execute(text(sql), **params).lastrowid, filename)
+
+
+class UpgradeTester(Helper):
+
+    def __init__(self, config):
+        Helper.__init__(self)
+        self.config = config
+        self.app = create_app(config)
+
+    def load_data(self):
+        global DATA
+        with self.app.app_context():
+            self.create_journalist()
+            self.create_source()
+
+            submission_id, submission_filename = self.create_submission()
+            reply_id, reply_filename = self.create_reply()
+
+            # we need to actually create files and write data to them so the RQ worker can hash them
+            for fn in [submission_filename, reply_filename]:
+                full_path = self.app.storage.path(self.source_filesystem_id, fn)
+
+                dirname = path.dirname(full_path)
+                if not path.exists(dirname):
+                    os.mkdir(dirname)
+
+                with io.open(full_path, 'wb') as f:
+                    f.write(DATA)
+
+    def check_upgrade(self):
+        '''
+        We cannot inject the `SDConfig` object provided by the fixture `config` into the alembic
+        subprocess that actually performs the migration. This is needed to get both the value of the
+        DB URL and access to the function `storage.path`. These values are passed to the `rqworker`,
+        and without being able to inject this config, the checksum function won't succeed. The above
+        `load_data` function provides data that can be manually verified by checking the `rqworker`
+        log file in `/tmp/`.
+        '''
+        pass
+
+
+class DowngradeTester(Helper):
+
+    def __init__(self, config):
+        Helper.__init__(self)
+        self.config = config
+        self.app = create_app(config)
+
+    def load_data(self):
+        with self.app.app_context():
+            self.create_journalist()
+            self.create_source()
+
+            # create a submission and a reply that we don't add checksums to
+            self.create_submission(checksum=False)
+            self.create_reply(checksum=False)
+
+            # create a submission and a reply that have checksums added
+            self.create_submission(checksum=True)
+            self.create_reply(checksum=True)
+
+    def check_downgrade(self):
+        '''
+        Verify that the checksum column is now gone.
+        '''
+        with self.app.app_context():
+            sql = "SELECT * FROM submissions"
+            submissions = db.engine.execute(text(sql)).fetchall()
+            for submission in submissions:
+                try:
+                    submission['checksum']
+                    raise AssertionError('checksum column was not dropped from submissions')
+                except NoSuchColumnError:
+                    pass
+
+            sql = "SELECT * FROM replies"
+            replies = db.engine.execute(text(sql)).fetchall()
+            for reply in replies:
+                try:
+                    reply['checksum']
+                    raise AssertionError('checksum column was not dropped from replies')
+                except NoSuchColumnError:
+                    pass