Skip to content

Commit

Permalink
Use db_backup to manage database dump/load
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinMind committed Sep 24, 2024
1 parent a8f911f commit 9910061
Show file tree
Hide file tree
Showing 9 changed files with 326 additions and 228 deletions.
4 changes: 2 additions & 2 deletions docs/topics/development/data_management.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ with specific logic to save/load backups in our specified backup directory.
```

This command creates a dump of the current MySQL database. The command accepts an optional `name` argument which will determine
the name of the directory created in the `DATA_BACKUP_DIR` directory. By default it uses a timestamp to ensure uniqueness.
the name of the directory created in the `DATA_BACKUP_DIRNAME` directory. By default it uses a timestamp to ensure uniqueness.

You can also specify the `--force` argument to overwrite an existing backup with the same name.

Expand All @@ -55,7 +55,7 @@ with specific logic to save/load backups in our specified backup directory.
make data_load [ARGS="--name <name>"]
```

This command will load data from an existing backup directory. The name is required and must match a directory in the `DATA_BACKUP_DIR` directory.
This command will load data from an existing backup directory. The name is required and must match a directory in the `DATA_BACKUP_DIRNAME` directory.

> NOTE: This command will NOT reindex elasticsearch. In most cases you should use the `make initialize_data` command instead.
> You can specify the `--load <name>` argument to load a specific backup and ensure the index is recreated.
Expand Down
3 changes: 3 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,6 @@ pytest-split==0.9.0 \
pytest-reportlog==0.4.0 \
--hash=sha256:5db4d00586546d8c6b95c66466629f1e913440c36d97795a673d2e19c5cedd5c \
--hash=sha256:c9f2079504ee51f776d3118dcf5e4730f163d3dcf26ebc8f600c1fa307bf638c
django-dbbackup==4.2.1 \
--hash=sha256:157a2ec10d482345cd75092e510ac40d6e2ee6084604a1d17abe178c2f06bc69 \
--hash=sha256:b23265600ead0780ca781b1b4b594949aaa8a20d74f08701f91ee9d7eb1f08cd
17 changes: 16 additions & 1 deletion settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,22 @@
INTERNAL_ROUTES_ALLOWED = True

# These apps are great during development.
INSTALLED_APPS += ('olympia.landfill',)
INSTALLED_APPS += (
'olympia.landfill',
'dbbackup',
)

# Settings for django-dbbackup
DATA_BACKUP_DIRNAME = path('backups')
DATA_BACKUP_INIT = '_init'
DATA_BACKUP_DB_FILENAME = 'db.sql'
DATA_BACKUP_STORAGE_FILENAME = 'storage.tar'

DBBACKUP_STORAGE = 'django.core.files.storage.FileSystemStorage'
DBBACKUP_STORAGE_OPTIONS = {'location': DATA_BACKUP_DIRNAME}
DBBACKUP_CONNECTOR_MAPPING = {
'olympia.core.db.mysql': 'dbbackup.db.mysql.MysqlDumpConnector',
}

# Override logging config to enable DEBUG logs for (almost) everything.
LOGGING['root']['level'] = logging.DEBUG
Expand Down
50 changes: 50 additions & 0 deletions src/olympia/amo/management/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import logging
import os
import shutil

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError


class BaseDataCommand(BaseCommand):
    """Shared base for management commands that work with data backups.

    Holds the backup directory layout (root directory, name of the initial
    backup, database/storage file names) and helpers that build backup
    paths and create or clear backup directories.
    """

    # Settings for django-dbbackup
    data_backup_dirname = os.path.abspath(
        os.path.join(settings.ROOT, 'backups')
    )
    data_backup_init = '_init'
    data_backup_db_filename = 'db.sql'
    data_backup_storage_filename = 'storage.tar'

    # Exposed as attributes so subclasses go through `self.call_command` /
    # `self.logger`. `call_command` must be wrapped in `staticmethod`: a plain
    # function stored on a class is bound on instance access, which would pass
    # `self` as the command name argument of `call_command`.
    call_command = staticmethod(call_command)
    logger = logging

    def backup_dir_path(self, name: str) -> str:
        """Return the absolute path of the backup directory called `name`.

        `name` is joined onto `data_backup_dirname`; because `os.path.join`
        discards earlier components when a later one is absolute, an absolute
        `name` is returned as-is (normalized).
        """
        return os.path.abspath(os.path.join(self.data_backup_dirname, name))

    def backup_db_path(self, name: str) -> str:
        """Return the absolute path of the database dump inside backup `name`."""
        return os.path.abspath(
            os.path.join(self.backup_dir_path(name), self.data_backup_db_filename)
        )

    def backup_storage_path(self, name: str) -> str:
        """Return the absolute path of the storage archive inside backup `name`."""
        return os.path.abspath(
            os.path.join(self.backup_dir_path(name), self.data_backup_storage_filename)
        )

    def clean_dir(self, name: str) -> None:
        """Remove the backup directory `name` and everything in it.

        Errors raised while removing (including the directory not existing)
        are ignored.
        """
        path = self.backup_dir_path(name)
        self.logger.info(f'Clearing {path}')
        shutil.rmtree(path, ignore_errors=True)

    def make_dir(self, name: str, force: bool = False) -> None:
        """Create an empty backup directory called `name`.

        Raises:
            CommandError: if the directory already exists and `force` is
                false.

        With `force` set, an existing directory is cleared first so the
        result is always a fresh, empty directory.
        """
        path = self.backup_dir_path(name)
        path_exists = os.path.exists(path)

        if path_exists and not force:
            raise CommandError(
                f'path {path} already exists. Use --force to overwrite.'
            )

        self.clean_dir(name)
        os.makedirs(path, exist_ok=True)
59 changes: 19 additions & 40 deletions src/olympia/amo/management/commands/dump_data.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
import logging
import os
import shutil
from datetime import datetime

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from ..base import BaseDataCommand


class Command(BaseCommand):
class Command(BaseDataCommand):
help = 'Dump data with a specified name'

def add_arguments(self, parser):
parser.add_argument(
'--name',
type=str,
default=datetime.now().strftime(
'%Y%m%d%H%M%S'
), # Default to current timestamp
default=datetime.now().strftime('%Y%m%d%H%M%S'),
help='Name of the data dump',
)
parser.add_argument(
Expand All @@ -28,37 +21,23 @@ def handle(self, *args, **options):
name = options.get('name')
force = options.get('force')

dump_path = os.path.abspath(os.path.join(settings.DATA_BACKUP_DIR, name))
dump_path = self.backup_dir_path(name)
db_path = self.backup_db_path(name)
storage_path = self.backup_storage_path(name)

logging.info(f'Dumping data to {dump_path}')
self.make_dir(dump_path, force=force)

if os.path.exists(dump_path):
if force:
shutil.rmtree(dump_path)
else:
raise CommandError(
f'Dump path {dump_path} already exists.'
'Use --force to overwrite or --init to reseed the initial data.'
)

os.makedirs(dump_path, exist_ok=True)

data_file_path = os.path.join(dump_path, 'data.json')
call_command(
'dumpdata',
format='json',
indent=2,
output=data_file_path,
all=True,
natural_foreign=True,
natural_primary=True,
exclude=[
'contenttypes.contenttype',
'auth.permission',
'sessions.session',
]
# For some reason you cannot send the options like this, but you cannot test for them as named arguments. TODO: fix this.
self.call_command(
'dbbackup',
output_path=db_path,
interactive=False,
compress=True,
)

storage_from = settings.STORAGE_ROOT
storage_to = os.path.join(dump_path, 'storage')
shutil.copytree(storage_from, storage_to)
self.call_command(
'mediabackup',
output_path=storage_path,
interactive=False,
compress=True,
)
39 changes: 18 additions & 21 deletions src/olympia/amo/management/commands/load_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
import logging
import os
import shutil
from ..base import BaseDataCommand

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError


class Command(BaseCommand):
class Command(BaseDataCommand):
help = 'Load data from a specified name'

def add_arguments(self, parser):
Expand All @@ -20,17 +14,20 @@ def add_arguments(self, parser):

def handle(self, *args, **options):
name = options.get('name')
load_path = os.path.abspath(os.path.join(settings.DATA_BACKUP_DIR, name))

logging.info(f'Loading data from {load_path}')

if not os.path.exists(load_path):
raise CommandError(f'Dump path {load_path} does not exist.')

data_file_path = os.path.join(load_path, 'data.json')
call_command('loaddata', data_file_path)
db_path = self.backup_db_path(name)
storage_path = self.backup_storage_path(name)

self.call_command(
'dbrestore',
input_path=db_path,
interactive=False,
uncompress=True,
)

storage_from = os.path.join(load_path, 'storage')
storage_to = os.path.abspath(settings.STORAGE_ROOT)
logging.info(f'Copying storage from {storage_from} to {storage_to}')
shutil.copytree(storage_from, storage_to, dirs_exist_ok=True)
self.call_command(
'mediarestore',
input_path=storage_path,
interactive=False,
uncompress=True,
replace=True,
)
63 changes: 30 additions & 33 deletions src/olympia/amo/management/commands/seed_data.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,45 @@
import logging
import os
import shutil

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand

from ..base import BaseDataCommand


class Command(BaseCommand):
help = 'Seed the _init data dir with fresh data from the database'
class Command(BaseDataCommand):
help = (
'Reset and seed the database with initial data, '
'generated add-ons, and data from AMO production.'
)

def handle(self, *args, **options):
init_name = settings.DATA_BACKUP_INIT
init_path = os.path.abspath(os.path.join(settings.DATA_BACKUP_DIR, init_name))
logging.info(f'Clearing {init_path}')
shutil.rmtree(init_path, ignore_errors=True)

logging.info('Resetting database...')
call_command('flush', '--noinput')
call_command('migrate', '--noinput')
num_addons = 10
num_themes = 5

self.clean_dir(self.data_backup_init)

self.logger.info('Resetting database...')
self.call_command('flush', '--noinput')
self.call_command('migrate', '--noinput')
# reindex --wipe will force the ES mapping to be re-installed. Useful to
# make sure the mapping is correct before adding a bunch of add-ons.
call_command('reindex', '--wipe', '--force', '--noinput')
self.call_command('reindex', '--wipe', '--force', '--noinput')

logging.info('Loading initial data...')
call_command('loaddata', 'initial.json')
call_command('import_prod_versions')
call_command(
self.logger.info('Loading initial data...')
self.call_command('loaddata', 'initial.json')
self.call_command('import_prod_versions')
self.call_command(
'createsuperuser',
'--no-input',
'--username',
settings.LOCAL_ADMIN_USERNAME,
'--email',
settings.LOCAL_ADMIN_EMAIL,
)
call_command('loaddata', 'zadmin/users')

logging.info('Generating add-ons...')
call_command('generate_addons', '--app', 'firefox', 10)
call_command('generate_addons', '--app', 'android', 10)
call_command('generate_themes', 5)
# These add-ons are specifically useful for the addons-frontend
# homepage. You may have to re-run this, in case the data there
# changes.
call_command('generate_default_addons_for_frontend')
logging.info(f'Dumping data to {init_path}')
call_command('dump_data', '--name', init_name)
self.call_command('loaddata', 'zadmin/users')

self.logger.info('Generating add-ons...')
self.call_command('generate_addons', '--app', 'firefox', num_addons)
self.call_command('generate_addons', '--app', 'android', num_addons)
self.call_command('generate_themes', num_themes)

self.call_command('generate_default_addons_for_frontend')

self.call_command('dump_data', '--name', self.data_backup_init)
Loading

0 comments on commit 9910061

Please sign in to comment.