Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use db_backup + management commands to implement data dump/load/seed #22693

Merged
merged 13 commits into from
Sep 26, 2024
38 changes: 11 additions & 27 deletions Makefile-docker
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ export PYTHON_COMMAND=python3
export PIP_COMMAND=$(PYTHON_COMMAND) -m pip
APP=src/olympia/

NUM_ADDONS=10
NUM_THEMES=$(NUM_ADDONS)

NODE_MODULES := $(NPM_CONFIG_PREFIX)node_modules/

REQUIRED_FILES := \
Expand Down Expand Up @@ -52,37 +49,24 @@ check_django: ## check if the django app is configured properly
.PHONY: check
check: check_files check_olympia_user check_debian_packages check_pip_packages check_django

.PHONY: data_dump
data_dump: ## dump the database and media files into a named backup (options via ARGS)
	./manage.py data_dump $(ARGS)

.PHONY: data_load
data_load: ## restore the database and media files from a named backup, then reindex (ARGS must include --name)
	./manage.py data_load $(ARGS)

.PHONY: initialize_db
initialize_db: ## create a new database
rm -rf ./user-media/* ./tmp/*
$(PYTHON_COMMAND) manage.py create_db --force
$(PYTHON_COMMAND) manage.py migrate --noinput
$(PYTHON_COMMAND) manage.py loaddata initial.json
$(PYTHON_COMMAND) manage.py import_prod_versions
# The superuser needs to have a mozilla.com address for admin tools access
$(PYTHON_COMMAND) manage.py createsuperuser \
--no-input \
--username "local_admin" \
--email "local_admin@mozilla.com"
$(PYTHON_COMMAND) manage.py loaddata zadmin/users
# Seed the database with initial data
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
./manage.py data_seed

.PHONY: reindex_data
reindex_data: ## reindex the data in elasticsearch
$(PYTHON_COMMAND) manage.py reindex --force --noinput

.PHONY: populate_data
populate_data: ## populate a new database
# reindex --wipe will force the ES mapping to be re-installed. Useful to
# make sure the mapping is correct before adding a bunch of add-ons.
$(PYTHON_COMMAND) manage.py reindex --wipe --force --noinput
$(PYTHON_COMMAND) manage.py generate_addons --app firefox $(NUM_ADDONS)
$(PYTHON_COMMAND) manage.py generate_addons --app android $(NUM_ADDONS)
$(PYTHON_COMMAND) manage.py generate_themes $(NUM_THEMES)
# These add-ons are specifically useful for the addons-frontend
# homepage. You may have to re-run this, in case the data there
# changes.
$(PYTHON_COMMAND) manage.py generate_default_addons_for_frontend

.PHONY: update_db
update_db: ## run the database migrations
$(PYTHON_COMMAND) manage.py migrate --noinput
Expand Down Expand Up @@ -157,7 +141,7 @@ dbshell: ## connect to a database shell
$(PYTHON_COMMAND) ./manage.py dbshell

# Seeding is performed by initialize_db (via `manage.py data_seed`), so there
# is no separate populate step in this chain. Declare the target only once:
# make merges the prerequisites of repeated headers for the same target.
.PHONY: initialize
initialize: initialize_db update_assets reindex_data ## init the dependencies, the database, and assets

PYTEST_SRC := src/olympia/

Expand Down
22 changes: 1 addition & 21 deletions Makefile-os
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,6 @@ shell: ## connect to a running addons-server docker shell
rootshell: ## connect to a running addons-server docker shell with root user
docker compose exec --user root web bash

.PHONY: data_export
data_export:
@ mkdir -p $(EXPORT_DIR)

# Extracting mysql database
docker compose exec mysqld /usr/bin/mysqldump olympia > $(EXPORT_DIR)/data_mysqld.sql

.PHONY: data_restore
data_restore:
@[ -d $(RESTORE_DIR) ] || (echo "Directory $(RESTORE_DIR) does not exist" && exit 1)

# Wait for MySQL server to be ready
docker compose exec mysqld bash \
-c 'while ! mysqladmin ping --silent; do echo "waiting"; sleep 1; done'

# Restoring mysql database
docker compose exec -T mysqld /usr/bin/mysql olympia < $(RESTORE_DIR)/data_mysqld.sql

$(MAKE) reindex

.PHONY: docker_compose_config
docker_compose_config: ## Show the docker compose configuration
@docker compose config web --format json
Expand Down Expand Up @@ -168,7 +148,7 @@ initialize_docker: up
docker compose exec --user olympia web make initialize

# Catch-all: any goal not defined in this file is forwarded to the make that
# runs inside the web container. ARGS is wrapped in double quotes so that a
# multi-word value (e.g. "--name foo --force") arrives as one ARGS=...
# assignment instead of being split into separate make arguments.
# The forwarding command must appear only once, otherwise every forwarded
# goal is executed twice in the container.
%: ## This directs any other recipe (command) to the web container's make.
	docker compose exec --user olympia web make $(MAKECMDGOALS) ARGS="$(shell echo $(ARGS))"

# You probably want to put new commands in Makefile-docker, unless they operate
# on multiple containers or are host-os specific.
40 changes: 28 additions & 12 deletions docs/topics/development/data_management.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,30 +35,46 @@ The `make initialize` command, executed as part of `make initialize_docker`, per

## Exporting and Loading Data Snapshots

You can export and load data snapshots to manage data states across different environments or for backup purposes. The Makefile provides commands to facilitate this.
You can export and load data snapshots to manage data states across different environments or for backup purposes.
The Makefile provides commands to facilitate this.
These commands rely internally on [django-dbbackup](https://django-dbbackup.readthedocs.io/en/stable/).

- **Exporting Data**:
- **Data dump**:

```sh
make data_export [EXPORT_DIR=<path>]
make data_dump [ARGS="--name <name> --force"]
```

This command creates a dump of the current MySQL database. The optional `EXPORT_DIR` argument allows you to specify a custom path for the export directory.
The default value is a timestamp in the `backups` directory.
This command creates a dump of the current MySQL database. The command accepts an optional `name` argument which will determine
the name of the directory created in the backup directory. By default it uses a timestamp to ensure uniqueness.

The data exported will be a .sql dump of the current state of the database including any data that has been added or modified.
You can also specify the `--force` argument to overwrite an existing backup with the same name.

- **Loading Data**:

```sh
make data_restore [RESTORE_DIR=<path>]
make data_load ARGS="--name <name>"
```

This command restores a MySQL database from a previously exported snapshot. The optional `RESTORE_DIR` argument allows you to specify the path of the import file.
This must be an absolute path. It defaults to the latest stored snapshot in the `backups` directory.
This command will load data from an existing backup directory, synchronize the storage directory and reindex elasticsearch.
The name is required and must match a directory in the backup directory.

Refer to the Makefile for detailed instructions on these commands.
## Hard Reset Database

This comprehensive setup ensures that the development environment is fully prepared with the necessary data.
The actual mysql database is created and managed by the `mysqld` container. The database is created on container start
and the actual data is stored in a persistent data volume. This enables data to persist across container restarts.

By following these practices, developers can manage data effectively in the **addons-server** project. The use of persistent volumes, external mounts, data snapshots, and automated data population ensures a robust and flexible data management strategy. For more detailed instructions, refer to the project's Makefile and Docker Compose configuration in the repository.
`addons-server` assumes that a database named `olympia` already exists and most data management commands will fail
if it does not.

If you need to hard reset the database (for example, to start with a fresh state), you can use the following command:

```bash
make down && make docker_mysqld_volume_remove
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
```

This will stop the containers and remove the `mysqld` data volume from docker. The next time you run `make up` it will
create a new empty volume for you and mysql will recreate the database.

> NOTE: removing the data volume will remove the actual data! You can and should save a backup before doing this
> if you want to keep the data.
3 changes: 3 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,6 @@ pytest-split==0.9.0 \
pytest-reportlog==0.4.0 \
--hash=sha256:5db4d00586546d8c6b95c66466629f1e913440c36d97795a673d2e19c5cedd5c \
--hash=sha256:c9f2079504ee51f776d3118dcf5e4730f163d3dcf26ebc8f600c1fa307bf638c
django-dbbackup==4.2.1 \
--hash=sha256:157a2ec10d482345cd75092e510ac40d6e2ee6084604a1d17abe178c2f06bc69 \
--hash=sha256:b23265600ead0780ca781b1b4b594949aaa8a20d74f08701f91ee9d7eb1f08cd
11 changes: 10 additions & 1 deletion settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,16 @@
INTERNAL_ROUTES_ALLOWED = True

# These apps are great during development.
INSTALLED_APPS += ('olympia.landfill',)
INSTALLED_APPS += (
'olympia.landfill',
'dbbackup',
)

# django-dbbackup: storage backend used for backups, here Django's
# FileSystemStorage.
DBBACKUP_STORAGE = 'django.core.files.storage.FileSystemStorage'

# django-dbbackup: map the project's `olympia.core.db.mysql` database engine
# to dbbackup's MysqlDumpConnector.
DBBACKUP_CONNECTOR_MAPPING = {
    'olympia.core.db.mysql': 'dbbackup.db.mysql.MysqlDumpConnector',
}

# Override logging config to enable DEBUG logs for (almost) everything.
LOGGING['root']['level'] = logging.DEBUG
Expand Down
47 changes: 47 additions & 0 deletions src/olympia/amo/management/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import logging
import os
import shutil

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from celery import chord, group
Expand Down Expand Up @@ -143,3 +148,45 @@ def handle(self, *args, **options):
else:
ts = group(grouping)
ts.apply_async()


class BaseDataCommand(BaseCommand):
    """Shared path and directory helpers for the ``data_*`` commands.

    Every backup lives in its own directory, identified by a name, below
    ``data_backup_dirname``. Each backup directory holds one database dump
    and one storage archive with fixed file names.
    """

    # Settings for django-dbbackup
    # Absolute path of the directory that contains all named backups.
    data_backup_dirname = os.path.abspath(os.path.join(settings.ROOT, 'backups'))
    # Name of the backup used by the data_seed command.
    data_backup_init = '_init'
    # File names used inside each backup directory.
    data_backup_db_filename = 'db.sql'
    data_backup_storage_filename = 'storage.tar'

    logger = logging

    def backup_dir_path(self, name):
        """Return the absolute path of the backup directory called ``name``.

        ``os.path.join`` returns its last absolute component unchanged, so an
        absolute ``name`` resolves to itself.
        """
        return os.path.abspath(os.path.join(self.data_backup_dirname, name))

    def backup_db_path(self, name):
        """Return the absolute path of the database dump of backup ``name``."""
        return os.path.abspath(
            os.path.join(self.backup_dir_path(name), self.data_backup_db_filename)
        )

    def backup_storage_path(self, name):
        """Return the absolute path of the storage archive of backup ``name``."""
        return os.path.abspath(
            os.path.join(self.backup_dir_path(name), self.data_backup_storage_filename)
        )

    def clean_dir(self, name: str) -> None:
        """Delete the backup directory ``name``; a missing directory is ignored."""
        path = self.backup_dir_path(name)
        self.logger.info(f'Clearing {path}')
        shutil.rmtree(path, ignore_errors=True)

    def make_dir(self, name: str, force: bool = False) -> None:
        """Create the backup directory ``name``.

        If the directory already exists it is wiped first when ``force`` is
        true; otherwise ``CommandError`` is raised and nothing is modified.
        """
        path = self.backup_dir_path(name)

        if os.path.exists(path):
            if not force:
                raise CommandError(
                    f'path {path} already exists. Use --force to overwrite.'
                )
            self.clean_dir(name)

        os.makedirs(path, exist_ok=True)
48 changes: 48 additions & 0 deletions src/olympia/amo/management/commands/data_dump.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from datetime import datetime

from django.core.management import call_command

from .. import BaseDataCommand


class Command(BaseDataCommand):
    """Write a named backup containing a database dump and a media archive."""

    help = 'Dump data with a specified name'

    def add_arguments(self, parser):
        """Register ``--name`` (defaults to a timestamp) and ``--force``."""
        parser.add_argument(
            '--name',
            type=str,
            default=datetime.now().strftime('%Y%m%d%H%M%S'),
            help='Name of the data dump',
        )
        parser.add_argument(
            '--force', action='store_true', help='Force overwrite of existing dump'
        )

    def handle(self, *args, **options):
        """Create the backup directory, then dump the database and media.

        Raises ``CommandError`` (from ``make_dir``) when a backup with the
        same name exists and ``--force`` was not given. Any failure during
        the dump itself removes the partially written backup and re-raises.
        """
        name = options.get('name')
        force = options.get('force')

        db_path = self.backup_db_path(name)
        storage_path = self.backup_storage_path(name)

        # Create the directory outside the try block. If it refuses because
        # the backup already exists, the cleanup handler below must not run,
        # otherwise the existing backup would be deleted.
        self.make_dir(name, force=force)

        try:
            call_command(
                'dbbackup',
                output_path=db_path,
                interactive=False,
                compress=True,
            )

            call_command(
                'mediabackup',
                output_path=storage_path,
                interactive=False,
                compress=True,
            )
        except Exception:
            # Do not leave a half-written backup behind.
            self.clean_dir(name)
            raise
eviljeff marked this conversation as resolved.
Show resolved Hide resolved
50 changes: 50 additions & 0 deletions src/olympia/amo/management/commands/data_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os

from django.core.management import call_command
from django.core.management.base import CommandError

from .. import BaseDataCommand


class Command(BaseDataCommand):
    """Restore the database and media files from a named backup."""

    help = 'Load data from a specified name'

    def add_arguments(self, parser):
        """Register the mandatory ``--name`` option."""
        parser.add_argument(
            '--name',
            type=str,
            required=True,
            help='Name of the data dump',
        )

    def handle(self, *args, **options):
        """Restore the database, then the media archive, then reindex.

        Raises ``CommandError`` if the database dump or the storage archive
        of the requested backup does not exist.
        """
        name = options.get('name')
        db_path = self.backup_db_path(name)
        storage_path = self.backup_storage_path(name)

        if not os.path.exists(db_path):
            # The CommandError carries the resolved path; no separate print
            # is needed.
            raise CommandError(f'DB backup not found: {db_path}')

        call_command(
            'dbrestore',
            input_path=db_path,
            interactive=False,
            uncompress=True,
        )

        if not os.path.exists(storage_path):
            raise CommandError(f'Storage backup not found: {storage_path}')

        call_command(
            'mediarestore',
            input_path=storage_path,
            interactive=False,
            uncompress=True,
            replace=True,
        )

        # reindex --wipe will force the ES mapping to be re-installed.
        # After loading data from a backup, we should always reindex
        # to make sure the mapping is correct.
        call_command('reindex', '--wipe', '--force', '--noinput')
46 changes: 46 additions & 0 deletions src/olympia/amo/management/commands/data_seed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from django.conf import settings
from django.core.management import call_command

from .. import BaseDataCommand


class Command(BaseDataCommand):
    """Rebuild the local database from scratch and snapshot the result."""

    help = (
        'Reset and seed the database with initial data, '
        'generated add-ons, and data from AMO production.'
    )

    def handle(self, *args, **options):
        """Flush, migrate, load fixtures, generate add-ons, then dump and reload."""
        addon_count, theme_count = 10, 5
        init_backup = self.data_backup_init

        # Start from a clean slate: drop any previous initial backup.
        self.clean_dir(init_backup)

        self.logger.info('Resetting database...')
        reset_steps = (
            ('flush', '--noinput'),
            # reindex --wipe will force the ES mapping to be re-installed.
            ('reindex', '--wipe', '--force', '--noinput'),
            ('migrate', '--noinput'),
        )
        for reset_step in reset_steps:
            call_command(*reset_step)

        self.logger.info('Loading initial data...')
        call_command('loaddata', 'initial.json')
        call_command('import_prod_versions')
        superuser_args = (
            '--no-input',
            '--username',
            settings.LOCAL_ADMIN_USERNAME,
            '--email',
            settings.LOCAL_ADMIN_EMAIL,
        )
        call_command('createsuperuser', *superuser_args)
        call_command('loaddata', 'zadmin/users')

        self.logger.info('Generating add-ons...')
        for app_name in ('firefox', 'android'):
            call_command('generate_addons', '--app', app_name, addon_count)
        call_command('generate_themes', theme_count)

        call_command('generate_default_addons_for_frontend')

        # Snapshot the freshly seeded state, then load it back.
        for data_command in ('data_dump', 'data_load'):
            call_command(data_command, '--name', init_backup)
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
Loading
Loading