Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Rewrite/rename import_reg management command #66

Merged
merged 3 commits into from
Jan 29, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 42 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,18 +210,54 @@ $ sphinx-apidoc -F -o docs regcore_read
$ sphinx-apidoc -F -o docs regcore_write
```

## Importing Regulation JSON
## Importing Data

There is a `django_admin` command that facilitates the import of JSON
regulation content into the database. The command is called `import_reg` and
is used as follows, from the root `regcore` directory.
### Via the `eregs` parser

The `eregs` script (see
[regulations-parser](http://github.com/eregs/regulations-parser)) includes
subcommands which will write processed data to a running API. Notably, if
`write_to` (the last step of `pipeline`) is directed at a target beginning
with `http://` or `https://`, it will write the relevant data to that host.
Note that HTTP authentication can be encoded within these URLs. For example,
if the API is running on the localhost, port 8000, you could run:

```bash
$ eregs write_to http://localhost:8000/
```

See the command line
[docs](https://eregs-parser.readthedocs.io/en/latest/commandline.html) for
more detail.

### Via the `import_docs` Django command

If you've already exported data from the parser, you may import it from the
command line with the `import_docs` Django management command. It takes the
root directory of the data as its only parameter; if omitted, the current
working directory is used. Note that this does not require a running API.

```bash
$ python manage.py import_reg -r <regulation-number> -s <path/to/stub/root>
$ ls /path/to/data-root
diff layer notice regulation
$ python manage.py import_docs /path/to/data-root
```

For an example of JSON content, see [`regulations-stub`](https://github.com/cfpb/regulations-stub/)
### Via curl

You may also simulate sending data to a running API via curl, if you've
exported data from the parser. For example, if the API is running on the
localhost, port 8000, you could run:

```bash
$ cd /path/to/data-root
$ ls
diff layer notice regulation
$ for TAIL in $(find */* -type f | sort -r); \
do \
    curl -X PUT http://localhost:8000/$TAIL -d @$TAIL; \
done
```

## Running Tests

Expand Down
52 changes: 52 additions & 0 deletions regcore/management/commands/import_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import logging
import os

from django.core.management.base import BaseCommand
from django.test import Client, override_settings


logger = logging.getLogger(__name__)


def scoped_files(root):
    """Find all of the files which will need to be "uploaded".

    Walks `root` recursively and, for every file found, yields the list of
    that file's path components relative to `root`. Each list begins with an
    empty string, so joining the components with '/' produces a path with a
    leading slash (e.g. ['', 'a', '1', 'i'] -> '/a/1/i').

    `root` may be given with or without trailing path separators; the
    yielded components are the same either way.
    """
    for path, _, file_names in os.walk(root):
        for file_name in file_names:
            # Use relpath rather than slicing off len(root) characters:
            # slicing silently drops the leading separator (and therefore
            # the leading empty component) whenever `root` ends with one.
            relative = os.path.relpath(os.path.join(path, file_name), root)
            yield [''] + relative.split(os.sep)


def save_file(root, file_parts):
    """Read one file from disk and PUT its contents to the matching url.

    The file lives at `root` joined with `file_parts`; the url is the same
    `file_parts` joined with '/'. The raw bytes are sent as
    'application/json' through a fresh Django test `Client`. A 204 response
    is logged as a success; any other status is logged as an error along
    with the first 100 characters of the response body."""
    disk_path = os.path.join(root, *file_parts)
    with open(disk_path, 'rb') as handle:
        payload = handle.read()

    client = Client()
    response = client.put('/'.join(file_parts), data=payload,
                          content_type='application/json')

    if response.status_code != 204:
        # Only the start of the body is logged to keep error lines short
        logger.error('Failed to save %s: (%s), %s',
                     disk_path, response.status_code, response.content[:100])
        return
    logger.info('Saved %s', disk_path)


class Command(BaseCommand):
    """Management command which walks a directory tree and saves every file
    found beneath it via `save_file`."""
    help = "Import a collection of JSON files into the database."

    def add_arguments(self, parser):
        """Register the single, optional positional argument `base_dir`; it
        defaults to the working directory at the time this method runs."""
        parser.add_argument(
            'base_dir',
            nargs='?',
            default=os.getcwd(),
            help='the base filesystem path for importing JSON files'
        )

    # Settings are overridden only for the duration of `handle`.
    # NOTE(review): presumably this lets the test client's requests resolve
    # against regcore's urls and pass host validation regardless of the
    # project's own configuration -- confirm.
    @override_settings(ROOT_URLCONF='regcore.urls', ALLOWED_HOSTS=['*'])
    def handle(self, *args, **options):
        """Save every file found beneath the requested base directory."""
        # Trailing separators are stripped before walking the tree
        base_dir = options['base_dir'].rstrip(os.sep)

        for parts in scoped_files(base_dir):
            save_file(base_dir, parts)
214 changes: 0 additions & 214 deletions regcore/management/commands/import_reg.py

This file was deleted.

Empty file.
Empty file.
53 changes: 53 additions & 0 deletions regcore/tests/management/commands/import_docs_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from mock import Mock

from regcore.management.commands import import_docs


def test_scoped_files(tmpdir):
    """Every created file is reported as a list of path components, no
    matter how deeply it is nested"""
    dir_a = tmpdir.mkdir('a')
    dir_b = tmpdir.mkdir('b')
    for name in ('1', '2'):
        dir_a.mkdir(name)
    dir_b.mkdir('1').mkdir('i')

    created = [
        ('a', '1', 'i'),
        ('a', '1', 'ii'),
        ('a', '2', 'i'),
        ('b', '1', 'i', 'A'),
        ('b', '1', 'i', 'B'),
        ('b', '1', 'i', 'C'),
    ]
    for parts in created:
        tmpdir.ensure(*parts)

    found = set()
    for file_parts in import_docs.scoped_files(str(tmpdir)):
        found.add(tuple(file_parts))

    # Each result is expected to start with an empty string, ahead of the
    # components of the file's path beneath the root
    expected = {('',) + parts for parts in created}
    assert found == expected


def test_save_file(monkeypatch, tmpdir):
    """A saved file is PUT to the url matching its path; the outcome is
    logged as info on a 204 and as a trimmed error otherwise"""
    monkeypatch.setattr(import_docs, 'Client', Mock())
    monkeypatch.setattr(import_docs, 'logger', Mock())
    # The `put` method of whatever instance Client() hands back
    put = import_docs.Client.return_value.put
    put.return_value.status_code = 204

    leaf_dir = tmpdir.mkdir('a').mkdir('1')
    leaf_dir.join('i').write(b'content')
    file_parts = ['', 'a', '1', 'i']

    # Success path: the file's bytes are sent and an info line is logged
    import_docs.save_file(str(tmpdir), file_parts)
    expected_call = (
        ('/a/1/i',), {'data': b'content', 'content_type': 'application/json'})
    assert put.call_args == expected_call
    assert import_docs.logger.info.called

    # Failure path: an error line is logged with a shortened response body
    put.reset_mock()
    put.return_value.status_code = 404
    put.return_value.content = 'a'*1000
    import_docs.save_file(str(tmpdir), file_parts)
    assert import_docs.logger.error.called
    error_args = import_docs.logger.error.call_args[0]
    assert error_args[3] == 'a'*100     # trimmed