Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge pull request #66 from cmc333333/rewrite-import
Browse files Browse the repository at this point in the history
Rewrite/rename import_reg management command
  • Loading branch information
cmc333333 authored Jan 29, 2017
2 parents de7e36f + 6384a89 commit c7c1317
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 220 deletions.
48 changes: 42 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,18 +210,54 @@ $ sphinx-apidoc -F -o docs regcore_read
$ sphinx-apidoc -F -o docs regcore_write
```

## Importing Regulation JSON
## Importing Data

There is a `django_admin` command that facilitates the import of JSON
regulation content into the database. The command is called `import_reg` and
is used as follows, from the root `regcore` directory.
### Via the `eregs` parser

The `eregs` script (see
[regulations-parser](http://github.com/eregs/regulations-parser)) includes
subcommands which will write processed data to a running API. Notably, if
`write_to` (the last step of `pipeline`) is directed at a target beginning
with `http://` or `https://`, it will write the relevant data to that host.
Note that HTTP authentication can be encoded within these urls. For example,
if the API is running on the localhost, port 8000, you could run:

```bash
$ eregs write_to http://localhost:8000/
```

See the command line
[docs](https://eregs-parser.readthedocs.io/en/latest/commandline.html) for
more detail.

### Via the `import_docs` Django command

If you've already exported data from the parser, you may import it from the
command line with the `import_docs` Django management command. It should be
given the root directory of the data as its only parameter. Note that this
does not require a running API.

```bash
$ python manage.py import_reg -r <regulation-number> -s <path/to/stub/root>
$ ls /path/to/data-root
diff layer notice regulation
$ python manage.py import_docs /path/to/data-root
```

For an example of JSON content, see [`regulations-stub`](https://github.com/cfpb/regulations-stub/)
### Via curl

You may also simulate sending data to a running API via curl, if you've
exported data from the parser. For example, if the API is running on the
localhost, port 8000, you could run:

```bash
$ cd /path/to/data-root
$ ls
diff layer notice regulation
$ for TAIL in $(find */* -type f | sort -r); \
do \
  curl -X PUT http://localhost:8000/$TAIL -d @$TAIL; \
done
```

## Running Tests

Expand Down
52 changes: 52 additions & 0 deletions regcore/management/commands/import_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import logging
import os

from django.core.management.base import BaseCommand
from django.test import Client, override_settings


logger = logging.getLogger(__name__)


def scoped_files(root):
    """Find all of the files which will need to be "uploaded".

    Walks `root` recursively and, for every file found, yields the list of
    its path components relative to `root`. Each list begins with an empty
    string, so that `'/'.join(parts)` produces a path with a leading slash
    (e.g. `['', 'a', '1', 'i']` becomes `/a/1/i`).

    `root` may be given with or without a trailing path separator; the
    yielded components are the same either way.
    """
    for path, _, file_names in os.walk(root):
        for file_name in file_names:
            file_path = os.path.join(path, file_name)
            # relpath (rather than slicing off len(root) characters) gives
            # the same answer whether or not root ends in a separator
            relative = os.path.relpath(file_path, root)
            yield [''] + relative.split(os.sep)


def save_file(root, file_parts):
    """Read one file from disk and PUT its contents through Django's test
    client, logging the outcome.

    The file lives at `root` joined with `file_parts`; the request url is
    `file_parts` joined with forward slashes. A 204 response is logged at
    info level; any other status is logged as an error along with the first
    100 characters of the response body."""
    disk_path = os.path.join(root, *file_parts)
    with open(disk_path, 'rb') as handle:
        payload = handle.read()

    url = '/'.join(file_parts)
    response = Client().put(url, data=payload,
                            content_type='application/json')

    if response.status_code != 204:
        logger.error('Failed to save %s: (%s), %s',
                     disk_path, response.status_code, response.content[:100])
        return
    logger.info('Saved %s', disk_path)


class Command(BaseCommand):
    """Management command which imports every file beneath a directory."""
    help = "Import a collection of JSON files into the database."

    def add_arguments(self, parser):
        """Register the single, optional positional argument: the directory
        to import from. It defaults to the current working directory."""
        parser.add_argument(
            'base_dir',
            nargs='?',
            default=os.getcwd(),
            help='the base filesystem path for importing JSON files',
        )

    @override_settings(ROOT_URLCONF='regcore.urls', ALLOWED_HOSTS=['*'])
    def handle(self, *args, **options):
        """Strip any trailing path separator from the base directory, then
        save each file found beneath it. ROOT_URLCONF and ALLOWED_HOSTS are
        overridden for the duration of the call."""
        base_dir = options['base_dir'].rstrip(os.sep)
        for parts in scoped_files(base_dir):
            save_file(base_dir, parts)
214 changes: 0 additions & 214 deletions regcore/management/commands/import_reg.py

This file was deleted.

Empty file.
Empty file.
53 changes: 53 additions & 0 deletions regcore/tests/management/commands/import_docs_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from mock import Mock

from regcore.management.commands import import_docs


def test_scoped_files(tmpdir):
    """Every created file is reported as a list of path components, no
    matter how deeply it is nested"""
    dir_a = tmpdir.mkdir('a')
    dir_b = tmpdir.mkdir('b')
    for name in ('1', '2'):
        dir_a.mkdir(name)
    dir_b.mkdir('1').mkdir('i')

    created = [
        ('a', '1', 'i'), ('a', '1', 'ii'),
        ('a', '2', 'i'),
        ('b', '1', 'i', 'A'), ('b', '1', 'i', 'B'), ('b', '1', 'i', 'C'),
    ]
    for parts in created:
        tmpdir.ensure(*parts)

    found = set(map(tuple, import_docs.scoped_files(str(tmpdir))))
    # Each entry starts with an empty string because the separator which
    # follows the root is left at the front of the trimmed path
    expected = {
        ('', 'a', '1', 'i'), ('', 'a', '1', 'ii'),
        ('', 'a', '2', 'i'),
        ('', 'b', '1', 'i', 'A'), ('', 'b', '1', 'i', 'B'),
        ('', 'b', '1', 'i', 'C'),
    }
    assert found == expected


def test_save_file(monkeypatch, tmpdir):
    """A saved file is PUT to the url matching its path components; success
    is logged as info, failure as an error with a trimmed response body"""
    fake_client = Mock()
    fake_logger = Mock()
    monkeypatch.setattr(import_docs, 'Client', fake_client)
    monkeypatch.setattr(import_docs, 'logger', fake_logger)
    # Stands in for Client().put
    put = fake_client.return_value.put
    put.return_value.status_code = 204

    target = tmpdir.mkdir('a').mkdir('1').join('i')
    target.write(b'content')
    root = str(tmpdir)

    import_docs.save_file(root, ['', 'a', '1', 'i'])
    expected_call = (
        ('/a/1/i',), {'data': b'content', 'content_type': 'application/json'})
    assert put.call_args == expected_call
    assert fake_logger.info.called

    # Second pass: the same file, but the response reports a failure
    put.reset_mock()
    put.return_value.status_code = 404
    put.return_value.content = 'a'*1000
    import_docs.save_file(root, ['', 'a', '1', 'i'])
    assert fake_logger.error.called
    logged_args = fake_logger.error.call_args[0]
    assert logged_args[3] == 'a'*100   # trimmed

0 comments on commit c7c1317

Please sign in to comment.