Skip to content
This repository has been archived by the owner on Feb 16, 2023. It is now read-only.

Commit

Permalink
Add suggest
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrunner committed May 30, 2021
1 parent 7dd9571 commit 2ae4a78
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 30 deletions.
27 changes: 24 additions & 3 deletions src/documents/management/commands/document_retagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,20 @@ def add_arguments(self, parser):
action="store_true",
help="If set, the progress bar will not be shown"
)
parser.add_argument(
"--suggest",
default=False,
action="store_true",
help="Return the suggestion, don't change anything."
)
parser.add_argument(
"--base-url",
help="The base URL to use to build the link to the documents."
)

def handle(self, *args, **options):
# Detect if we support color
color = self.style.ERROR("test") != "test"

if options["inbox_only"]:
queryset = Document.objects.filter(tags__is_inbox_tag=True)
Expand All @@ -85,18 +97,27 @@ def handle(self, *args, **options):
document=document,
classifier=classifier,
replace=options['overwrite'],
use_first=options['use_first'])
use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)

if options['document_type']:
set_document_type(sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'],
use_first=options['use_first'])
use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)

if options['tags']:
set_tags(
sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'])
replace=options['overwrite'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
122 changes: 100 additions & 22 deletions src/documents/signals/handlers.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
import logging
import os

from django.utils import termcolors
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from django.utils import termcolors, timezone
from filelock import FileLock

from .. import matching
from ..file_handling import delete_empty_directories, \
create_source_path_directory, \
generate_unique_filename
from ..models import Document, Tag
from ..models import Document, Tag, MatchingModel


logger = logging.getLogger("paperless.handlers")
Expand All @@ -32,6 +33,9 @@ def set_correspondent(sender,
classifier=None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs):
if document.correspondent and not replace:
return
Expand Down Expand Up @@ -60,13 +64,31 @@ def set_correspondent(sender,
return

if selected or replace:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={'group': logging_group}
)
if suggest:
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
print(f"Suggest correspondent {selected}")
else:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={'group': logging_group}
)

document.correspondent = selected
document.save(update_fields=("correspondent",))
document.correspondent = selected
document.save(update_fields=("correspondent",))


def set_document_type(sender,
Expand All @@ -75,6 +97,9 @@ def set_document_type(sender,
classifier=None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs):
if document.document_type and not replace:
return
Expand Down Expand Up @@ -104,20 +129,41 @@ def set_document_type(sender,
return

if selected or replace:
logger.info(
f"Assigning document type {selected} to {document}",
extra={'group': logging_group}
)
if suggest:
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
print(f"Sugest document type {selected}")
else:
logger.info(
f"Assigning document type {selected} to {document}",
extra={'group': logging_group}
)

document.document_type = selected
document.save(update_fields=("document_type",))
document.document_type = selected
document.save(update_fields=("document_type",))


def set_tags(sender,
document=None,
logging_group=None,
classifier=None,
replace=False,
suggest=False,
base_url=None,
color=False,
**kwargs):

if replace:
Expand All @@ -132,16 +178,48 @@ def set_tags(sender,

relevant_tags = set(matched_tags) - current_tags

if not relevant_tags:
return
if suggest:
extra_tags = current_tags - set(matched_tags)
extra_tags = [
t for t in extra_tags
if t.matching_algorithm == MatchingModel.MATCH_AUTO
]
if not relevant_tags and not extra_tags:
return
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
if relevant_tags:
print(
"Suggest tags: " + ", ".join([t.name for t in relevant_tags])
)
if extra_tags:
print("Extra tags: " + ", ".join([t.name for t in extra_tags]))
else:
if not relevant_tags:
return

message = 'Tagging "{}" with "{}"'
logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])),
extra={'group': logging_group}
)
message = 'Tagging "{}" with "{}"'
logger.info(
message.format(
document, ", ".join([t.name for t in relevant_tags])
),
extra={'group': logging_group}
)

document.tags.add(*relevant_tags)
document.tags.add(*relevant_tags)


@receiver(models.signals.post_delete, sender=Document)
Expand Down
59 changes: 54 additions & 5 deletions src/documents/tests/test_management_retagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@ def make_models(self):
self.d1 = Document.objects.create(checksum="A", title="A", content="first document")
self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
self.d4 = Document.objects.create(checksum="D", title="D", content="auto document")

self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.tag_auto = Tag.objects.create(name="tagauto", matching_algorithm=Tag.MATCH_AUTO)

self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match)
self.d4.tags.add(self.tag_auto)


self.correspondent_first = Correspondent.objects.create(
Expand All @@ -32,33 +35,35 @@ def make_models(self):
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY)

def get_updated_docs(self):
return Document.objects.get(title="A"), Document.objects.get(title="B"), Document.objects.get(title="C")
return Document.objects.get(title="A"), Document.objects.get(title="B"), \
Document.objects.get(title="C"), Document.objects.get(title="D")

def setUp(self) -> None:
    """Run the base-class setup, then create the fixture models."""
    super().setUp()
    self.make_models()

def test_add_tags(self):
call_command('document_retagger', '--tags')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_auto.tags.count(), 1)

self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second)

def test_add_type(self):
call_command('document_retagger', '--document_type')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

self.assertEqual(d_first.document_type, self.doctype_first)
self.assertEqual(d_second.document_type, self.doctype_second)

def test_add_correspondent(self):
call_command('document_retagger', '--correspondent')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second)
Expand All @@ -68,11 +73,55 @@ def test_overwrite_preserve_inbox(self):

call_command('document_retagger', '--tags', '--overwrite')

d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))

self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])
self.assertEqual(d_auto.tags.count(), 0)

def test_add_tags_suggest(self):
    """Check that ``--tags --suggest`` leaves the documents' tags unchanged.

    The first and second documents start without tags and must still have
    none afterwards; the "auto" document must keep its single tag.
    """
    call_command('document_retagger', '--tags', '--suggest')
    first_doc, second_doc, _, auto_doc = self.get_updated_docs()

    # Matching documents must not have been tagged in suggest mode.
    for untouched in (first_doc, second_doc):
        self.assertEqual(untouched.tags.count(), 0)
    # The tag that was attached beforehand must still be there.
    self.assertEqual(auto_doc.tags.count(), 1)

def test_add_type_suggest(self):
    """Check that ``--document_type --suggest`` assigns no document type."""
    call_command('document_retagger', '--document_type', '--suggest')
    first_doc, second_doc, _, _ = self.get_updated_docs()

    # Both matching documents must still be without a document type.
    for untouched in (first_doc, second_doc):
        self.assertEqual(untouched.document_type, None)

def test_add_correspondent_suggest(self):
    """Check that ``--correspondent --suggest`` assigns no correspondent."""
    call_command('document_retagger', '--correspondent', '--suggest')
    first_doc, second_doc, _, _ = self.get_updated_docs()

    # Both matching documents must still be without a correspondent.
    for untouched in (first_doc, second_doc):
        self.assertEqual(untouched.correspondent, None)

def test_add_tags_suggest_url(self):
    """Check tag suggestion with ``--base-url`` leaves tags unchanged.

    Same expectations as the plain suggest run: the first and second
    documents stay untagged and the "auto" document keeps its one tag.
    """
    call_command('document_retagger', '--tags', '--suggest', '--base-url=http://localhost')
    first_doc, second_doc, _, auto_doc = self.get_updated_docs()

    # Matching documents must not have been tagged in suggest mode.
    for untouched in (first_doc, second_doc):
        self.assertEqual(untouched.tags.count(), 0)
    # The tag that was attached beforehand must still be there.
    self.assertEqual(auto_doc.tags.count(), 1)

def test_add_type_suggest_url(self):
    """Check document-type suggestion with ``--base-url`` assigns nothing."""
    call_command('document_retagger', '--document_type', '--suggest', '--base-url=http://localhost')
    first_doc, second_doc, _, _ = self.get_updated_docs()

    # Both matching documents must still be without a document type.
    for untouched in (first_doc, second_doc):
        self.assertEqual(untouched.document_type, None)

def test_add_correspondent_suggest_url(self):
    """Check correspondent suggestion with ``--base-url`` assigns nothing."""
    call_command('document_retagger', '--correspondent', '--suggest', '--base-url=http://localhost')
    first_doc, second_doc, _, _ = self.get_updated_docs()

    # Both matching documents must still be without a correspondent.
    for untouched in (first_doc, second_doc):
        self.assertEqual(untouched.correspondent, None)

0 comments on commit 2ae4a78

Please sign in to comment.