diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py
index ce3f94bf2..6636af20a 100644
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -63,8 +63,20 @@ def add_arguments(self, parser):
             action="store_true",
             help="If set, the progress bar will not be shown"
         )
+        parser.add_argument(
+            "--suggest",
+            default=False,
+            action="store_true",
+            help="Return the suggestion, don't change anything."
+        )
+        parser.add_argument(
+            "--base-url",
+            help="The base URL to use to build the link to the documents."
+        )
 
     def handle(self, *args, **options):
+        # Detect if we support color
+        color = self.style.ERROR("test") != "test"
 
         if options["inbox_only"]:
             queryset = Document.objects.filter(tags__is_inbox_tag=True)
@@ -85,18 +97,27 @@ def handle(self, *args, **options):
                     document=document,
                     classifier=classifier,
                     replace=options['overwrite'],
-                    use_first=options['use_first'])
+                    use_first=options['use_first'],
+                    suggest=options['suggest'],
+                    base_url=options['base_url'],
+                    color=color)
 
             if options['document_type']:
                 set_document_type(sender=None,
                                   document=document,
                                   classifier=classifier,
                                   replace=options['overwrite'],
-                                  use_first=options['use_first'])
+                                  use_first=options['use_first'],
+                                  suggest=options['suggest'],
+                                  base_url=options['base_url'],
+                                  color=color)
 
             if options['tags']:
                 set_tags(
                     sender=None,
                     document=document,
                     classifier=classifier,
-                    replace=options['overwrite'])
+                    replace=options['overwrite'],
+                    suggest=options['suggest'],
+                    base_url=options['base_url'],
+                    color=color)
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index f46adbb16..8fb8faf51 100644
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -1,6 +1,7 @@
 import logging
 import os
 
+from django.utils import termcolors
 from django.conf import settings
 from django.contrib.admin.models import ADDITION, LogEntry
 from django.contrib.auth.models import User
@@ -8,14 +9,14 @@
 from django.db import models, DatabaseError
 from django.db.models import Q
 from django.dispatch import receiver
 from django.utils import timezone
 from filelock import FileLock
 
 from .. import matching
 from ..file_handling import delete_empty_directories, \
     create_source_path_directory, \
     generate_unique_filename
-from ..models import Document, Tag
+from ..models import Document, Tag, MatchingModel
 
 
 logger = logging.getLogger("paperless.handlers")
@@ -32,6 +33,9 @@ def set_correspondent(sender,
                       classifier=None,
                       replace=False,
                       use_first=True,
+                      suggest=False,
+                      base_url=None,
+                      color=False,
                       **kwargs):
     if document.correspondent and not replace:
         return
@@ -60,13 +64,31 @@ def set_correspondent(sender,
             return
 
     if selected or replace:
-        logger.info(
-            f"Assigning correspondent {selected} to {document}",
-            extra={'group': logging_group}
-        )
+        if suggest:
+            if base_url:
+                print(
+                    termcolors.colorize(str(document), fg='green')
+                    if color
+                    else str(document)
+                )
+                print(f"{base_url}/documents/{document.pk}")
+            else:
+                print(
+                    (
+                        termcolors.colorize(str(document), fg='green')
+                        if color
+                        else str(document)
+                    ) + f" [{document.pk}]"
+                )
+            print(f"Suggest correspondent {selected}")
+        else:
+            logger.info(
+                f"Assigning correspondent {selected} to {document}",
+                extra={'group': logging_group}
+            )
 
-        document.correspondent = selected
-        document.save(update_fields=("correspondent",))
+            document.correspondent = selected
+            document.save(update_fields=("correspondent",))
 
 
 def set_document_type(sender,
@@ -75,6 +97,9 @@ def set_document_type(sender,
                       classifier=None,
                       replace=False,
                       use_first=True,
+                      suggest=False,
+                      base_url=None,
+                      color=False,
                       **kwargs):
     if document.document_type and not replace:
         return
@@ -104,13 +129,31 @@ def set_document_type(sender,
             return
 
     if selected or replace:
-        logger.info(
-            f"Assigning document type {selected} to {document}",
-            extra={'group': logging_group}
-        )
+        if suggest:
+            if base_url:
+                print(
+                    termcolors.colorize(str(document), fg='green')
+                    if color
+                    else str(document)
+                )
+                print(f"{base_url}/documents/{document.pk}")
+            else:
+                print(
+                    (
+                        termcolors.colorize(str(document), fg='green')
+                        if color
+                        else str(document)
+                    ) + f" [{document.pk}]"
+                )
+            print(f"Suggest document type {selected}")
+        else:
+            logger.info(
+                f"Assigning document type {selected} to {document}",
+                extra={'group': logging_group}
+            )
 
-        document.document_type = selected
-        document.save(update_fields=("document_type",))
+            document.document_type = selected
+            document.save(update_fields=("document_type",))
 
 
 def set_tags(sender,
@@ -118,6 +161,9 @@ def set_tags(sender,
              logging_group=None,
              classifier=None,
              replace=False,
+             suggest=False,
+             base_url=None,
+             color=False,
              **kwargs):
 
     if replace:
@@ -132,16 +178,48 @@ def set_tags(sender,
 
     relevant_tags = set(matched_tags) - current_tags
 
-    if not relevant_tags:
-        return
+    if suggest:
+        extra_tags = current_tags - set(matched_tags)
+        extra_tags = [
+            t for t in extra_tags
+            if t.matching_algorithm == MatchingModel.MATCH_AUTO
+        ]
+        if not relevant_tags and not extra_tags:
+            return
+        if base_url:
+            print(
+                termcolors.colorize(str(document), fg='green')
+                if color
+                else str(document)
+            )
+            print(f"{base_url}/documents/{document.pk}")
+        else:
+            print(
+                (
+                    termcolors.colorize(str(document), fg='green')
+                    if color
+                    else str(document)
+                ) + f" [{document.pk}]"
+            )
+        if relevant_tags:
+            print(
+                "Suggest tags: " + ", ".join([t.name for t in relevant_tags])
+            )
+        if extra_tags:
+            print("Extra tags: " + ", ".join([t.name for t in extra_tags]))
+    else:
+        if not relevant_tags:
+            return
 
-    message = 'Tagging "{}" with "{}"'
-    logger.info(
-        message.format(document, ", ".join([t.name for t in relevant_tags])),
-        extra={'group': logging_group}
-    )
+        message = 'Tagging "{}" with "{}"'
+        logger.info(
+            message.format(
+                document, ", ".join([t.name for t in relevant_tags])
+            ),
+            extra={'group': logging_group}
+        )
 
-    document.tags.add(*relevant_tags)
+        document.tags.add(*relevant_tags)
 
 
 @receiver(models.signals.post_delete, sender=Document)
diff --git a/src/documents/tests/test_management_retagger.py b/src/documents/tests/test_management_retagger.py
index 907a23d09..39e9c80b7 100644
--- a/src/documents/tests/test_management_retagger.py
+++ b/src/documents/tests/test_management_retagger.py
@@ -11,14 +11,17 @@ def make_models(self):
         self.d1 = Document.objects.create(checksum="A", title="A", content="first document")
         self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
         self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
+        self.d4 = Document.objects.create(checksum="D", title="D", content="auto document")
 
         self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
         self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
         self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
         self.tag_no_match = Tag.objects.create(name="test2")
+        self.tag_auto = Tag.objects.create(name="tagauto", matching_algorithm=Tag.MATCH_AUTO)
 
         self.d3.tags.add(self.tag_inbox)
         self.d3.tags.add(self.tag_no_match)
+        self.d4.tags.add(self.tag_auto)
 
 
         self.correspondent_first = Correspondent.objects.create(
@@ -32,7 +35,8 @@ def make_models(self):
             name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY)
 
     def get_updated_docs(self):
-        return Document.objects.get(title="A"), Document.objects.get(title="B"), Document.objects.get(title="C")
+        return Document.objects.get(title="A"), Document.objects.get(title="B"), \
+            Document.objects.get(title="C"), Document.objects.get(title="D")
 
     def setUp(self) -> None:
         super(TestRetagger, self).setUp()
@@ -40,25 +44,26 @@ def setUp(self) -> None:
 
     def test_add_tags(self):
         call_command('document_retagger', '--tags')
-        d_first, d_second, d_unrelated = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
 
         self.assertEqual(d_first.tags.count(), 1)
         self.assertEqual(d_second.tags.count(), 1)
         self.assertEqual(d_unrelated.tags.count(), 2)
+        self.assertEqual(d_auto.tags.count(), 1)
 
         self.assertEqual(d_first.tags.first(), self.tag_first)
         self.assertEqual(d_second.tags.first(), self.tag_second)
 
     def test_add_type(self):
         call_command('document_retagger', '--document_type')
-        d_first, d_second, d_unrelated = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
 
         self.assertEqual(d_first.document_type, self.doctype_first)
         self.assertEqual(d_second.document_type, self.doctype_second)
 
     def test_add_correspondent(self):
         call_command('document_retagger', '--correspondent')
-        d_first, d_second, d_unrelated = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
 
         self.assertEqual(d_first.correspondent, self.correspondent_first)
         self.assertEqual(d_second.correspondent, self.correspondent_second)
@@ -68,11 +73,55 @@ def test_overwrite_preserve_inbox(self):
 
         call_command('document_retagger', '--tags', '--overwrite')
 
-        d_first, d_second, d_unrelated = self.get_updated_docs()
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
 
         self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
 
         self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
         self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
         self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])
+        self.assertEqual(d_auto.tags.count(), 0)
 
+    def test_add_tags_suggest(self):
+        call_command('document_retagger', '--tags', '--suggest')
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.tags.count(), 0)
+        self.assertEqual(d_second.tags.count(), 0)
+        self.assertEqual(d_auto.tags.count(), 1)
+
+    def test_add_type_suggest(self):
+        call_command('document_retagger', '--document_type', '--suggest')
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.document_type, None)
+        self.assertEqual(d_second.document_type, None)
+
+    def test_add_correspondent_suggest(self):
+        call_command('document_retagger', '--correspondent', '--suggest')
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.correspondent, None)
+        self.assertEqual(d_second.correspondent, None)
+
+    def test_add_tags_suggest_url(self):
+        call_command('document_retagger', '--tags', '--suggest', '--base-url=http://localhost')
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.tags.count(), 0)
+        self.assertEqual(d_second.tags.count(), 0)
+        self.assertEqual(d_auto.tags.count(), 1)
+
+    def test_add_type_suggest_url(self):
+        call_command('document_retagger', '--document_type', '--suggest', '--base-url=http://localhost')
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.document_type, None)
+        self.assertEqual(d_second.document_type, None)
+
+    def test_add_correspondent_suggest_url(self):
+        call_command('document_retagger', '--correspondent', '--suggest', '--base-url=http://localhost')
+        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
+
+        self.assertEqual(d_first.correspondent, None)
+        self.assertEqual(d_second.correspondent, None)