Skip to content

Manual validation #1097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 26 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b8cfb0e
Start working on validation API
OskarPersson Dec 16, 2019
9cf3504
Add information_package field to ValidatorWorkflowSerializer
OskarPersson Dec 16, 2019
428f9b1
Use empty list as standard form for validators
OskarPersson Dec 16, 2019
9832b1c
Add Validate task and update view and serializers
OskarPersson Dec 16, 2019
dda23ef
form => get_form
OskarPersson Dec 16, 2019
7c24262
Add form and serializer to XMLComparisonValidator
OskarPersson Dec 16, 2019
745f554
Add form and serializer to XMLSyntaxValidator
OskarPersson Dec 16, 2019
e864aa8
Remove old validation objects when retrying Validate task
OskarPersson Dec 16, 2019
7ee7bf9
Add purpose to validation workflow serializer
OskarPersson Dec 16, 2019
f262ff5
Remove required field from recursive field in form
OskarPersson Dec 16, 2019
ed399ac
Add manual validation functionality
Dec 16, 2019
630618a
Do not apply filtering on validations list if there is no validation …
Dec 16, 2019
b6056c9
Validate path
OskarPersson Dec 17, 2019
36d7001
Make purpose not required
Dec 17, 2019
526b6dc
Add base url attribute to validation component
Jan 7, 2020
5f4b9ea
Add tests for FilePathField
OskarPersson Jan 10, 2020
78a7db7
Add tests for validation viewsets
OskarPersson Jan 10, 2020
5f109ef
Remove DiffCheckValidatorSerializer
OskarPersson Mar 19, 2020
337acac
Fix serializers, validators and tests
OskarPersson Mar 20, 2020
3fec58c
Generalize serializers, more testing
OskarPersson Mar 20, 2020
f30bb33
Fix ValidateLogicalPhysicalRepresentation and CompareXMLFiles
OskarPersson Mar 20, 2020
0510e0d
Working GUI, improved error reporting and parallel validation
OskarPersson Mar 20, 2020
8b25e4f
Merge branch 'master' into manual-validator-workflow
OskarPersson Mar 20, 2020
5d35bf3
Fix tests
OskarPersson Mar 23, 2020
e56ae43
Merge branch 'master' into manual-validator-workflow
OskarPersson Mar 30, 2020
d2d3b05
Merge branch 'master' into manual-validator-workflow
OskarPersson Mar 31, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions ESSArch_Core/api/fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os

from django.utils.translation import gettext_lazy as _
from rest_framework import serializers


class FilePathField(serializers.CharField):
    """Character field holding a path that must exist below a base directory.

    The submitted value is joined onto ``path`` (the base directory given at
    construction time). Validation fails with the ``invalid_path`` error when
    the joined path does not exist or when it resolves to a location outside
    the base directory.
    """

    default_error_messages = {
        'invalid_path': _('{input} is not a valid path.'),
    }

    def __init__(self, path, **kwargs):
        """Store the base directory and forward remaining options to CharField.

        :param path: base directory that submitted values are resolved against
        """
        self.path = path
        super().__init__(**kwargs)

    def to_internal_value(self, data):
        """Return ``data`` joined onto the base directory.

        :param data: raw value submitted by the client
        :returns: ``os.path.join(self.path, data)``
        :raises rest_framework.serializers.ValidationError: if the joined path
            does not exist or escapes the base directory
        """
        data = super().to_internal_value(data)
        full_path = os.path.join(self.path, data)

        # os.path.join() discards the base when ``data`` is absolute and keeps
        # ``..`` segments untouched, so the existence check alone would accept
        # any existing file on the system. Compare the resolved locations to
        # make sure the result really lives below the base directory.
        base = os.path.realpath(self.path)
        resolved = os.path.realpath(full_path)
        try:
            inside_base = os.path.commonpath([base, resolved]) == base
        except ValueError:
            # commonpath() refuses paths it cannot compare, e.g. different drives
            inside_base = False

        if not inside_base or not os.path.exists(full_path):
            self.fail('invalid_path', input=data)

        return full_path
20 changes: 20 additions & 0 deletions ESSArch_Core/api/tests/test_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os

from rest_framework import serializers
from rest_framework.test import APITestCase

from ESSArch_Core.api.fields import FilePathField


class FilePathFieldTests(APITestCase):
    """Checks FilePathField using the directory of this test module as base."""

    @classmethod
    def setUpTestData(cls):
        base_dir = os.path.abspath(os.path.dirname(__file__))
        cls.field = FilePathField(base_dir)

    def test_valid_path(self):
        # The full path of this file and its bare file name must both be
        # accepted and come back as the full path of this file.
        for candidate in (__file__, os.path.basename(__file__)):
            self.assertEqual(self.field.run_validation(candidate), __file__)

    def test_invalid_path(self):
        # A name that does not exist in the base directory is rejected.
        self.assertRaises(
            serializers.ValidationError,
            self.field.run_validation,
            'invalid_file',
        )
1 change: 1 addition & 0 deletions ESSArch_Core/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@
RABBITMQ_URL = os.environ.get('RABBITMQ_URL_ESSARCH', 'amqp://guest:guest@localhost:5672')
CELERY_BROKER_URL = RABBITMQ_URL
CELERY_IMPORTS = (
"ESSArch_Core.fixity.validation.tasks",
"ESSArch_Core.ip.tasks",
"ESSArch_Core.maintenance.tasks",
"ESSArch_Core.preingest.tasks",
Expand Down
3 changes: 2 additions & 1 deletion ESSArch_Core/config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
ConversionToolViewSet,
ValidationFilesViewSet,
ValidationViewSet,
ValidatorViewSet,
)
from ESSArch_Core.ip.views import (
ConsignMethodViewSet,
Expand Down Expand Up @@ -295,7 +296,6 @@


router.register(r'organizations', OrganizationViewSet, basename='organizations')

router.register(r'appraisal-jobs', AppraisalJobViewSet).register(
r'information-packages',
AppraisalJobInformationPackageViewSet,
Expand All @@ -321,6 +321,7 @@
router.register(r'conversion-templates', ConversionTemplateViewSet)
router.register(r'conversion-tools', ConversionToolViewSet)
router.register(r'features', FeatureViewSet, basename='features')
router.register(r'validators', ValidatorViewSet, basename='validators')
router.register(r'validations', ValidationViewSet)
router.register(r'events', EventIPViewSet)
router.register(r'event-types', EventTypeViewSet)
Expand Down
42 changes: 42 additions & 0 deletions ESSArch_Core/fixity/serializers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from rest_framework import serializers

from ESSArch_Core.fixity.models import ConversionTool, Validation
from ESSArch_Core.fixity.validation import get_backend as get_validator
from ESSArch_Core.ip.models import InformationPackage


class ConversionToolSerializer(serializers.ModelSerializer):
Expand All @@ -11,6 +13,46 @@ class Meta:
fields = ('name', 'form',)


class ValidatorWorkflowSerializer(serializers.Serializer):
    """Validates a request describing which validators to run on a package.

    Each entry of ``validators`` is a JSON object with a ``name`` key selecting
    the validator backend, the backend's own fields, and an optional
    ``options`` object checked by the backend's options serializer.
    """

    purpose = serializers.CharField(default='Validation')
    information_package = serializers.PrimaryKeyRelatedField(queryset=InformationPackage.objects.all())
    validators = serializers.ListField(min_length=1, child=serializers.JSONField())

    def validate_validators(self, validators):
        """Validate every validator entry with its backend's serializers.

        :param validators: list of JSON values taken from the request
        :returns: list of dicts holding the backend serializer's validated
            data plus the ``name`` and validated ``options`` of each entry
        :raises rest_framework.serializers.ValidationError: if an entry is not
            an object, has no usable ``name``, names an unknown validator, or
            fails validation by the backend's serializers
        """
        # The raw primary key is read from the request because the validated
        # value of the sibling field is not available in a field-level hook.
        ip = self.context['request'].data.get('information_package', None)
        sub_context = {'information_package': ip}
        sub_context.update(self.context)

        new_data = []
        for validator in validators:
            # JSONField accepts any JSON value; only objects are meaningful here.
            if not isinstance(validator, dict):
                raise serializers.ValidationError('Each validator must be a JSON object.')

            # Work on a shallow copy so the request payload is left untouched.
            validator = dict(validator)

            name = validator.pop('name', None)
            if not isinstance(name, str):
                raise serializers.ValidationError('Each validator must have a "name".')

            try:
                klass = get_validator(name)
            except ValueError as exc:
                # Unknown validator names are a client error, not a server error.
                raise serializers.ValidationError(str(exc)) from exc

            serializer = klass.get_serializer_class()(
                data=validator, context=sub_context,
            )
            # raise_exception must be passed by keyword; newer DRF releases
            # no longer accept it positionally.
            serializer.is_valid(raise_exception=True)
            data = serializer.validated_data
            data['name'] = name

            options_serializer = klass.get_options_serializer_class()(
                data=validator.get('options', {}),
                context={
                    'information_package': ip,
                    'base_data': data,
                },
            )
            options_serializer.is_valid(raise_exception=True)
            data['options'] = options_serializer.validated_data

            new_data.append(data)

        return new_data


class ValidationSerializer(serializers.ModelSerializer):
specification = serializers.JSONField(read_only=True)

Expand Down
13 changes: 13 additions & 0 deletions ESSArch_Core/fixity/tests/test_views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from django.contrib.auth import get_user_model
from django.urls import reverse
from rest_framework import status
from rest_framework.test import APITestCase

User = get_user_model()


class ValidatorViewSetTests(APITestCase):
    """Tests for the endpoints registered under the ``validators`` basename."""

    def test_list(self):
        # The list endpoint must answer a plain GET with 200 OK.
        response = self.client.get(reverse('validators-list'))
        self.assertEqual(response.status_code, status.HTTP_200_OK)
20 changes: 12 additions & 8 deletions ESSArch_Core/fixity/validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@
PATH_VARIABLE = "_PATH"


def get_backend(name):
    """Return the validator class registered under ``name``.

    The dotted path stored in ``AVAILABLE_VALIDATORS`` is split into module
    and class name, the module is imported and the class is looked up on it.

    :param name: key of the validator in ``AVAILABLE_VALIDATORS``
    :raises ValueError: if ``name`` is not a key of ``AVAILABLE_VALIDATORS``
    """
    try:
        dotted_path = AVAILABLE_VALIDATORS[name]
    except KeyError:
        raise ValueError('Validator "%s" not found' % name)

    module_path, class_name = dotted_path.rsplit('.', 1)
    module = importlib.import_module(module_path)
    return getattr(module, class_name)


def _validate_file(path, validators, task=None, ip=None, stop_at_failure=True, responsible=None):
for validator in validators:
included = False
Expand Down Expand Up @@ -94,21 +103,16 @@ def validate_path(path, validators, profile, data=None, task=None, ip=None, stop
validator_instances = []

for name in validators:
try:
module_name, validator_class = AVAILABLE_VALIDATORS[name].rsplit('.', 1)
except KeyError:
raise ValueError('Validator "%s" not found' % name)

validator = getattr(importlib.import_module(module_name), validator_class)

validator_klass = get_backend(name)
for specification in profile.specification.get(name, []):
required = specification.get('required', True)
context = specification.get('context')
include = [os.path.join(path, included) for included in specification.get('include', [])]
exclude = [os.path.join(path, excluded) for excluded in specification.get('exclude', [])]
options = specification.get('options', {})

validator_instance = validator(
validator_instance = validator_klass(
name,
context=context,
include=include,
exclude=exclude,
Expand Down
25 changes: 25 additions & 0 deletions ESSArch_Core/fixity/validation/backends/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import click
from rest_framework import serializers

from ESSArch_Core.api.fields import FilePathField


class BaseValidator:
Expand All @@ -19,6 +22,28 @@ def __init__(self, context=None, include=None, exclude=None, options=None,
self.ip = ip
self.responsible = responsible

class Serializer(serializers.Serializer):
    """Base serializer for the fields shared by validator backends.

    Declares ``context`` and, at construction time, adds a ``path`` field
    that is resolved against the object path of the information package
    whose primary key is passed in the serializer context.
    """

    context = serializers.CharField()

    def __init__(self, *args, **kwargs):
        # Imported here rather than at module level - presumably to avoid a
        # circular import; confirm before moving it.
        from ESSArch_Core.ip.models import InformationPackage

        super().__init__(*args, **kwargs)

        # The serializer context must carry the package's primary key.
        package_pk = kwargs['context']['information_package']
        package = InformationPackage.objects.get(pk=package_pk)
        self.fields['path'] = FilePathField(
            package.object_path,
            allow_blank=True,
            default='',
        )

class OptionsSerializer(serializers.Serializer):
    """Empty options serializer; backends subclass it to declare their options."""
    pass

@classmethod
def get_serializer_class(cls):
    """Return the ``Serializer`` class defined on this validator class."""
    return cls.Serializer

@classmethod
def get_options_serializer_class(cls):
    """Return the ``OptionsSerializer`` class defined on this validator class."""
    return cls.OptionsSerializer

def validate(self, filepath, expected=None):
    """Placeholder that every validator subclass has to override.

    :param filepath: path handed to the validator
    :param expected: optional value; its meaning is defined by the subclass
    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError('subclasses of BaseValidator must provide a validate() method')

Expand Down
72 changes: 66 additions & 6 deletions ESSArch_Core/fixity/validation/backends/checksum.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
import traceback
import os

from django.utils import timezone
from rest_framework import serializers

from ESSArch_Core.essxml.util import find_file
from ESSArch_Core.exceptions import ValidationError
Expand All @@ -25,6 +26,59 @@ class ChecksumValidator(BaseValidator):
* ``block_size``: Defaults to 65536
"""

label = 'Checksum Validator'

@classmethod
def get_form(cls):
    """Return the form field definitions for the checksum validator.

    The result is a list of three dicts: the path to validate, the checksum
    algorithm (a select defaulting to SHA-256) and the expected checksum.
    NOTE(review): the ``key``/``type``/``templateOptions`` layout looks like
    a formly schema consumed by the frontend - confirm.
    """
    algorithms = ('MD5', 'SHA-1', 'SHA-224', 'SHA-256', 'SHA-384', 'SHA-512')

    path_field = {
        'key': 'path',
        'type': 'input',
        'templateOptions': {
            'label': 'Path to validate',
            'required': True,
        },
    }

    algorithm_field = {
        'key': 'options.algorithm',
        'type': 'select',
        'defaultValue': 'SHA-256',
        'templateOptions': {
            'label': 'Checksum algorithm',
            'required': True,
            'labelProp': 'name',
            'valueProp': 'value',
            # Each algorithm is offered with its name as both label and value.
            'options': [{'name': algorithm, 'value': algorithm} for algorithm in algorithms],
        },
    }

    expected_field = {
        'key': 'options.expected',
        'type': 'input',
        'templateOptions': {
            'label': 'Checksum',
            'required': True,
        },
    }

    return [path_field, algorithm_field, expected_field]

class Serializer(BaseValidator.Serializer):
    """Serializer for the checksum validator's base fields."""
    # Overrides the inherited field so that ``context`` may be omitted.
    # NOTE(review): 'checksum_str' presumably means the expected checksum is
    # given as a literal string - confirm against validate().
    context = serializers.CharField(default='checksum_str')
    # Integer option defaulting to 65536.
    block_size = serializers.IntegerField(default=65536)

class OptionsSerializer(BaseValidator.OptionsSerializer):
    """Serializer for the checksum validator's ``options``."""
    # Required string; no default is declared.
    expected = serializers.CharField()
    # Restricted to the listed algorithm names; SHA-256 when omitted.
    algorithm = serializers.ChoiceField(
        choices=['MD5', 'SHA-1', 'SHA-224', 'SHA-256', 'SHA-384', 'SHA-512'],
        default='SHA-256',
    )

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

Expand All @@ -36,8 +90,14 @@ def __init__(self, *args, **kwargs):

def validate(self, filepath, expected=None):
logger.debug('Validating checksum of %s' % filepath)

if self.ip is not None:
relpath = os.path.relpath(filepath, self.ip.object_path)
else:
relpath = filepath

val_obj = Validation.objects.create(
filename=filepath,
filename=relpath,
time_started=timezone.now(),
validator=self.__class__.__name__,
required=self.required,
Expand Down Expand Up @@ -66,14 +126,14 @@ def validate(self, filepath, expected=None):
actual_checksum = calculate_checksum(filepath, algorithm=self.algorithm, block_size=self.block_size)
if actual_checksum != checksum:
raise ValidationError("checksum for %s is not valid (%s != %s)" % (
filepath, checksum, actual_checksum
relpath, checksum, actual_checksum
))
passed = True
except Exception:
val_obj.message = traceback.format_exc()
except Exception as e:
val_obj.message = str(e)
raise
else:
message = 'Successfully validated checksum of %s' % filepath
message = 'Successfully validated checksum of %s' % relpath
val_obj.message = message
logger.info(message)
finally:
Expand Down
Loading