Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8f6c43e
Added flyway script to fix database sequences
milanmajchrak Aug 12, 2025
1394297
Created submission-processes and submission-maps
milanmajchrak Aug 13, 2025
58fbfb3
Validated submission-forms
milanmajchrak Aug 13, 2025
8ed5fa3
Copied cfgs from v5 and templates
milanmajchrak Aug 13, 2025
fd0b286
Created scripts for generating evyuka_forms and for fetching vocabula…
milanmajchrak Aug 13, 2025
ef927e7
Created python script for copying vocabularies and generated fresh vo…
milanmajchrak Aug 13, 2025
6a72170
Generated forms using python script
milanmajchrak Aug 13, 2025
9da1823
Updated README - use python scripts
milanmajchrak Aug 13, 2025
96ec532
Added v7 forms
milanmajchrak Aug 13, 2025
de4b21b
Removed not existing forms
milanmajchrak Aug 13, 2025
97d7674
Added default qualifier tag
milanmajchrak Aug 13, 2025
403d269
temp current item-submission.xml
milanmajchrak Aug 13, 2025
1a90dae
Just problem with importing forms
milanmajchrak Aug 13, 2025
1e76a82
Updated fast dspace api package build
milanmajchrak Aug 14, 2025
ae89095
Created evyuka-types.xml for evyuka schema
milanmajchrak Aug 14, 2025
f728aad
Upload optional configuration fix
milanmajchrak Aug 14, 2025
dd1f513
Load content of the form-definitions from the external form definitio…
milanmajchrak Aug 14, 2025
a19724d
Do not create two types of the DCInputsReader, because the values (va…
milanmajchrak Aug 14, 2025
91987e5
Updated external form definitions and they are imported to the submis…
milanmajchrak Aug 14, 2025
f1dfe36
some changes
milanmajchrak Aug 15, 2025
b702b7e
good somehow
milanmajchrak Aug 15, 2025
e0b7f73
Another improvement in the cfg
milanmajchrak Aug 15, 2025
633c20a
Manually copied AUD vp and added it into README
milanmajchrak Aug 15, 2025
9d2c63e
Added missing imports
milanmajchrak Aug 15, 2025
8228027
The xml file cannot be empty
milanmajchrak Aug 15, 2025
45cae9b
The CLARIN versioning is used instead of vanilla one
milanmajchrak Aug 15, 2025
bb0e944
Fixed checkstyle issues
milanmajchrak Aug 15, 2025
506924e
Fixed testing submission-forms
milanmajchrak Aug 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions convert_forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
Script to convert v5 DSpace form definitions to v7 format
"""
import os
import re
import xml.etree.ElementTree as ET
from pathlib import Path

def is_v5_form_file(file_path):
    """Return True if *file_path* looks like a v5 form-definition file.

    A file counts as v5 when its text contains both the ``<page number=``
    and the ``<form name=`` markers.

    Args:
        file_path: Path (str or path-like) of the file to inspect.

    Returns:
        bool: True when both markers are present. False when they are not,
        or when the file cannot be opened or is not valid UTF-8 text.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: covers
        # UnicodeDecodeError (non-UTF-8 data) and invalid path strings.
        # Anything else (KeyboardInterrupt, programming errors) propagates
        # instead of being silently reported as "not a v5 file".
        return False
    return '<page number=' in content and '<form name=' in content

def is_v7_form_file(file_path):
    """Return True if *file_path* looks like a v7 form-definition file.

    A file counts as v7 when its text contains ``<form-definitions>``, or
    contains both ``<form name=`` and ``<row>``.

    Args:
        file_path: Path (str or path-like) of the file to inspect.

    Returns:
        bool: True when the v7 markers are present. False when they are not,
        or when the file cannot be opened or is not valid UTF-8 text.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: covers
        # UnicodeDecodeError (non-UTF-8 data) and invalid path strings.
        # Other exceptions are real failures and must not be swallowed.
        return False
    return '<form-definitions>' in content or ('<form name=' in content and '<row>' in content)

def extract_form_code_from_filename(filename):
    """Derive a form code from a file name such as ``evyuka_form_HGF.xml``.

    The name is tried against a list of patterns, most specific first; the
    captured group of the first pattern that matches is returned. If none
    matches, the file name without its extension is used.
    """
    # Ordered from most to least specific: evyuka_form_XXX.xml,
    # form_XXX.xml, XXX_form.xml, and finally any XXX.xml.
    candidate_patterns = (
        r'evyuka_form_([^.]+)\.xml',
        r'form_([^.]+)\.xml',
        r'([^_]+)_form\.xml',
        r'([^.]+)\.xml',
    )

    attempts = (re.search(expr, filename) for expr in candidate_patterns)
    first_hit = next((hit for hit in attempts if hit), None)

    if first_hit is None:
        # No pattern applied: fall back to the bare file stem.
        return Path(filename).stem
    return first_hit.group(1)

def extract_form_name_from_content(content):
    """Return the ``name`` attribute of the first ``<form name="...">`` tag.

    Returns None when *content* contains no such tag.
    """
    found = re.search(r'<form name="([^"]+)">', content)
    return found.group(1) if found else None

def convert_form_to_v7(input_file, output_file, form_code=None):
    """Convert a v5 form file to v7 format.

    Reads the v5 definition, collects every ``<field>`` found inside every
    ``<page number="N">`` element, and writes a ``<form-definitions>``
    document in which those fields are spread as evenly as possible over
    several ``<form>`` elements, each field wrapped in its own ``<row>``.

    Args:
        input_file: Path (str or ``Path``) of the v5 file to read.
        output_file: Path the converted document is written to. May be the
            same as *input_file*, in which case the file is overwritten.
        form_code: Code used when naming the forms of an ``e-vyuka`` form.
            When falsy it is derived from the input file name.

    Returns:
        bool: True when the file was converted and written; False (after
        printing a message) when no form name, no pages, or no fields were
        found in the input.
    """
    # Read the original v5 form
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    original_form_name = extract_form_name_from_content(content)
    if not original_form_name:
        print(f"Could not find form name in {input_file}")
        return False

    if not form_code:
        # Path(...) so that plain string paths work as well as Path objects.
        form_code = extract_form_code_from_filename(Path(input_file).name)

    pages = re.findall(r'<page number="(\d+)">(.*?)</page>', content, re.DOTALL)
    if not pages:
        print(f"No pages found in {input_file}")
        return False

    # Flatten the fields of all pages into one ordered list.
    field_pattern = r'<field>(.*?)</field>'
    all_fields = []
    for _page_number, page_content in pages:
        all_fields.extend(re.findall(field_pattern, page_content, re.DOTALL))

    if not all_fields:
        print(f"No fields found in {input_file}")
        return False

    # Target at least 3 forms (more if the v5 file had more pages), but never
    # more forms than fields: a <form> without any <row> is rejected by the
    # form reader ("Form ... has no rows").
    num_target_pages = min(max(3, len(pages)), len(all_fields))
    fields_per_page, remainder = divmod(len(all_fields), num_target_pages)
    # The first `remainder` forms take one extra field each.
    page_field_counts = [
        fields_per_page + 1 if i < remainder else fields_per_page
        for i in range(num_target_pages)
    ]

    # The first three forms are named ...pageone/two/three; e-vyuka forms use
    # the form code in the name, all others keep the original form name.
    if 'e-vyuka' in original_form_name:
        name_prefix = f"e-vyuka-{form_code}"
    else:
        name_prefix = original_form_name
    page_names = [f"{name_prefix}page{word}" for word in ['one', 'two', 'three']]
    # Any further forms are numbered and always based on the original name.
    page_names.extend(
        f"{original_form_name}page{number}"
        for number in range(len(page_names) + 1, num_target_pages + 1)
    )

    # Collect output fragments and join once at the end.
    parts = ['''<?xml version="1.0"?>
<!DOCTYPE form-definitions SYSTEM "submission-forms.dtd">

<form-definitions>
''']

    start = 0
    # zip() stops at the shorter list, i.e. at num_target_pages forms.
    for page_name, field_count in zip(page_names, page_field_counts):
        parts.append(f' <form name="{page_name}">\n')

        for raw_field in all_fields[start:start + field_count]:
            parts.append(' <row>\n')
            parts.append(' <field>\n')
            # Re-emit the field body line by line, dropping blank lines and
            # the original indentation.
            for line in raw_field.strip().split('\n'):
                cleaned_line = line.strip()
                if cleaned_line:
                    parts.append(' ' + cleaned_line + '\n')
            parts.append(' </field>\n')
            parts.append(' </row>\n\n')

        start += field_count
        parts.append(' </form>\n\n')

    parts.append('</form-definitions>')

    # Write the converted form
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    print(f"Converted {input_file} to v7 format -> {output_file}")
    return True

def find_all_form_files(directory):
    """Collect the ``*.xml`` files directly inside *directory* that are v5 forms.

    Files recognised as already being in v7 format are reported on stdout
    and left out; any other XML file is ignored silently.

    Returns:
        list[Path]: the v5 form files, in the order the directory listing
        produced them.
    """
    v5_files = []

    for candidate in Path(directory).glob('*.xml'):
        if is_v5_form_file(candidate):
            v5_files.append(candidate)
            continue
        if is_v7_form_file(candidate):
            print(f"Skipping {candidate.name} - already in v7 format")

    return v5_files

def main(config_dir=None):
    """Convert every v5 form definition file in a directory, in place.

    Each v5 file found is overwritten with its v7 conversion; progress and a
    final summary are printed to stdout.

    Args:
        config_dir: Directory to scan for ``*.xml`` form files. When omitted,
            the first command-line argument is used if one was given,
            otherwise the original default ``C:/dspace-be/dspace/config/vsb``.
    """
    import sys  # local import: only needed to read the optional CLI argument

    if config_dir is None:
        config_dir = sys.argv[1] if len(sys.argv) > 1 else 'C:/dspace-be/dspace/config/vsb'
    vsb_dir = Path(config_dir)

    if not vsb_dir.exists():
        print(f"Directory {vsb_dir} does not exist")
        return

    # Find all v5 form definition files
    v5_form_files = find_all_form_files(vsb_dir)

    if not v5_form_files:
        print("No v5 form definition files found for conversion")
        return

    print(f"Found {len(v5_form_files)} v5 form files to convert:")
    for file in v5_form_files:
        print(f" - {file.name}")

    # Convert each file, writing the result over the original.
    converted_count = 0
    for input_file in v5_form_files:
        form_code = extract_form_code_from_filename(input_file.name)
        if convert_form_to_v7(input_file, input_file, form_code):
            converted_count += 1

    print(f"\nConversion complete: {converted_count}/{len(v5_form_files)} files converted successfully")

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,11 @@ private void processDefinition(Node e)
if (rows.size() < 1) {
throw new DCInputsReaderException("Form " + formName + " has no rows");
}
} else if (nd.getNodeName().equals("form-definitions")) {
// Handle nested form-definitions elements (from XML entity expansion)
// Recursively process the nested form-definitions
processDefinition(nd);
numForms++; // Count this as having found forms to avoid the "No form definition found" error
}
}
if (numForms == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,20 +96,33 @@ private static synchronized void initPluginNames() {
if (pluginNames == null) {
try {
dcis = new HashMap<Locale, DCInputsReader>();

// Add default locale to locales if not already present
Locale defaultLocale = I18nUtil.getDefaultLocale();
Set<Locale> localeSet = new HashSet<>(Arrays.asList(locales));
if (!localeSet.contains(defaultLocale)) {
localeSet.add(defaultLocale);
locales = localeSet.toArray(new Locale[0]);
}

for (Locale locale : locales) {
dcis.put(locale, new DCInputsReader(I18nUtil.getInputFormsFileName(locale)));
String inputFormsFileName = I18nUtil.getInputFormsFileName(locale);
if (inputFormsFileName != null) {
dcis.put(locale, new DCInputsReader(inputFormsFileName));
} else {
// Fallback to default submission-forms.xml for this locale
dcis.put(locale, new DCInputsReader());
}
}
for (Locale l : locales) {
Iterator pi = dcis.get(l).getPairsNameIterator();

// Collect all unique pair names from all locales
for (Locale l : dcis.keySet()) {
DCInputsReader dci = dcis.get(l);
Iterator pi = dci.getPairsNameIterator();
while (pi.hasNext()) {
names.add((String) pi.next());
}
}
DCInputsReader dcirDefault = new DCInputsReader();
Iterator pi = dcirDefault.getPairsNameIterator();
while (pi.hasNext()) {
names.add((String) pi.next());
}
} catch (DCInputsReaderException e) {
log.error("Failed reading DCInputs initialization: ", e);
}
Expand All @@ -124,10 +137,13 @@ private void init() {
values = new HashMap<String, String[]>();
labels = new HashMap<String, String[]>();
String pname = this.getPluginInstanceName();
boolean foundAnyPairs = false;

for (Locale l : dcis.keySet()) {
DCInputsReader dci = dcis.get(l);
List<String> pairs = dci.getPairs(pname);
if (pairs != null) {
foundAnyPairs = true;
String[] valuesLocale = new String[pairs.size() / 2];
String[]labelsLocale = new String[pairs.size() / 2];
for (int i = 0; i < pairs.size(); i += 2) {
Expand All @@ -137,11 +153,15 @@ private void init() {
values.put(l.getLanguage(), valuesLocale);
labels.put(l.getLanguage(), labelsLocale);
log.debug("Found pairs for name=" + pname + ",locale=" + l);
} else {
log.error("Failed to find any pairs for name=" + pname, new IllegalStateException());
}
}

if (!foundAnyPairs) {
log.error("Failed to find any pairs for name=" + pname + " in any locale", new IllegalStateException());
// Initialize empty arrays to prevent NPE
values.put(I18nUtil.getDefaultLocale().getLanguage(), new String[0]);
labels.put(I18nUtil.getDefaultLocale().getLanguage(), new String[0]);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
--
-- The contents of this file are subject to the license and copyright
-- detailed in the LICENSE and NOTICE files at the root of the source
-- tree and available online at
--
-- http://www.dspace.org/license/
--

-- ===============================================================
-- WARNING: This script is designed to fix sequence synchronization
-- issues that occur when migrating from DSpace v5 to DSpace v7.
-- It updates all sequences to start from the correct values based
-- on existing data in the database.
-- ===============================================================

-- This migration addresses the issue where sequences are not properly
-- updated after migration from DSpace v5, causing constraint violations
-- during the RegistryUpdater callback execution.

-- Fix all sequences to prevent constraint violations during registry updates.
-- Each statement moves a sequence to the current maximum of the ID column it
-- feeds; COALESCE(..., 1) handles an empty table, where max() is NULL.
SELECT setval('alert_id_seq', COALESCE((SELECT max(alert_id) FROM systemwidealert), 1));
SELECT setval('bitstreamformatregistry_seq', COALESCE((SELECT max(bitstream_format_id) FROM bitstreamformatregistry), 1));
SELECT setval('checksum_history_check_id_seq', COALESCE((SELECT max(check_id) FROM checksum_history), 1));
SELECT setval('cwf_claimtask_seq', COALESCE((SELECT max(claimtask_id) FROM cwf_claimtask), 1));
SELECT setval('cwf_collectionrole_seq', COALESCE((SELECT max(collectionrole_id) FROM cwf_collectionrole), 1));
SELECT setval('cwf_in_progress_user_seq', COALESCE((SELECT max(in_progress_user_id) FROM cwf_in_progress_user), 1));
SELECT setval('cwf_pooltask_seq', COALESCE((SELECT max(pooltask_id) FROM cwf_pooltask), 1));
SELECT setval('cwf_workflowitem_seq', COALESCE((SELECT max(workflowitem_id) FROM cwf_workflowitem), 1));
SELECT setval('cwf_workflowitemrole_seq', COALESCE((SELECT max(workflowitemrole_id) FROM cwf_workflowitemrole), 1));
SELECT setval('doi_seq', COALESCE((SELECT max(doi_id) FROM doi), 1));
SELECT setval('entity_type_id_seq', COALESCE((SELECT max(id) FROM entity_type), 1));
SELECT setval('fileextension_seq', COALESCE((SELECT max(file_extension_id) FROM fileextension), 1));
SELECT setval('handle_id_seq', COALESCE((SELECT max(handle_id) FROM handle), 1));
SELECT setval('harvested_collection_seq', COALESCE((SELECT max(id) FROM harvested_collection), 1));
SELECT setval('harvested_item_seq', COALESCE((SELECT max(id) FROM harvested_item), 1));
SELECT setval('metadatafieldregistry_seq', COALESCE((SELECT max(metadata_field_id) FROM metadatafieldregistry), 1));
SELECT setval('metadataschemaregistry_seq', COALESCE((SELECT max(metadata_schema_id) FROM metadataschemaregistry), 1));
SELECT setval('metadatavalue_seq', COALESCE((SELECT max(metadata_value_id) FROM metadatavalue), 1));
SELECT setval('openurltracker_seq', COALESCE((SELECT max(tracker_id) FROM openurltracker), 1));
SELECT setval('orcid_history_id_seq', COALESCE((SELECT max(id) FROM orcid_history), 1));
SELECT setval('orcid_queue_id_seq', COALESCE((SELECT max(id) FROM orcid_queue), 1));
SELECT setval('orcid_token_id_seq', COALESCE((SELECT max(id) FROM orcid_token), 1));
SELECT setval('process_id_seq', COALESCE((SELECT max(process_id) FROM process), 1));
SELECT setval('registrationdata_seq', COALESCE((SELECT max(registrationdata_id) FROM registrationdata), 1));
SELECT setval('relationship_id_seq', COALESCE((SELECT max(id) FROM relationship), 1));
SELECT setval('relationship_type_id_seq', COALESCE((SELECT max(id) FROM relationship_type), 1));
SELECT setval('requestitem_seq', COALESCE((SELECT max(requestitem_id) FROM requestitem), 1));
SELECT setval('resourcepolicy_seq', COALESCE((SELECT max(policy_id) FROM resourcepolicy), 1));
-- subscription_parameter_seq feeds the table's own primary key
-- (subscription_parameter_id); subscription_id in that table is only the
-- foreign key to subscription, so it must not be used here.
-- NOTE(review): column name taken from the DSpace 7 schema - confirm against this fork.
SELECT setval('subscription_parameter_seq', COALESCE((SELECT max(subscription_parameter_id) FROM subscription_parameter), 1));
SELECT setval('subscription_seq', COALESCE((SELECT max(subscription_id) FROM subscription), 1));

-- Sequences that might not exist in all DSpace installations: each one is
-- synchronised only when information_schema.sequences lists it.
DO $$
DECLARE
    target RECORD;
BEGIN
    FOR target IN
        SELECT *
        FROM (VALUES
            -- (sequence, table, ID column)
            ('supervision_orders_id_seq',     'supervision_orders',        'id'),
            ('versionhistory_seq',            'versionhistory',            'versionhistory_id'),
            ('versionitem_seq',               'versionitem',               'versionitem_id'),
            ('workspaceitem_seq',             'workspaceitem',             'workspace_item_id'),
            -- NOTE(review): the next two read a result_id column, while the
            -- unconditional statement for checksum_history uses check_id -
            -- confirm these columns exist wherever the sequences do.
            ('most_recent_checksum_seq',      'most_recent_checksum',      'result_id'),
            ('checksum_history_seq',          'checksum_history',          'result_id'),
            -- Preview sequences
            ('preview_content_seq',           'preview_content',           'preview_content_id'),
            ('preview_content_bitstream_seq', 'preview_content_bitstream', 'preview_content_bitstream_id')
        ) AS optional_sequences(seq_name, table_name, column_name)
    LOOP
        IF EXISTS (SELECT 1 FROM information_schema.sequences WHERE sequence_name = target.seq_name) THEN
            -- Same statement as for the mandatory sequences: move the
            -- sequence to the column's current maximum, or 1 for an empty table.
            EXECUTE format(
                'SELECT setval(%L, COALESCE((SELECT max(%I) FROM %I), 1))',
                target.seq_name, target.column_name, target.table_name
            );
        END IF;
    END LOOP;
END $$;
Loading
Loading