Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
python -m pip install --upgrade pip
pip install mypy ruff pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
sudo apt-get update
sudo apt-get install pandoc tidy ghostscript python3 texlive-fonts-recommended texlive-lang-cyrillic texlive-latex-extra texlive-plain-generic
- name: Lint with ruff
run: ruff check --output-format=github
Expand All @@ -45,7 +46,9 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install apt packages (for debbuild)
run: sudo apt-get install debhelper dh-virtualenv dpkg-dev python3-venv automake g++ make libboost-regex-dev libgmp-dev python3 git build-essential
run: |
sudo apt-get update
sudo apt-get install debhelper dh-virtualenv dpkg-dev python3-venv automake g++ make libboost-regex-dev libgmp-dev python3 git build-essential
shell: bash
- name: Build debian packages
run: make builddeb
38 changes: 34 additions & 4 deletions problemtools/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
import re
from dataclasses import dataclass, field
from enum import StrEnum
from pathlib import Path
from typing import Any, Literal, Self, Type, Union
from uuid import UUID

from pydantic import BaseModel, ConfigDict, Field
import yaml

from . import config
from . import formatversion
from . import statement_util


class ProblemType(StrEnum):
Expand Down Expand Up @@ -189,7 +192,7 @@ class Metadata(BaseModel):
"""

problem_format_version: str
type: list[str]
type: list[ProblemType]
name: dict[str, str]
uuid: UUID | None
version: str | None
Expand Down Expand Up @@ -224,7 +227,12 @@ def from_legacy(cls: Type[Self], legacy: MetadataLegacy, names_from_statements:
metadata = legacy.model_dump()
metadata['type'] = [metadata['type']]
# Support for *ancient* problems where names_from_statements is empty
metadata['name'] = names_from_statements if names_from_statements else {'': metadata['name']}
if names_from_statements:
metadata['name'] = names_from_statements
elif metadata['name']:
metadata['name'] = {'en': metadata['name']}
else:
metadata['name'] = {}
metadata['version'] = None

def parse_author_field(author: str) -> list[Person]:
Expand Down Expand Up @@ -301,7 +309,9 @@ def parse_person(person: str | Person) -> Person:


def parse_metadata(
version: formatversion.FormatData, problem_yaml_data: dict[str, Any], names_from_statements: dict[str, str]
version: formatversion.FormatData,
problem_yaml_data: dict[str, Any],
names_from_statements: dict[str, str] | None = None,
) -> Metadata:
"""
Parses a data structure from problem.yaml into a Metadata model
Expand All @@ -318,8 +328,28 @@ def parse_metadata(

if version.name == formatversion.VERSION_LEGACY:
legacy_model = MetadataLegacy.model_validate(data)
return Metadata.from_legacy(legacy_model, names_from_statements)
return Metadata.from_legacy(legacy_model, names_from_statements or {})
else:
assert version.name == formatversion.VERSION_2023_07
model_2023_07 = Metadata2023_07.model_validate(data)
return Metadata.from_2023_07(model_2023_07)


def load_metadata(problem_root: Path) -> tuple[Metadata, dict]:
    """
    Loads metadata from a problem directory.

    Reads problem.yaml in problem_root, picks the format version from its
    problem_format_version key (legacy if the key is absent), and parses the
    data into a Metadata model. For legacy problems, the problem names are
    looked up in the problem statements.

    Returns Metadata as well as the raw parsed yaml. The latter is likely only of use to verifyproblem.
    Leaks exceptions (e.g., from opening or parsing the file, or from validation),
    which is a bit of a mess. Unclear how to best deal with error handling.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so non-ASCII metadata loads the same way everywhere.
    with (problem_root / 'problem.yaml').open(encoding='utf-8') as f:
        data = yaml.safe_load(f)
    if data is None:  # Loading empty yaml returns None
        data = {}

    version = formatversion.get_format_data_by_name(data.get('problem_format_version', formatversion.VERSION_LEGACY))
    if version.name == formatversion.VERSION_LEGACY:
        # Only the legacy format takes problem names from the statements
        names_from_statements = statement_util.load_names_from_statements(problem_root, version)
    else:
        names_from_statements = None
    return parse_metadata(version, data, names_from_statements), data
50 changes: 40 additions & 10 deletions problemtools/statement_util.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,54 @@
import os
from typing import Optional, List, Tuple
import collections
import html
import json
import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Optional, List, Tuple

from . import formatversion
from . import verifyproblem
from . import metadata

ALLOWED_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg') # ".svg"
FOOTNOTES_STRINGS = ['<section class="footnotes">', '<aside class="footnotes">']


def find_statements(problem_root: Path, version: formatversion.FormatData) -> dict[str, list[Path]]:
    """Returns a dict mapping language code to a list of paths to statements

    Each returned path is prefixed with problem_root and the statement
    directory of the given format version. If that directory does not exist,
    an empty dict is returned.

    Note that in well-formed problem packages, there should only be a single
    statement for each language, but this function returns all found
    statements, to let the caller inform the user of errors.
    """

    directory = problem_root / version.statement_directory
    ret = collections.defaultdict(list)
    if directory.is_dir():
        # Matches problem.<ext> and problem.<lang>.<ext>, where <lang> is e.g. 'en', 'swe' or 'en-US'
        filename_re = re.compile(r'^problem(\.([a-z]{2,3}|[a-z]{2}-[A-Z]{2}))?\.(%s)$' % ('|'.join(version.statement_extensions)))
        # iterdir() yields entries in arbitrary order. Sort so that the result (and in
        # particular which statement comes first for a language) is deterministic.
        for file in sorted(directory.iterdir()):
            if m := filename_re.search(file.name):
                if m.group(2) is None:  # problem.tex is allowed and assumed to be 'en' in legacy. We ignore it in newer formats.
                    if version.name == formatversion.VERSION_LEGACY:
                        ret['en'].append(file)
                else:
                    ret[m.group(2)].append(file)
    return dict(ret)


def load_names_from_statements(problem_root: Path, version: formatversion.FormatData) -> dict[str, str]:
    """Returns a dict mapping language code => problem name

    Only the first statement found for each language is read. Languages whose
    statement contains no problemname macro are left out of the result.
    """

    assert version.name == formatversion.VERSION_LEGACY, 'load_names_from_statements only makes sense for legacy format'
    ret: dict[str, str] = {}
    for lang, files in find_statements(problem_root, version).items():
        # Decode explicitly as UTF-8 instead of relying on the platform's locale
        # encoding, so non-ASCII problem names are read the same way everywhere.
        hit = re.search(r'\\problemname{(.*)}', files[0].read_text(encoding='utf-8'))
        if hit:
            ret[lang] = hit.group(1).strip()
    return ret


def find_statement(problem_root: str, extension: str, language: Optional[str]) -> Optional[str]:
"""Finds the "best" statement for given language and extension"""
statement_dir = Path(problem_root) / formatversion.get_format_data(problem_root).statement_directory
Expand Down Expand Up @@ -56,13 +91,8 @@ def find_statement_extension(problem_root: str, language: Optional[str]) -> str:
def get_yaml_problem_name(problem: str, language: Optional[str]) -> str:
"""Finds the problem name from the problem.yaml file"""

# Minimal setup to get the problem name
problem_obj = verifyproblem.Problem(problem)
statement_obj = verifyproblem.ProblemStatement(problem_obj)
problem_obj._data[statement_obj.PART_NAME] = statement_obj.setup()
verifyproblem.ProblemConfig(problem_obj).setup()

names = problem_obj.getMetadata().name
problem_metadata, _ = metadata.load_metadata(Path(problem))
names = problem_metadata.name
# If there is only one language, per the spec that is the one we want
if len(names) == 1:
return next(iter(names.values()))
Expand Down
39 changes: 13 additions & 26 deletions problemtools/verifyproblem.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import random
import traceback
import uuid
from pathlib import Path

import yaml

Expand Down Expand Up @@ -818,41 +819,27 @@ def get_config(self) -> dict[str, dict[str, str]]:
class ProblemConfig(ProblemPart):
PART_NAME = 'config'

@staticmethod
def setup_dependencies():
return {ProblemStatement}

def setup(self):
self.debug(' Loading problem config')
self.configfile = os.path.join(self.problem.probdir, 'problem.yaml')

self._data = {}
if os.path.isfile(self.configfile):
try:
with open(self.configfile) as f:
self._data = yaml.safe_load(f)
# Loading empty yaml yields None, for no apparent reason...
if self._data is None:
self._data = {}
except Exception as e:
self.error(str(e))
else:
# This should likely be a fatal error, but I'm not sure there's a clean way to fail from setup
self.error(f'No config file {self.configfile} found')

self._origdata = copy.deepcopy(self._data)

try:
self._metadata = metadata.parse_metadata(
self.problem.format,
self._data,
self.problem.get(ProblemStatement).get('name', {}),
)
self._metadata, self._origdata = metadata.load_metadata(Path(self.problem.probdir))
self.problem.setMetadata(self._metadata)
except ValidationError as e:
# This should likely be a fatal error, but I'm not sure there's a clean way to fail from setup
error_str = '\n'.join([f' {"->".join((str(loc) for loc in err["loc"]))}: {err["msg"]}' for err in e.errors()])
self.error(f'Failed parsing problem.yaml. Found {len(e.errors())} errors:\n{error_str}')
# For now, set metadata to an empty legacy config to avoid crashing.
self.problem.setMetadata(
metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY), {})
)
except Exception as e:
# This should likely be a fatal error, but I'm not sure there's a clean way to fail from setup
self.error(f'Failed loading problem configuration: {e}')
# For now, set metadata to an empty legacy config to avoid crashing.
self.problem.setMetadata(
metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY), {})
)
return {}

def __str__(self) -> str:
Expand Down
26 changes: 24 additions & 2 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
from pathlib import Path

import pytest

from pydantic import ValidationError
Expand All @@ -8,13 +10,18 @@


def test_parse_empty_legacy():
m = metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY), {}, {'en': 'Hello World!'})
m = metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY), {})
# Just check off a few random things
assert m.name['en'] == 'Hello World!'
assert not m.name
assert not m.source
assert not m.credits.authors


def test_parse_legacy_with_problem_names():
    """Names supplied from statements end up as the names of a legacy problem."""
    legacy_format = formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY)
    statement_names = {'en': 'Hello World!'}
    parsed = metadata.parse_metadata(legacy_format, {}, statement_names)
    assert parsed.name['en'] == 'Hello World!'


def test_parse_empty_2023_fails():
with pytest.raises(ValidationError):
metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_2023_07), {}, {'en': 'Hello World!'})
Expand Down Expand Up @@ -78,3 +85,18 @@ def test_parse_multi_source(minimal_2023_conf):
assert m.source[1].url is None
assert m.source[2].name == 'SEERC 2024'
assert m.source[2].url is None


def test_load_hello():
    """Loads the 'hello' problem located next to this file and checks a sample of its metadata."""
    hello_dir = Path(__file__).parent / 'hello'
    loaded, _raw_yaml = metadata.load_metadata(hello_dir)

    # Problem names, one per language
    assert loaded.name['en'] == 'Hello World!'
    assert loaded.name['sv'] == 'Hej Världen!'

    # Source and license
    assert len(loaded.source) == 1
    assert loaded.source[0].name == 'Kattis'
    assert loaded.source[0].url is None
    assert loaded.license is metadata.License.PUBLIC_DOMAIN

    # Problem type and the helpers derived from it
    assert len(loaded.type) == 1
    assert loaded.type[0] is metadata.ProblemType.PASS_FAIL
    assert loaded.is_pass_fail()
    assert not loaded.is_scoring()
    assert not loaded.is_interactive()