Skip to content

Commit

Permalink
fix: replace AnnType with LayerEnum
Browse files Browse the repository at this point in the history
  • Loading branch information
10zinten committed Nov 30, 2022
1 parent fce9d7d commit e0feb81
Show file tree
Hide file tree
Showing 14 changed files with 210 additions and 200 deletions.
12 changes: 7 additions & 5 deletions openpecha/core/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from pydantic import BaseModel, validator

from .annotations import *
from .ids import get_uuid
from .metadata import PechaMetadata
from openpecha.core.annotations import *
from openpecha.core.ids import get_uuid
from openpecha.core.metadata import PechaMetadata


class LayerEnum(Enum):
Expand All @@ -28,6 +28,7 @@ class LayerEnum(Enum):
correction = "Correction"
error_candidate = "ErrorCandidate"
peydurma = "Peydurma"
pedurma_note = "PedurmaNote"
sabche = "Sabche"
tsawa = "Tsawa"
yigchung = "Yigchung"
Expand Down Expand Up @@ -87,6 +88,7 @@ def _get_annotation_class(layer_name: LayerEnum):
else:
return BaseAnnotation


class Layer(BaseModel):
id: str = None
annotation_type: LayerEnum
Expand Down Expand Up @@ -125,7 +127,7 @@ def get_annotation(self, annotation_id: str) -> Optional[BaseAnnotation]:
ann = ann_class.parse_obj(ann_dict)
return ann

def set_annotation(self, ann: BaseAnnotation, ann_id = None):
def set_annotation(self, ann: BaseAnnotation, ann_id=None):
"""Add or Update annotation `ann` to the layer, returns the annotation id"""
ann_id = ann_id if ann_id is not None else get_uuid()
self.annotations[ann_id] = json.loads(ann.json())
Expand All @@ -145,4 +147,4 @@ class SpanINFO(BaseModel):

class OCRConfidenceLayer(Layer):
confidence_threshold: float
annotation_type: LayerEnum = LayerEnum.ocr_confidence
annotation_type: LayerEnum = LayerEnum.ocr_confidence
18 changes: 10 additions & 8 deletions openpecha/formatters/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
from pathlib import Path
from uuid import uuid4

from .. import config
from .layers import *
from .layers import AnnType, _attr_names

from openpecha import config
from openpecha.core.layer import LayerEnum
from openpecha.formatters.layers import *
from openpecha.formatters.layers import _attr_names
from openpecha.utils import dump_yaml, load_yaml


class Global2LocalId:
"""Map global id of annotation in a layer to local id of a layer."""

Expand Down Expand Up @@ -249,7 +250,7 @@ def update_layer(self, layer, anns, vol_id):

def _get_vol_layers(self, layers):
for layer_name in layers:
if layer_name in [AnnType.topic, AnnType.sub_topic]:
if layer_name in [LayerEnum.topic, LayerEnum.sub_topic]:
continue
layers[layer_name] = _name(layer_name, layers[layer_name])
return zip(*layers.values())
Expand All @@ -260,7 +261,7 @@ def format_layer(self, layers):

# filter cross vols layers from layers
cross_vols_layers = {}
for cross_ann_name in [AnnType.topic, AnnType.sub_topic]:
for cross_ann_name in [LayerEnum.topic, LayerEnum.sub_topic]:
cross_vols_layers[cross_ann_name] = layers[cross_ann_name]
del layers[cross_ann_name]

Expand All @@ -282,12 +283,13 @@ def format_layer(self, layers):

yield result, vol_id

if AnnType.topic not in old_layers:
if LayerEnum.topic not in old_layers:
# Create Index layer
Index_layer = Layer(self.get_unique_id(), "index")
# loop over each topic
for topics, sub_topics in zip(
cross_vols_layers[AnnType.topic], cross_vols_layers[AnnType.sub_topic]
cross_vols_layers[LayerEnum.topic],
cross_vols_layers[LayerEnum.sub_topic],
):
if topics:
Topic = deepcopy(Text)
Expand Down
53 changes: 26 additions & 27 deletions openpecha/formatters/hfml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
HFML (Human Friendly Markup Language) contains the tagset used for structuring and annotating the text.
"""
import re
from json import encoder
from pathlib import Path

import yaml

from ..utils import Vol2FnManager, dump_yaml, load_yaml
from .formatter import BaseFormatter
from .layers import *
from .layers import AnnType, _attr_names
from openpecha.utils import Vol2FnManager, dump_yaml, load_yaml
from openpecha.formatters.formatter import BaseFormatter
from openpecha.formatters.layers import *
from openpecha.formatters.layers import LayerEnum, _attr_names


class HFMLFormatter(BaseFormatter):
Expand Down Expand Up @@ -103,7 +102,7 @@ def get_input(self, input_path):
yield self.text_preprocess(fn.read_text()), fn.name, fns_len

def total_pattern(self, pat_list, annotated_line):
""" It calculates the length of all the annotation detected in a line.
"""It calculates the length of all the annotation detected in a line.
Args:
pat_list (dict): It contains each annotation's regex pattern as value and the name of the annotation as key.
Expand Down Expand Up @@ -193,7 +192,7 @@ def total_pattern(self, pat_list, annotated_line):
return total_length

def merge(self, start_list, end_list):
""" It merges two list.
"""It merges two list.
The starting and ending of each annotation (citation, yigchung, sabche and tsawa) are stored in two lists.
Merging these two lists will generate a list in which both the starting and ending of an annotation are kept together in a tuple.
It is applicable only if the annotations are not cross-volume.
Expand All @@ -217,7 +216,7 @@ def merge(self, start_list, end_list):
return result

def search_before(self, ann, pat_list, line):
""" It calculates the length of annotation detected in a given line before a given annotation.
"""It calculates the length of annotation detected in a given line before a given annotation.
Args:
ann (match object): It is a match object of the annotation of which we want to calculate
the length of any annotation detected before it.
Expand Down Expand Up @@ -319,7 +318,7 @@ def search_before(self, ann, pat_list, line):
return length_before

def base_extract(self, pat_list, annotated_line):
""" It extract the base text from annotated text.
"""It extract the base text from annotated text.
Args:
pat_list (dict): It contains each annotation's regex pattern as value and the name of the annotation as key.
annotated_line (str): It contains the annotated line from which we want to extract the base text.
Expand Down Expand Up @@ -947,29 +946,29 @@ def get_result(self):
self.sub_topic = self.sub_topic[1:]
self.sub_topic = self.__final_sub_topic(self.sub_topic)
result = {
AnnType.book_title: self.book_title,
AnnType.book_number: self.book_number,
AnnType.author: self.author,
AnnType.poti_title: self.poti_title,
AnnType.chapter: self.chapter_title,
AnnType.citation: self.citation_pattern,
AnnType.pagination: self.page, # page variable format (start_index,end_index,pg_Info,pg_ann)
AnnType.topic: self.topic_id,
AnnType.sub_topic: self.sub_topic,
AnnType.sabche: self.sabche_pattern,
AnnType.tsawa: self.tsawa_pattern,
AnnType.yigchung: self.yigchung_pattern,
AnnType.correction: self.error_id,
AnnType.error_candidate: self.abs_er_id,
AnnType.peydurma: self.notes_id,
AnnType.archaic: self.archaic_word_id,
AnnType.durchen: self.durchen_pattern,
LayerEnum.book_title: self.book_title,
LayerEnum.book_number: self.book_number,
LayerEnum.author: self.author,
LayerEnum.poti_title: self.poti_title,
LayerEnum.chapter: self.chapter_title,
LayerEnum.citation: self.citation_pattern,
LayerEnum.pagination: self.page, # page variable format (start_index,end_index,pg_Info,pg_ann)
LayerEnum.topic: self.topic_id,
LayerEnum.sub_topic: self.sub_topic,
LayerEnum.sabche: self.sabche_pattern,
LayerEnum.tsawa: self.tsawa_pattern,
LayerEnum.yigchung: self.yigchung_pattern,
LayerEnum.correction: self.error_id,
LayerEnum.error_candidate: self.abs_er_id,
LayerEnum.peydurma: self.notes_id,
LayerEnum.archaic: self.archaic_word_id,
LayerEnum.durchen: self.durchen_pattern,
}

return result

def __final_sub_topic(self, sub_topics):
""" It include all the sub topic belonging in one topic in a list.
"""It include all the sub topic belonging in one topic in a list.
Args:
sub_topics (list): It contains the starting and ending index of every sub-topic annotation, along with the sub-topic info.
Expand Down
4 changes: 2 additions & 2 deletions openpecha/formatters/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
This module contains the format variables for all the annotations
"""

from collections import namedtuple
from enum import Enum

__all__ = [
Expand Down Expand Up @@ -31,7 +30,7 @@
]


class AnnType(Enum):
class LayerEnum(Enum):

This comment has been minimized.

Copy link
@eroux

eroux Nov 30, 2022

Contributor

You should remove the definition here; otherwise `LayerEnum` will be defined twice (here and in openpecha/core/layer.py).

book_title = "BookTitle"
sub_title = "SubTitle"
book_number = "BookNumber"
Expand All @@ -55,6 +54,7 @@ class AnnType(Enum):
durchen = "Durchen"
footnote = "Footnote"


class _attr_names:
# Layer
ID = "id" # Unique id for annotation of specific Pecha or Abstract work. type: str
Expand Down
16 changes: 8 additions & 8 deletions openpecha/formatters/pedurma.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import re
from pathlib import Path

from ..utils import Vol2FnManager
from .formatter import BaseFormatter
from .layers import *
from .layers import AnnType, _attr_names
from openpecha.core.layer import LayerEnum
from openpecha.formatters.formatter import BaseFormatter
from openpecha.formatters.layers import *
from openpecha.utils import Vol2FnManager


class PedurmaFormatter(BaseFormatter):
Expand Down Expand Up @@ -154,10 +154,10 @@ def build_layers(self, text):

def get_result(self):
result = {
AnnType.topic: [],
AnnType.sub_topic: [],
AnnType.pagination: self.page,
AnnType.pedurma_note: self.durchen,
LayerEnum.topic: [],
LayerEnum.sub_topic: [],
LayerEnum.pagination: self.page,
LayerEnum.pedurma_note: self.durchen,
}
return result

Expand Down
39 changes: 18 additions & 21 deletions openpecha/formatters/tsadra.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import os
from copy import deepcopy
from functools import partial
from pathlib import Path
from re import sub

from bs4 import BeautifulSoup

from .formatter import BaseFormatter
from .layers import *
from .layers import AnnType
from openpecha.formatters.formatter import BaseFormatter
from openpecha.formatters.layers import *
from openpecha.formatters.layers import LayerEnum


class TsadraTemplate:
"""Content tsadra template components.
"""
"""Content tsadra template components."""

author = [
"credits-page_front-page---text-author",
Expand Down Expand Up @@ -450,20 +447,20 @@ def get_result(self):
"""
self.get_footnote_ann()
result = {
AnnType.book_title: [self.book_title],
AnnType.sub_title: [self.sub_title],
AnnType.book_number: [self.book_number],
AnnType.poti_title: [self.poti_title],
AnnType.author: [self.author],
AnnType.chapter: [self.chapter],
AnnType.topic: [self.topic],
AnnType.sub_topic: [self.sub_topic],
AnnType.pagination: [self.pagination],
AnnType.tsawa: [self.root_text],
AnnType.citation: [self.citation],
AnnType.sabche: [self.sabche],
AnnType.yigchung: [self.yigchung],
AnnType.footnote: [self.footnote],
LayerEnum.book_title: [self.book_title],
LayerEnum.sub_title: [self.sub_title],
LayerEnum.book_number: [self.book_number],
LayerEnum.poti_title: [self.poti_title],
LayerEnum.author: [self.author],
LayerEnum.chapter: [self.chapter],
LayerEnum.topic: [self.topic],
LayerEnum.sub_topic: [self.sub_topic],
LayerEnum.pagination: [self.pagination],
LayerEnum.tsawa: [self.root_text],
LayerEnum.citation: [self.citation],
LayerEnum.sabche: [self.sabche],
LayerEnum.yigchung: [self.yigchung],
LayerEnum.footnote: [self.footnote],
}
return result

Expand Down
Loading

0 comments on commit e0feb81

Please sign in to comment.