Skip to content

Commit

Permalink
Support for extracting images as separate elements from both questions and answers.
Browse files Browse the repository at this point in the history
  • Loading branch information
rolfis committed Jan 15, 2021
1 parent ae555cb commit 9a54f29
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 17 deletions.
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ name = "pypi"
[packages]
logzero = "*"
lxml = "*"
python-docx = "*"
htmldocx = "*"

[dev-packages]

Expand Down
33 changes: 32 additions & 1 deletion Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 46 additions & 6 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
"""

__author__ = "Rolf Johansson"
__license__ = "Apache License 2.0"
__version__ = "0.1.0"
__license__ = "Apache"

from qti_parser import question_type
import formats
import argparse
import json
import re
import hashlib
from logzero import logger
from lxml import etree

Expand All @@ -18,7 +21,6 @@
}

def main(args):
""" Main entry point of the app """
logger.info("QTI converter utility.")
logger.info(args)

Expand All @@ -31,6 +33,8 @@ def main(args):
'title': '',
'question': []
}

# TODO: Should be prefixed with PATH part of input filename since paths in XML are relative
this_assessment_xml = this_assessment['id'] + "/" + this_assessment['id'] + ".xml"

for xml_item in etree.parse(this_assessment_xml).getroot().findall(".//{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}item"):
Expand All @@ -42,6 +46,35 @@ def main(args):
'text': xml_item.find("{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}presentation/{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}material/{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}mattext").text
}

# TODO: Fix images in a better way
image = []
if this_question['text'].lower().find("<p>.*<img"):
for match in re.finditer('<p>.*<img src=\"([^\"]+)\".*>.*</p>', this_question['text'], re.DOTALL):
image.append({
'id': str(hashlib.md5(match.group(1).replace("%24IMS-CC-FILEBASE%24/", "").encode()).hexdigest()),
'href': match.group(1).replace("%24IMS-CC-FILEBASE%24/", "")
})
p = re.compile('<p>.*<img src=\"([^\"]+)\".*>.*</p>')
subn_tuple = p.subn('', this_question['text'])
if subn_tuple[1] > 0:
this_question['text'] = subn_tuple[0]

elif this_question['text'].lower().find("<img"):
for match in re.finditer('<img src=\"([^\"]+)\".*>', this_question['text'], re.DOTALL):
image.append({
'id': str(hashlib.md5(match.group(1).replace("%24IMS-CC-FILEBASE%24/", "").encode()).hexdigest()),
'href': match.group(1).replace("%24IMS-CC-FILEBASE%24/", "")
})
p = re.compile('<img src=\"([^\"]+)\".*>')
subn_tuple = p.subn('', this_question['text'])
if subn_tuple[1] > 0:
this_question['text'] = subn_tuple[0]

if image:
this_question['image'] = image

# <p><img src="Exercise_09_05-06_03a.png" alt="Exercise_09_05-06_03a.png" width="393" height="126"></p>

if this_question['question_type'] == "multiple_choice_question":
this_question['answer'] = question_type.multiple_choice.get_answers(xml_item)
elif this_question['question_type'] == "true_false_question":
Expand All @@ -56,8 +89,16 @@ def main(args):

qti_resource['assessment'].append(this_assessment)

qti_resource_json = json.dumps(qti_resource, indent = 2)
print(qti_resource_json)
if (args.format.lower() == "json"):
logger.info("Output to STDOUT as JSON.")
qti_resource_json = json.dumps(qti_resource, indent = 2)
print(qti_resource_json)
elif (args.format.lower() == "pdf"):
logger.error("Format not supported yet: " + args.format)
elif (args.format.lower() == "docx"):
formats.docx.write_file(qti_resource)
else:
logger.error("Unknown format: " + args.format)

except OSError as e:
logger.error("%s", e)
Expand All @@ -68,11 +109,10 @@ def main(args):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Convert QTI files into other formats.", add_help=False)
parser.add_argument("input", help="QTI input file.")
parser.add_argument("input", help="QTI input file (imsmanifest.xml).")
parser.add_argument("-v", action="count", default=0, help="Verbosity (-v, -vv, etc).")
parser.add_argument("-f", action="store", dest="format", default="json", help="Output format, defaults to JSON.")
parser.add_argument( "--version", action="version", help="Display version and exit.", version="%(prog)s (version {version})".format(version=__version__))
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='Show this help message and exit.')
args = parser.parse_args()
print(args)
main(args)
3 changes: 2 additions & 1 deletion src/qti_parser/question_type/multiple_answers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def get_answers(xml):
{
'id': xml_answer_item.get("ident"),
'text': xml_answer_item.find("{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}material/{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}mattext").text,
'correct': True if xml_answer_item.get("ident") in correct_answers else False
'correct': True if xml_answer_item.get("ident") in correct_answers else False,
'display': True
}
)
except OSError as e:
Expand Down
32 changes: 25 additions & 7 deletions src/qti_parser/question_type/multiple_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from lxml import etree
from logzero import logger
import re
import hashlib

def get_answers(xml):
""" Return an array of possible answers """
Expand All @@ -15,13 +17,29 @@ def get_answers(xml):

try:
for xml_answer_item in xml.findall(".//{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}response_label"):
answers.append(
{
'id': xml_answer_item.get("ident"),
'text': xml_answer_item.find("{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}material/{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}mattext").text,
'correct': True if xml_answer_item.get("ident") in correct_answers else False
}
)
image = []
this_answer = {}
this_answer['id'] = xml_answer_item.get("ident")
this_answer['text'] = xml_answer_item.find("{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}material/{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}mattext").text
this_answer['correct'] = True if xml_answer_item.get("ident") in correct_answers else False
this_answer['display'] = True

if this_answer['text'].lower().find("<img.*"):
for match in re.finditer('^<img src="([^"]+)".*>', this_answer['text'], re.DOTALL):
image.append({
'id': str(hashlib.md5(match.group(1).replace("%24IMS-CC-FILEBASE%24/", "").encode()).hexdigest()),
'href': match.group(1).replace("%24IMS-CC-FILEBASE%24/", "")
})
p = re.compile('<img src="([^"]+)".*>')
subn_tuple = p.subn('', this_answer['text'])
if subn_tuple[1] > 0:
this_answer['text'] = subn_tuple[0]

if image:
this_answer['image'] = image

answers.append(this_answer)

except OSError as e:
logger.error("%s", e)
except etree.ParseError as e:
Expand Down
3 changes: 2 additions & 1 deletion src/qti_parser/question_type/short_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def get_answers(xml):
{
'id': str(i),
'text': xml_answer_item.text,
'correct': True
'correct': True,
'display': False
}
)
except OSError as e:
Expand Down
3 changes: 2 additions & 1 deletion src/qti_parser/question_type/true_false.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def get_answers(xml):
{
'id': xml_answer_item.get("ident"),
'text': xml_answer_item.find("{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}material/{http://www.imsglobal.org/xsd/ims_qtiasiv1p2}mattext").text,
'correct': True if xml_answer_item.get("ident") in correct_answers else False
'correct': True if xml_answer_item.get("ident") in correct_answers else False,
'display': True
}
)
except OSError as e:
Expand Down

0 comments on commit 9a54f29

Please sign in to comment.