-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_questions.py
165 lines (144 loc) · 6.45 KB
/
generate_questions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python3
""" Generate Moodle SC/MC-questions for a course from a source document (PPTX, PDF or plain text) using Azure OpenAI. """
import os
import sys
import logging
from enum import Enum
from openai import AzureOpenAI
#from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from dotenv import load_dotenv
from pptx import Presentation
import fitz # PyMuPDF
load_dotenv() # take environment variables from .env.
# Setup logging
# The level name is read from the LOGGING_LEVEL environment variable and
# resolved to the attribute of the same (upper-cased) name on the logging module.
LOGGING_LEVEL = os.environ.get('LOGGING_LEVEL', 'INFO') # default to INFO if no env var set
numeric_level = getattr(logging, LOGGING_LEVEL.upper(), None)
if not isinstance(numeric_level, int):
    # Abort at import time instead of running with an unknown log level.
    raise ValueError(f'Invalid log level: {LOGGING_LEVEL}')
logging.basicConfig(level=numeric_level)
logger = logging.getLogger(__name__)
# Connection settings for the Azure OpenAI client created in generate_questions();
# each value is None when the corresponding environment variable is not set.
endpoint = os.getenv("OPENAI_ENDPOINT_URL")
apikey = os.getenv("OPENAI_API_KEY")
deployment = os.getenv("OPENAI_DEPLOYMENT_NAME")
class QuestionFormat(Enum):
    """ Enumeration of question formats with samples.

    The value of each member is a text snippet that describes or exemplifies
    one output format. generate_questions() inserts the value of the selected
    member verbatim into the system prompt, so the text of the literals below
    is part of the program's behaviour.
    """
    # Comma-separated table: a header row followed by one row per question.
    QFORMAT = """
Use the following output structure (this first line contains the table column headers, every further line represents a question; Question1 and Question2 are samples not to be contained in your output):
questionname,questiontext,A,B,C,D,Answer 1,Answer 2,answernumbering,correctfeedback,partiallycorrectfeedback,incorrectfeedback,defaultmark
Question1,The dmesg command,Shows user login logoff attempts,Shows the syslog file for info messages,kernel log messages,Shows the daemon log messages,C,,123,Your answer is correct.,Your answer is partially correct.,Your answer is incorrect.,1
Question2,The command “mknod myfifo b 4 16”,Will create a block device if user is root,Will create a block device for all users,Will create a FIFO if user is not root,"None ,of the mentioned",A,B,ABCD,Your answer is correct.,Your answer is partially correct.,Your answer is incorrect.,1
"""
    # AIKEN format description see https://docs.moodle.org/403/en/Aiken_format
    # Sample: question text, lettered options, then an "ANSWER:" line.
    AIKEN = """
What is the correct answer to this question?
A. This is not the correct answer
B. This answer is wrong
C. Also here a wrong answer
D. This answer is correct!
ANSWER: D
"""
    # GIFT format description see https://docs.moodle.org/403/en/GIFT_format, https://docs.moodle.org/404/en/GIFT_format#Multiple_Answers
    # Note: the lines starting with '#' inside the literal are part of the sample, not Python comments.
    GIFT = """
//A Comment for a question1
:: What is the correct answer to this question?{
=A correct answer
~Wrong answer1
#A response to wrong answer1
~Wrong answer2
#A response to wrong answer2
~Wrong answer3
#A response to wrong answer3
~Wrong answer4
#A response to wrong answer4
}
"""
def load_contents(source_file:str, export_content:bool=True)->str:
    """ Load the text contents of a file into a string.

    The loader is selected by the extension of source_file, compared
    case-insensitively:
      * .pptx - text of every shape that has a "text" attribute, one shape per line
      * .pdf  - extracted text of every page, in page order
      * anything else - the file is read as UTF-8 text

    Args:
        source_file: Path of the file to load.
        export_content: If True, the extracted text is additionally written
            (UTF-8) to a file with the same base name and the extension
            ".txt". The export is skipped when that file is source_file
            itself, so a plain-text source is never rewritten.

    Returns:
        The extracted text.
    """
    logger.info("Loading contents of %s ...", source_file)
    source_filename, source_ext = os.path.splitext(source_file)
    # Compare in lower case so that e.g. "Slides.PPTX" is not read as plain text.
    source_ext = source_ext.lower()
    if source_ext == '.pptx':
        # see https://python-pptx.readthedocs.io/en/latest/user/quickstart.html
        presentation = Presentation(source_file)
        parts = []
        for slide in presentation.slides:
            # Not every shape carries text (e.g. pictures), hence the hasattr check.
            parts.extend(shape.text + "\n" for shape in slide.shapes if hasattr(shape, "text"))
        content = "".join(parts)
    elif source_ext == '.pdf':
        # see https://pymupdf.readthedocs.io/en/latest/tutorial.html
        # The context manager closes the document (and its file handle) on exit.
        with fitz.open(source_file) as pdf_document:
            content = "".join(
                pdf_document.load_page(page_num).get_text()
                for page_num in range(pdf_document.page_count)
            )
    else:
        # treat as text-file
        with open(source_file, 'r', encoding="utf-8") as content_file:
            content = content_file.read()
    if export_content:
        export_file = source_filename + '.txt'
        # Opening the source itself for writing would truncate it; skip in that case.
        is_source = os.path.exists(export_file) and os.path.samefile(export_file, source_file)
        if not is_source:
            with open(export_file, 'w', encoding="utf-8") as output_file:
                output_file.write(content)
    return content
def generate_questions(course_title:str, slides_title:str, source_file:str, num:int=10,
                       target_file:str=None, question_format:QuestionFormat=QuestionFormat.AIKEN)->str:
    """ Generate Moodle questions for a source document with Azure OpenAI.

    The text of source_file is extracted with load_contents() and sent,
    together with a system prompt built from the course title, the topic and
    the sample of the selected question format, to the chat-completions
    deployment configured through the environment.

    Args:
        course_title: Title of the course, inserted into the system prompt.
        slides_title: Current topic, inserted into the system prompt.
        source_file: Path of the document to generate questions for.
        num: Number of questions requested from the model.
        target_file: Optional path; when given, the generated text is also
            written to this file (UTF-8).
        question_format: Format whose sample text is added to the system prompt.

    Returns:
        The text generated by the model; an empty string if the response
        contained no text.
    """
    logger.info("Generating questions for %s ...", source_file)
    # Authentication uses the API key from the environment.
    client = AzureOpenAI(
        api_key=apikey,
        api_version="2024-05-01-preview",
        azure_endpoint=endpoint,
    )
    # Extract the text of the source document
    content = load_contents(source_file)
    messages = [
        {
            "role": "system",
            "content": f"""You are an tutor for a course on informatics dealing with {course_title}.
The current topic is {slides_title}.
You will create questions about the topic and output as file, which is later used by the Moodle Question import.:
{question_format.value}
"""},{
            "role": "user",
            "content": f"Generate {num} multiple-choice questions with one single correct answere covering only the specified contents, provided in Markdown format, as follows: {content}"
        }]
    completion = client.chat.completions.create(
        model=deployment,
        messages=messages,
        max_tokens=1000, # ca. 100 tokens needed per question
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        stream=False
    )
    choice = completion.choices[0]
    if choice.finish_reason == "length":
        # The token budget was exhausted, so the last question is probably cut off.
        logger.warning("Response was truncated at max_tokens; fewer than %s complete questions may have been generated", num)
    questions = choice.message.content
    if questions is None:
        # The API may return a message without text; keep the declared str
        # return type instead of failing later in write().
        logger.warning("The model returned no text content (finish_reason=%s)", choice.finish_reason)
        questions = ""
    if target_file is not None:
        logger.info("Writing questions to %s", target_file)
        with open(target_file, 'w', encoding="utf-8") as output_file:
            output_file.write(questions)
    return questions
if __name__ == "__main__":
    # Command-line entry point.
    # Four positional arguments are required; the target file is optional.
    if len(sys.argv) < 5:
        print(f"Usage: {sys.argv[0]} <course-title> <slides-title> <markdown-file> <num-questions> [target-file]")
        print()
        print("Example:")
        print(f"{sys.argv[0]} \"Software Engineering 1 - Labor\" \"Introduction to Programming in C#\" \"catalogs/fhtw/programming/csharp/csharp_intro.md\" 10")
        # Missing arguments are a usage error: report failure to the calling shell.
        sys.exit(2)
    coursetitle = sys.argv[1]
    slidestitle = sys.argv[2]
    # Path to the source document (passed on to generate_questions as source_file)
    markdownfile = sys.argv[3]
    try:
        numquestions = int(sys.argv[4])
    except ValueError:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"<num-questions> must be an integer, got '{sys.argv[4]}'")
    if len(sys.argv) > 5:
        targetfile = sys.argv[5]
        generate_questions(coursetitle, slidestitle, markdownfile, numquestions, targetfile)
        print(f"Questions written to {targetfile}")
    else:
        # Without a target file the generated questions go to stdout.
        result = generate_questions(coursetitle, slidestitle, markdownfile, num=numquestions)
        print(result)