Skip to content

Commit 9ec40f0

Browse files
committed
implemented strategy which procedurally turns a list of questions and solutions to JSON in mathpix_to_llm_to_procedural_to_JSON.ipynb
1 parent 0ac47de commit 9ec40f0

File tree

2 files changed

+34
-68
lines changed

2 files changed

+34
-68
lines changed

conversion2025/mathpix_to_llm_to_llm_to_JSON.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@
117117
"outputs": [],
118118
"source": [
119119
"folder_path = \"conversion_content\"\n",
120-
"output_path = f\"{folder_path}/mathpix_to_llm_to_JSON_out\"\n",
120+
"output_path = f\"{folder_path}/mathpix_to_llm_to_llm_to_JSON_out\"\n",
121121
"media_path = f\"{output_path}/media\"\n",
122122
"\n",
123123
"Path(media_path).mkdir(parents=True, exist_ok=True)\n",

conversion2025/mathpix_to_llm_to_procedural_to_JSON.ipynb

Lines changed: 33 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@
117117
"outputs": [],
118118
"source": [
119119
"folder_path = \"conversion_content\"\n",
120-
"output_path = f\"{folder_path}/mathpix_to_llm_to_JSON_out\"\n",
120+
"output_path = f\"{folder_path}/mathpix_to_llm_to_procedural_to_JSON_out\"\n",
121121
"media_path = f\"{output_path}/media\"\n",
122122
"\n",
123123
"Path(media_path).mkdir(parents=True, exist_ok=True)\n",
@@ -457,74 +457,40 @@
457457
" publish: bool = Field(..., description=\"Publish flag\")\n",
458458
" title: str = Field(..., description=\"Question title\")\n",
459459
"\n",
460-
"def create_question_json(question: str, solution: str) -> dict:\n",
461-
" # Initialize the output parser using the defined Pydantic model.\n",
462-
" parser = PydanticOutputParser(pydantic_object=QuestionJson)\n",
463-
"\n",
464-
" # Minimum JSON template to guide the model. (Used as context.)\n",
465-
" minimum_json_template = r'''{\n",
466-
" \"orderNumber\": 0,\n",
467-
" \"displayFinalAnswer\": true,\n",
468-
" \"displayStructuredTutorial\": true,\n",
469-
" \"displayWorkedSolution\": true,\n",
470-
" \"displayChatbot\": false,\n",
471-
" \"masterContent\": \"Top level question here\",\n",
472-
" \"parts\": [\n",
473-
" {\n",
474-
" \"answerContent\": \"final answer here corresponding the part, is no answer found, leave empty\",\n",
475-
" \"content\": \"part question text here, if only one part, then leave empty\",\n",
476-
" \"orderNumber\": 0,\n",
477-
" \"responseAreas\": [],\n",
478-
" \"tutorial\": [],\n",
479-
" \"workedSolution\": {\n",
480-
" \"content\": \"Part worked solution here\",\n",
481-
" \"title\": \"\",\n",
482-
" \"children\": []\n",
483-
" }\n",
484-
" }\n",
485-
" ],\n",
486-
" \"publish\": false,\n",
487-
" \"title\": \"Question title here\"\n",
488-
" }'''\n",
489-
"\n",
490-
" # Construct the prompt, appending the parser's format instructions.\n",
491-
" question_prompt = f'''\n",
492-
" JSON_TEMPLATE\n",
493-
" ```json\n",
494-
" {minimum_json_template}\n",
495-
" ```\n",
496-
"\n",
497-
" IMPORTED_QUESTION\n",
498-
" ```markdown\n",
499-
" {question}\n",
500-
" ```\n",
501-
"\n",
502-
" IMPORTED_SOLUTION\n",
503-
" ```markdown\n",
504-
" {solution}\n",
505-
" ```\n",
506-
"\n",
507-
" Preserve the markdown math formatting to use $...$ for math expressions. Do not modify the original text of the question.\n",
508-
"\n",
509-
" Infer the final answer and put it in the answerContent field of the part. \n",
510-
" The worked solution should be in the workedSolution.content field.\n",
511-
"\n",
512-
" If you cannot find a suitable text for any of the sections, leave it empty.\n",
513-
"\n",
514-
" {parser.get_format_instructions()}\n",
515-
" '''\n",
460+
"def create_question_json(question: dict, solution: dict) -> dict:\n",
461+
" \n",
462+
" # create the list of parts from the question and solution.\n",
463+
" # Each part corresponds to a subquestion and its worked solution.\n",
464+
" # The orderNumber is the index of the subquestion in the list.\n",
465+
" parts = []\n",
466+
" for idx, (subquestion, workedSolution) in enumerate(zip(question.get(\"subquestions\", []), solution.get(\"workedSolutions\", []))):\n",
467+
" part = Part(\n",
468+
" answerContent=\"\",\n",
469+
" content=subquestion,\n",
470+
" orderNumber=idx,\n",
471+
" responseAreas=[],\n",
472+
" tutorial=[],\n",
473+
" workedSolution=WorkedSolution(\n",
474+
" content=workedSolution,\n",
475+
" title=subquestion,\n",
476+
" children=[]\n",
477+
" )\n",
478+
" )\n",
479+
" parts.append(part)\n",
480+
" \n",
481+
" # Create the QuestionJson object with the provided question and solution.\n",
482+
" return QuestionJson(\n",
483+
" orderNumber=0,\n",
484+
" displayFinalAnswer=True,\n",
485+
" displayStructuredTutorial=True,\n",
486+
" displayWorkedSolution=True,\n",
487+
" masterContent=question.get(\"content\", \"\"),\n",
488+
" parts=parts,\n",
489+
" publish=False,\n",
490+
" title=question.get(\"title\", \"\")\n",
491+
" ).model_dump()\n",
516492
"\n",
517-
" # Invoke the language model.\n",
518-
" response = llm.invoke(question_prompt)\n",
519493
"\n",
520-
" try:\n",
521-
" # Parse the response using the output parser.\n",
522-
" parsed_output = parser.parse(response.content)\n",
523-
" return parsed_output.model_dump() # Return as a dictionary.\n",
524-
" except Exception as e:\n",
525-
" print(\"Error parsing JSON from LLM response:\", e)\n",
526-
" print(\"LLM response:\", response.content)\n",
527-
" return None\n",
528494
"\n"
529495
]
530496
},

0 commit comments

Comments
 (0)