|
117 | 117 | "outputs": [], |
118 | 118 | "source": [ |
119 | 119 | "folder_path = \"conversion_content\"\n", |
120 | | - "output_path = f\"{folder_path}/mathpix_to_llm_to_JSON_out\"\n", |
| 120 | + "output_path = f\"{folder_path}/mathpix_to_llm_to_procedural_to_JSON_out\"\n", |
121 | 121 | "media_path = f\"{output_path}/media\"\n", |
122 | 122 | "\n", |
123 | 123 | "Path(media_path).mkdir(parents=True, exist_ok=True)\n", |
|
457 | 457 | " publish: bool = Field(..., description=\"Publish flag\")\n", |
458 | 458 | " title: str = Field(..., description=\"Question title\")\n", |
459 | 459 | "\n", |
460 | | - "def create_question_json(question: str, solution: str) -> dict:\n", |
461 | | - " # Initialize the output parser using the defined Pydantic model.\n", |
462 | | - " parser = PydanticOutputParser(pydantic_object=QuestionJson)\n", |
463 | | - "\n", |
464 | | - " # Minimum JSON template to guide the model. (Used as context.)\n", |
465 | | - " minimum_json_template = r'''{\n", |
466 | | - " \"orderNumber\": 0,\n", |
467 | | - " \"displayFinalAnswer\": true,\n", |
468 | | - " \"displayStructuredTutorial\": true,\n", |
469 | | - " \"displayWorkedSolution\": true,\n", |
470 | | - " \"displayChatbot\": false,\n", |
471 | | - " \"masterContent\": \"Top level question here\",\n", |
472 | | - " \"parts\": [\n", |
473 | | - " {\n", |
474 | | - " \"answerContent\": \"final answer here corresponding the part, is no answer found, leave empty\",\n", |
475 | | - " \"content\": \"part question text here, if only one part, then leave empty\",\n", |
476 | | - " \"orderNumber\": 0,\n", |
477 | | - " \"responseAreas\": [],\n", |
478 | | - " \"tutorial\": [],\n", |
479 | | - " \"workedSolution\": {\n", |
480 | | - " \"content\": \"Part worked solution here\",\n", |
481 | | - " \"title\": \"\",\n", |
482 | | - " \"children\": []\n", |
483 | | - " }\n", |
484 | | - " }\n", |
485 | | - " ],\n", |
486 | | - " \"publish\": false,\n", |
487 | | - " \"title\": \"Question title here\"\n", |
488 | | - " }'''\n", |
489 | | - "\n", |
490 | | - " # Construct the prompt, appending the parser's format instructions.\n", |
491 | | - " question_prompt = f'''\n", |
492 | | - " JSON_TEMPLATE\n", |
493 | | - " ```json\n", |
494 | | - " {minimum_json_template}\n", |
495 | | - " ```\n", |
496 | | - "\n", |
497 | | - " IMPORTED_QUESTION\n", |
498 | | - " ```markdown\n", |
499 | | - " {question}\n", |
500 | | - " ```\n", |
501 | | - "\n", |
502 | | - " IMPORTED_SOLUTION\n", |
503 | | - " ```markdown\n", |
504 | | - " {solution}\n", |
505 | | - " ```\n", |
506 | | - "\n", |
507 | | - " Preserve the markdown math formatting to use $...$ for math expressions. Do not modify the original text of the question.\n", |
508 | | - "\n", |
509 | | - " Infer the final answer and put it in the answerContent field of the part. \n", |
510 | | - " The worked solution should be in the workedSolution.content field.\n", |
511 | | - "\n", |
512 | | - " If you cannot find a suitable text for any of the sections, leave it empty.\n", |
513 | | - "\n", |
514 | | - " {parser.get_format_instructions()}\n", |
515 | | - " '''\n", |
| 460 | + "def create_question_json(question: dict, solution: dict) -> dict:\n", |
| 461 | + " \n", |
| 462 | + "    # Build the list of parts by pairing each subquestion with its worked solution.\n", |
| 463 | + "    # Pairing is positional via zip(), which stops at the shorter list, so unmatched trailing items are dropped.\n", |
| 464 | + "    # Each part's orderNumber is the zero-based index of its subquestion.\n", |
| 465 | + " parts = []\n", |
| 466 | + " for idx, (subquestion, workedSolution) in enumerate(zip(question.get(\"subquestions\", []), solution.get(\"workedSolutions\", []))):\n", |
| 467 | + " part = Part(\n", |
| 468 | + " answerContent=\"\",\n", |
| 469 | + " content=subquestion,\n", |
| 470 | + " orderNumber=idx,\n", |
| 471 | + " responseAreas=[],\n", |
| 472 | + " tutorial=[],\n", |
| 473 | + " workedSolution=WorkedSolution(\n", |
| 474 | + " content=workedSolution,\n", |
| 475 | + " title=subquestion,\n", |
| 476 | + " children=[]\n", |
| 477 | + " )\n", |
| 478 | + " )\n", |
| 479 | + " parts.append(part)\n", |
| 480 | + " \n", |
| 481 | + "    # Assemble the QuestionJson model from the question fields and parts, then return it as a plain dict.\n", |
| 482 | + " return QuestionJson(\n", |
| 483 | + " orderNumber=0,\n", |
| 484 | + " displayFinalAnswer=True,\n", |
| 485 | + " displayStructuredTutorial=True,\n", |
| 486 | + " displayWorkedSolution=True,\n", |
| 487 | + " masterContent=question.get(\"content\", \"\"),\n", |
| 488 | + " parts=parts,\n", |
| 489 | + " publish=False,\n", |
| 490 | + " title=question.get(\"title\", \"\")\n", |
| 491 | + " ).model_dump()\n", |
516 | 492 | "\n", |
517 | | - " # Invoke the language model.\n", |
518 | | - " response = llm.invoke(question_prompt)\n", |
519 | 493 | "\n", |
520 | | - " try:\n", |
521 | | - " # Parse the response using the output parser.\n", |
522 | | - " parsed_output = parser.parse(response.content)\n", |
523 | | - " return parsed_output.model_dump() # Return as a dictionary.\n", |
524 | | - " except Exception as e:\n", |
525 | | - " print(\"Error parsing JSON from LLM response:\", e)\n", |
526 | | - " print(\"LLM response:\", response.content)\n", |
527 | | - " return None\n", |
528 | 494 | "\n" |
529 | 495 | ] |
530 | 496 | }, |
|
0 commit comments