[Feature] Checkpointing with Workflows #17006

Merged: 69 commits, Nov 26, 2024

Commits
347be55
initial design of WorkflowProfile and auxiliary classes
nerdai Nov 19, 2024
9493f83
add more scaffolding
nerdai Nov 19, 2024
2df3bdc
rework into Workflow
nerdai Nov 20, 2024
5645b9e
move checkpointing outside of .send_event()
nerdai Nov 20, 2024
1af980b
unit test for create_checkpoint method
nerdai Nov 20, 2024
26f0bfd
rename incoming_event to input_event
nerdai Nov 20, 2024
166715a
add unit test for primitive filtering
nerdai Nov 20, 2024
a638027
add assertion for raising when no filter specified
nerdai Nov 20, 2024
da06c50
fix mypy issue
nerdai Nov 20, 2024
327bee6
more mypy issues
nerdai Nov 20, 2024
4d8db5a
add ability to run_from a given checkpoint
nerdai Nov 21, 2024
ae8673d
add sample nb
nerdai Nov 21, 2024
cc7e7b5
mypy nit
nerdai Nov 21, 2024
0c09793
cr
nerdai Nov 21, 2024
93f7b11
cr _broker_log to _checkpoints
nerdai Nov 21, 2024
6fb2657
update broker_log entry in serialization to checkpoints
nerdai Nov 21, 2024
569fdb8
add ability to turn off checkpoints
nerdai Nov 21, 2024
bb63157
add disabling checkpointing to notebook
nerdai Nov 21, 2024
7b0790d
add unit test for Workflow.run_from
nerdai Nov 21, 2024
4c2518b
fix broken tests part 1
nerdai Nov 21, 2024
eb4e3dc
add context_serializer param at init and private attr to Workflow, wh…
nerdai Nov 21, 2024
7d485d8
rename to checkpoint_serializer
nerdai Nov 21, 2024
ef73fbf
change store_checkpoints default to False
nerdai Nov 21, 2024
dba9a65
update nb
nerdai Nov 21, 2024
cfdefa5
move checkpointing to Workflow
nerdai Nov 22, 2024
dc7a5ea
move unit tests to test_workflow
nerdai Nov 22, 2024
e6b7757
revert Context back
nerdai Nov 22, 2024
7c75c81
lint, fix tests
nerdai Nov 22, 2024
c8a5048
improve error handling
nerdai Nov 22, 2024
7c932f6
cr
nerdai Nov 22, 2024
266b41d
add serializers to module top-level import
nerdai Nov 22, 2024
8d81fee
update nb and docstring for run_from
nerdai Nov 22, 2024
260c991
intro workflow checkpointer
nerdai Nov 22, 2024
42ec8ab
revert events back
nerdai Nov 22, 2024
79695bd
add WorkflowCheckpointer
nerdai Nov 22, 2024
000c0d2
fix test
nerdai Nov 23, 2024
48c9cd5
assert not done()
nerdai Nov 23, 2024
ce15053
mypy
nerdai Nov 23, 2024
3afb7fe
mypy
nerdai Nov 23, 2024
e89dbd7
rename to checkpoint_callback
nerdai Nov 23, 2024
5537e30
rename factory method
nerdai Nov 23, 2024
4ae44b7
proper placement of checkpoint_callback
nerdai Nov 23, 2024
0dfd23c
wip
nerdai Nov 23, 2024
5e5c05f
revert errors.py
nerdai Nov 23, 2024
87954f2
add unit test for checkpointing with stepwise
nerdai Nov 23, 2024
52cdd44
start implementing ability to control checkpointed steps
nerdai Nov 23, 2024
8be9c2d
start implementing ability to control checkpointed steps
nerdai Nov 23, 2024
615496d
impl enable disable for checkpoints
nerdai Nov 23, 2024
598c02c
add unit test for disable/enable
nerdai Nov 23, 2024
56fe232
refactor enable/disable impl
nerdai Nov 23, 2024
abdf6d5
pin pydantic for nebius
nerdai Nov 23, 2024
cdae078
parallel execution checkpointing
nerdai Nov 25, 2024
ec1dc82
wip
nerdai Nov 25, 2024
fbb3335
wip
nerdai Nov 25, 2024
37720a3
renames
nerdai Nov 25, 2024
7a1fae4
update parallel execution nb
nerdai Nov 25, 2024
3677390
test to check works across async loops
nerdai Nov 25, 2024
be86b68
simplify checkpointing nb
nerdai Nov 25, 2024
ffefa05
show how to enable/disable checkpoints
nerdai Nov 25, 2024
d6fb4b9
rm serializer from workflow init
nerdai Nov 25, 2024
31da494
cr
nerdai Nov 26, 2024
7594455
rename checkpoint_config to enabled_checkpoints
nerdai Nov 26, 2024
19c1165
doc strings
nerdai Nov 26, 2024
2bba5b8
cr
nerdai Nov 26, 2024
0b4577e
add run_id to Handler, move its creation to within _startc
nerdai Nov 26, 2024
baf0ae8
add section for checkpointing in module guides
nerdai Nov 26, 2024
0e5a94b
add Checkpointing to observability section
nerdai Nov 26, 2024
328b5a1
class string for WorkflowCheckpointer
nerdai Nov 26, 2024
6d4103e
wip
nerdai Nov 26, 2024
527 changes: 527 additions & 0 deletions docs/docs/examples/workflow/checkpointing_workflows.ipynb

Large diffs are not rendered by default.
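
Since the checkpointing_workflows.ipynb diff above is not rendered, here is a minimal sketch of the API this PR introduces, pieced together from the parallel_execution.ipynb changes shown below. It assumes a notebook-style async context and uses `parallel_workflow` as a stand-in for any `Workflow` instance; anything not visible in the diff below (defaults, extra parameters) is an assumption rather than the verified interface.

```python
# Minimal sketch of the WorkflowCheckpointer API added in this PR, based
# only on the calls visible in the parallel_execution.ipynb diff below.
from llama_index.core.workflow.checkpointer import WorkflowCheckpointer

# Wrap an existing Workflow; runs go through the wrapper, which stores a
# checkpoint after each completed step.
wflow_ckptr = WorkflowCheckpointer(workflow=parallel_workflow)

# Run as usual (top-level await assumes a notebook/async context).
handler = wflow_ckptr.run()
result = await handler

# Checkpoints are stored per run in a dict keyed by run_id.
for run_id, ckpts in wflow_ckptr.checkpoints.items():
    print(f"Run: {run_id} has {[c.last_completed_step for c in ckpts]}")

# Any stored checkpoint can seed a new run, which gets its own entry
# in `checkpoints`.
ckpt = wflow_ckptr.checkpoints[run_id][0]
handler = wflow_ckptr.run_from(checkpoint=ckpt)
result = await handler
```

The commit history also references per-step enabling/disabling of checkpoints (`enabled_checkpoints`) and a pluggable checkpoint serializer; those are presumably covered in the un-rendered checkpointing_workflows.ipynb.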

314 changes: 301 additions & 13 deletions docs/docs/examples/workflow/parallel_execution.ipynb
@@ -30,7 +30,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-index-core llama-index-utils-workflow"
"# %pip install llama-index-core llama-index-utils-workflow -q"
]
},
{
@@ -110,13 +110,14 @@
" data_list = [\"A\", \"B\", \"C\"]\n",
" await ctx.set(\"num_to_collect\", len(data_list))\n",
" for item in data_list:\n",
" self.send_event(ProcessEvent(data=item))\n",
" ctx.send_event(ProcessEvent(data=item))\n",
" return None\n",
"\n",
" @step\n",
" @step(num_workers=1)\n",
" async def process_data(self, ev: ProcessEvent) -> ResultEvent:\n",
" # Simulate some time-consuming processing\n",
" await asyncio.sleep(random.randint(1, 2))\n",
" processing_time = 2 + random.random()\n",
" await asyncio.sleep(processing_time)\n",
" result = f\"Processed: {ev.data}\"\n",
" print(f\"Completed processing: {ev.data}\")\n",
" return ResultEvent(result=result)\n",
@@ -140,13 +141,14 @@
" data_list = [\"A\", \"B\", \"C\"]\n",
" await ctx.set(\"num_to_collect\", len(data_list))\n",
" for item in data_list:\n",
" self.send_event(ProcessEvent(data=item))\n",
" ctx.send_event(ProcessEvent(data=item))\n",
" return None\n",
"\n",
" @step(num_workers=3)\n",
" async def process_data(self, ev: ProcessEvent) -> ResultEvent:\n",
" # Simulate some time-consuming processing\n",
" await asyncio.sleep(random.randint(1, 2))\n",
" processing_time = 2 + random.random()\n",
" await asyncio.sleep(processing_time)\n",
" result = f\"Processed: {ev.data}\"\n",
" print(f\"Completed processing: {ev.data}\")\n",
" return ResultEvent(result=result)\n",
@@ -194,14 +196,14 @@
"Completed processing: B\n",
"Completed processing: C\n",
"Workflow result: Processed: A, Processed: B, Processed: C\n",
"Time taken: 4.008663654327393 seconds\n",
"Time taken: 7.439495086669922 seconds\n",
"------------------------------\n",
"Start a parallel workflow with setting num_workers in the step of process_data\n",
"Completed processing: C\n",
"Completed processing: A\n",
"Completed processing: B\n",
"Workflow result: Processed: C, Processed: A, Processed: B\n",
"Time taken: 2.0040180683135986 seconds\n"
"Time taken: 2.5881590843200684 seconds\n"
]
}
],
@@ -238,7 +240,7 @@
"source": [
"# Note\n",
"\n",
"- Without setting num_workers, it might take 3 to 6 seconds. By setting num_workers, the processing occurs in parallel, handling 3 items at a time, and only takes 2 seconds.\n",
"- Without setting `num_workers=1`, it might take a total of 6-9 seconds. By setting `num_workers=3`, the processing occurs in parallel, handling 3 items at a time, and only takes 2-3 seconds total.\n",
"- In ParallelWorkflow, the order of the completed results may differ from the input order, depending on the completion time of the tasks.\n"
]
},
@@ -248,6 +250,283 @@
"source": [
"This example demonstrates the execution speed with and without using num_workers, and how to implement parallel processing in a workflow. By setting num_workers, we can control the degree of parallelism, which is very useful for scenarios that need to balance performance and resource usage."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Checkpointing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checkpointing a parallel execution Workflow like the one defined above is also possible. To do so, we must wrap the `Workflow` with a `WorkflowCheckpointer` object and perfrom the runs with these instances. During the execution of the workflow, checkpoints are stored in this wrapper object and can be used for inspection and as starting points for run executions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.core.workflow.checkpointer import WorkflowCheckpointer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Completed processing: C\n",
"Completed processing: A\n",
"Completed processing: B\n"
]
},
{
"data": {
"text/plain": [
"'Processed: C, Processed: A, Processed: B'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wflow_ckptr = WorkflowCheckpointer(workflow=parallel_workflow)\n",
"handler = wflow_ckptr.run()\n",
"await handler"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checkpoints for the above run are stored in the `WorkflowCheckpointer.checkpoints` Dict attribute."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Run: 90812bec-b571-4513-8ad5-aa957ad7d4fb has ['process_data', 'process_data', 'process_data', 'combine_results']\n"
]
}
],
"source": [
"for run_id, ckpts in wflow_ckptr.checkpoints.items():\n",
" print(f\"Run: {run_id} has {[c.last_completed_step for c in ckpts]}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can run from any of the checkpoints stored, using `WorkflowCheckpointer.run_from(checkpoint=...)` method. Let's take the first checkpoint that was stored after the first completion of \"process_data\" and run from it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Completed processing: B\n",
"Completed processing: A\n"
]
},
{
"data": {
"text/plain": [
"'Processed: C, Processed: B, Processed: A'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ckpt = wflow_ckptr.checkpoints[run_id][0]\n",
"ckpt\n",
"handler = wflow_ckptr.run_from(ckpt)\n",
"await handler"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Invoking a `run_from` or `run` will create a new run entry in the `checkpoints` attribute. In the latest run from the specified checkpoint, we can see that only two more \"process_data\" steps and the final \"combine_results\" steps were left to be completed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Run: 90812bec-b571-4513-8ad5-aa957ad7d4fb has ['process_data', 'process_data', 'process_data', 'combine_results']\n",
"Run: 4e1d24cd-c672-4ed1-bb5b-b9f1a252abed has ['process_data', 'process_data', 'combine_results']\n"
]
}
],
"source": [
"for run_id, ckpts in wflow_ckptr.checkpoints.items():\n",
" print(f\"Run: {run_id} has {[c.last_completed_step for c in ckpts]}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, if we use the checkpoint associated with the second completion of \"process_data\" of the same initial run as the starting point, then we should see a new entry that only has two steps: \"process_data\" and \"combine_results\"."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'90812bec-b571-4513-8ad5-aa957ad7d4fb'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get the run_id of the first initial run\n",
"first_run_id = next(iter(wflow_ckptr.checkpoints.keys()))\n",
"first_run_id"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Completed processing: B\n"
]
},
{
"data": {
"text/plain": [
"'Processed: C, Processed: A, Processed: B'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ckpt = wflow_ckptr.checkpoints[first_run_id][\n",
" 1\n",
"] # checkpoint after the second \"process_data\" step\n",
"ckpt\n",
"handler = wflow_ckptr.run_from(ckpt)\n",
"await handler"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Run: 90812bec-b571-4513-8ad5-aa957ad7d4fb has ['process_data', 'process_data', 'process_data', 'combine_results']\n",
"Run: 4e1d24cd-c672-4ed1-bb5b-b9f1a252abed has ['process_data', 'process_data', 'combine_results']\n",
"Run: e4f94fcd-9b78-4e28-8981-e0232d068f6e has ['process_data', 'combine_results']\n"
]
}
],
"source": [
"for run_id, ckpts in wflow_ckptr.checkpoints.items():\n",
" print(f\"Run: {run_id} has {[c.last_completed_step for c in ckpts]}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Similarly, if we start with the checkpoint for the third completion of \"process_data\" of the initial run, then we should only see the final \"combine_results\" step."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Processed: C, Processed: A, Processed: B'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ckpt = wflow_ckptr.checkpoints[first_run_id][\n",
" 2\n",
"] # checkpoint after the third \"process_data\" step\n",
"ckpt\n",
"handler = wflow_ckptr.run_from(ckpt)\n",
"await handler"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Run: 90812bec-b571-4513-8ad5-aa957ad7d4fb has ['process_data', 'process_data', 'process_data', 'combine_results']\n",
"Run: 4e1d24cd-c672-4ed1-bb5b-b9f1a252abed has ['process_data', 'process_data', 'combine_results']\n",
"Run: e4f94fcd-9b78-4e28-8981-e0232d068f6e has ['process_data', 'combine_results']\n",
"Run: c498a1a0-cf4c-4d80-a1e2-a175bb90b66d has ['combine_results']\n"
]
}
],
"source": [
"for run_id, ckpts in wflow_ckptr.checkpoints.items():\n",
" print(f\"Run: {run_id} has {[c.last_completed_step for c in ckpts]}\")"
]
}
],
"metadata": {
@@ -256,13 +535,22 @@
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
"display_name": "llama-index-core",
"language": "python",
"name": "llama-index-core"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}