Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 206 additions & 3 deletions docs/source/notebooks/extraction/email.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -675,9 +675,8 @@
"\n",
"A few things to note:\n",
"\n",
"* The correctness is low - getting the exact information right can be difficult\n",
"* The json_edit_distance is low is often a bit better\n",
"* For this run, Anthropic is doing better on average"
"* For this run, Anthropic is doing better on average\n",
"* The correctness is low - getting the exact information right can be difficult"
]
},
{
Expand Down Expand Up @@ -975,6 +974,210 @@
" df[\"feedback.score_string:accuracy_claude\"].mean(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "f19b30ae-e51b-4364-999b-1a0c71836737",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>inputs.input</th>\n",
" <th>outputs.output</th>\n",
" <th>outputs.output_claude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>98358188-6e36-42ef-9298-83acf8d9dd12</th>\n",
" <td>Consider all ways to give to \\nSave the Redwo...</td>\n",
" <td>{'sender': 'Tim Whalen', 'sender_address': 'Sa...</td>\n",
" <td>{'sender': None, 'sender_phone_number': None, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0f29e857-fc08-45dd-b1ea-dde1e00c4a62</th>\n",
" <td>Some travelers plan ahead; others prefer a bit...</td>\n",
" <td>{'sender': 'Dunhill Vacations Inc.', 'sender_a...</td>\n",
" <td>{'sender': 'Dunhill Vacations Inc.', 'sender_p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35414bbc-4d38-41ed-876f-2a6a067e66d5</th>\n",
" <td>--- \\n \\n|\\n\\nWe Passed the Stop Dangerous P...</td>\n",
" <td>{'sender': 'Matt Haney', 'sender_address': '10...</td>\n",
" <td>{'sender': 'Matt Haney', 'sender_phone_number'...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ff1b2ed6-26a7-4501-96aa-6e3e10eadc72</th>\n",
" <td>--- \\n|\\n\\n# We Provide Unique Financing Opti...</td>\n",
" <td>{'sender': 'info@championadvance.com', 'sender...</td>\n",
" <td>{'sender': None, 'sender_phone_number': None, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" inputs.input \\\n",
"98358188-6e36-42ef-9298-83acf8d9dd12 Consider all ways to give to \\nSave the Redwo... \n",
"0f29e857-fc08-45dd-b1ea-dde1e00c4a62 Some travelers plan ahead; others prefer a bit... \n",
"35414bbc-4d38-41ed-876f-2a6a067e66d5 --- \\n \\n|\\n\\nWe Passed the Stop Dangerous P... \n",
"ff1b2ed6-26a7-4501-96aa-6e3e10eadc72 --- \\n|\\n\\n# We Provide Unique Financing Opti... \n",
"\n",
" outputs.output \\\n",
"98358188-6e36-42ef-9298-83acf8d9dd12 {'sender': 'Tim Whalen', 'sender_address': 'Sa... \n",
"0f29e857-fc08-45dd-b1ea-dde1e00c4a62 {'sender': 'Dunhill Vacations Inc.', 'sender_a... \n",
"35414bbc-4d38-41ed-876f-2a6a067e66d5 {'sender': 'Matt Haney', 'sender_address': '10... \n",
"ff1b2ed6-26a7-4501-96aa-6e3e10eadc72 {'sender': 'info@championadvance.com', 'sender... \n",
"\n",
" outputs.output_claude \n",
"98358188-6e36-42ef-9298-83acf8d9dd12 {'sender': None, 'sender_phone_number': None, ... \n",
"0f29e857-fc08-45dd-b1ea-dde1e00c4a62 {'sender': 'Dunhill Vacations Inc.', 'sender_p... \n",
"35414bbc-4d38-41ed-876f-2a6a067e66d5 {'sender': 'Matt Haney', 'sender_phone_number'... \n",
"ff1b2ed6-26a7-4501-96aa-6e3e10eadc72 {'sender': None, 'sender_phone_number': None, ... "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows for which OAI > Claude by at least 30%, according to the LLM-based evaluator\n",
"oai_beats_claude = df[\n",
" (df[\"feedback.score_string:accuracy\"] - df[\"feedback.score_string:accuracy_claude\"])\n",
" >= 0.3\n",
"]\n",
"\n",
"oai_beats_claude[[\"inputs.input\", \"outputs.output\", \"outputs.output_claude\"]]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "9f81cc5e-bb0e-4b27-bfca-766c08ce0f2b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>inputs.input</th>\n",
" <th>outputs.output</th>\n",
" <th>outputs.output_claude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>02cfdfc4-c3dc-47e6-ad44-8e437ebf2dce</th>\n",
" <td>---|---|---|--- \\n \\n| \\n--- \\n **Limited ...</td>\n",
" <td>{'action_items': [], 'topic': 'Limited Time Up...</td>\n",
" <td>{'sender': 'Dunhill Vacations Inc.', 'sender_p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198dc232-8f98-484a-a65e-048cfb517282</th>\n",
" <td>Hello Jacob,\\n\\n \\n\\nFor many small businesse...</td>\n",
" <td>{'sender': 'Sam at EMC', 'action_items': ['Kic...</td>\n",
" <td>{'sender': 'Sam at EMC', 'sender_phone_number'...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c222957f-cc7e-46af-9cca-1270f3fa5621</th>\n",
" <td>Hello Jacob,\\n\\n \\n\\nDo you know what Fortune...</td>\n",
" <td>{'sender': 'Sam at EMC', 'action_items': ['qua...</td>\n",
" <td>{'sender': 'Sam at EMC', 'sender_phone_number'...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119ef037-8744-4eb9-93df-64458278e4f8</th>\n",
" <td>--- \\n| | QUALIFY NOW \\n--- \\n \\n \\nHell...</td>\n",
" <td>{'sender': 'Sam at EMC', 'action_items': ['Che...</td>\n",
" <td>{'sender': 'Sam at EMC id:2023-09-19-20:17:53:...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" inputs.input \\\n",
"02cfdfc4-c3dc-47e6-ad44-8e437ebf2dce ---|---|---|--- \\n \\n| \\n--- \\n **Limited ... \n",
"198dc232-8f98-484a-a65e-048cfb517282 Hello Jacob,\\n\\n \\n\\nFor many small businesse... \n",
"c222957f-cc7e-46af-9cca-1270f3fa5621 Hello Jacob,\\n\\n \\n\\nDo you know what Fortune... \n",
"119ef037-8744-4eb9-93df-64458278e4f8 --- \\n| | QUALIFY NOW \\n--- \\n \\n \\nHell... \n",
"\n",
" outputs.output \\\n",
"02cfdfc4-c3dc-47e6-ad44-8e437ebf2dce {'action_items': [], 'topic': 'Limited Time Up... \n",
"198dc232-8f98-484a-a65e-048cfb517282 {'sender': 'Sam at EMC', 'action_items': ['Kic... \n",
"c222957f-cc7e-46af-9cca-1270f3fa5621 {'sender': 'Sam at EMC', 'action_items': ['qua... \n",
"119ef037-8744-4eb9-93df-64458278e4f8 {'sender': 'Sam at EMC', 'action_items': ['Che... \n",
"\n",
" outputs.output_claude \n",
"02cfdfc4-c3dc-47e6-ad44-8e437ebf2dce {'sender': 'Dunhill Vacations Inc.', 'sender_p... \n",
"198dc232-8f98-484a-a65e-048cfb517282 {'sender': 'Sam at EMC', 'sender_phone_number'... \n",
"c222957f-cc7e-46af-9cca-1270f3fa5621 {'sender': 'Sam at EMC', 'sender_phone_number'... \n",
"119ef037-8744-4eb9-93df-64458278e4f8 {'sender': 'Sam at EMC id:2023-09-19-20:17:53:... "
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows for which Claude > OAI by at least 50%, according to the LLM-based evaluator\n",
"oai_beats_claude = df[\n",
" (df[\"feedback.score_string:accuracy_claude\"] - df[\"feedback.score_string:accuracy\"])\n",
" >= 0.5\n",
"]\n",
"\n",
"oai_beats_claude[[\"inputs.input\", \"outputs.output\", \"outputs.output_claude\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa7f74a3-8cfe-48f1-ab85-249dc88aa307",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down