Skip to content

Commit

Permalink
intermediary save
Browse files Browse the repository at this point in the history
  • Loading branch information
sbooeshaghi committed Dec 15, 2023
1 parent 36f34cd commit 89d0186
Showing 1 changed file with 188 additions and 13 deletions.
201 changes: 188 additions & 13 deletions examples/parse_genbank.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMhCTZ7NuSG/ASANgDzabR8",
"authorship_tag": "ABX9TyNMWNFnKajuCncoJoFxgHui",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -33,10 +33,10 @@
],
"metadata": {
"id": "RhUJkFixEzIe",
"outputId": "8b19d4f4-31b0-494c-d3b7-699afb77dd56",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputId": "8b19d4f4-31b0-494c-d3b7-699afb77dd56"
},
"execution_count": 1,
"outputs": [
Expand Down Expand Up @@ -840,11 +840,11 @@
"gaps"
],
"metadata": {
"id": "h9a02vSj86GS",
"outputId": "c676449d-566f-44db-be70-b18c2ce9cf30",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"id": "h9a02vSj86GS",
"outputId": "c676449d-566f-44db-be70-b18c2ce9cf30"
},
"execution_count": 33,
"outputs": [
Expand Down Expand Up @@ -872,11 +872,12 @@
{
"cell_type": "code",
"source": [
"def fill_gaps(regions, parent_start, parent_stop):\n",
"def fill_gaps(regions, parent_start = 0, parent_stop = 0):\n",
" if len(regions) == 0:\n",
" return []\n",
" return regions\n",
"\n",
" prev = regions[0]\n",
" e = prev\n",
" if prev[\"start\"] > parent_start:\n",
" # add filler to start of regions\n",
" regions = [{\n",
Expand All @@ -888,28 +889,202 @@
" for idx, e in enumerate(regions[1:]):\n",
" if prev[\"stop\"] < e[\"start\"]:\n",
" # insert filler between\n",
" pass\n",
" regions[:regions.index(prev)] + {\n",
" \"id\": \"filler\",\n",
" \"start\": prev[\"stop\"],\n",
" \"stop\": e[\"start\"]\n",
" } + regions[:regions.index(e)]\n",
"\n",
" prev = e\n",
"\n",
" if len(e[\"regions\"]) > 0:\n",
" fill_gaps(e[\"regions\"], e[\"start\"], e[\"stop\"])\n",
" e[\"regions\"] = fill_gaps(e[\"regions\"], e[\"start\"], e[\"stop\"])\n",
"\n",
" if e[\"stop\"] < parent_stop:\n",
" # add to end of regions\n",
" pass\n",
" regions.append({\n",
" \"id\": \"filler\",\n",
" \"start\": e[\"stop\"],\n",
" \"stop\": parent_stop\n",
" })\n",
" return regions"
],
"metadata": {
"id": "yA8e9-7n7dqe"
},
"execution_count": 35,
"execution_count": 43,
"outputs": []
},
{
"cell_type": "code",
"source": [
"fill_gaps(gaps)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yJJicbaX7wyf",
"outputId": "78cae183-cb4c-4cd3-938e-ac9bed776b73"
},
"execution_count": 44,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[{'id': 'A',\n",
" 'start': 0,\n",
" 'stop': 100,\n",
" 'regions': [{'id': 'B',\n",
" 'start': 0,\n",
" 'stop': 25,\n",
" 'regions': [{'id': 'C', 'start': 10, 'stop': 15}]},\n",
" {'id': 'D', 'start': 50, 'stop': 75},\n",
" {'id': 'E', 'start': 75, 'stop': 85},\n",
" {'id': 'F', 'start': 90, 'stop': 100}]}]"
]
},
"metadata": {},
"execution_count": 44
}
]
},
{
"cell_type": "code",
"source": [
"nogaps"
],
"metadata": {
"id": "Lti5l9QHAY2A",
"outputId": "37401c8b-bba9-4df4-f087-f53a8fc47b8a",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 45,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[{'id': 'A',\n",
" 'start': 0,\n",
" 'stop': 100,\n",
" 'regions': [{'id': 'B',\n",
" 'start': 0,\n",
" 'stop': 25,\n",
" 'regions': [{'id': 'filler_BC', 'start': 0, 'stop': 10},\n",
" {'id': 'C', 'start': 10, 'stop': 15},\n",
" {'id': 'filler_CB', 'start': 15, 'stop': 25}]},\n",
" {'id': 'filler_BD', 'start': 25, 'stop': 50},\n",
" {'id': 'D', 'start': 50, 'stop': 75},\n",
" {'id': 'E', 'start': 75, 'stop': 85},\n",
" {'id': 'filler_EF', 'start': 85, 'stop': 90},\n",
" {'id': 'F', 'start': 90, 'stop': 100}]}]"
]
},
"metadata": {},
"execution_count": 45
}
]
},
{
"cell_type": "code",
"source": [
"def fill_gaps(regions, parent_start=0, parent_stop=0):\n",
"    \"\"\"Return a copy of `regions` with filler regions inserted into every gap.\n",
"\n",
"    Each region is a dict with `id`, `start` and `stop` keys and an optional\n",
"    `regions` list of child regions. Children are filled recursively, using\n",
"    the region's own `start`/`stop` as the parent span.\n",
"\n",
"    The input is not modified: the returned list and the region dicts in it\n",
"    are new objects (shallow copies with a freshly filled `regions` list).\n",
"\n",
"    Args:\n",
"        regions: list of region dicts, compared in list order.\n",
"            NOTE(review): assumed to be sorted by `start`; this is not checked.\n",
"        parent_start: start of the enclosing span. A `filler_start` region is\n",
"            added when the first region begins after it.\n",
"        parent_stop: stop of the enclosing span. A `filler_end` region is\n",
"            added when the last region ends before it. With the default of 0\n",
"            no end filler is added for non-negative coordinates.\n",
"\n",
"    Returns:\n",
"        A new list of regions; `[]` when `regions` is empty.\n",
"    \"\"\"\n",
"    if len(regions) == 0:\n",
"        return []\n",
"\n",
"    new_regions = []\n",
"    covered_to = parent_start  # where the previous region (or the parent) left off\n",
"    prev_id = None  # id of the previous region; None before the first one\n",
"    for region in regions:\n",
"        region_id = region['id']\n",
"\n",
"        # Insert a filler when this region starts after the covered span ends\n",
"        if covered_to < region['start']:\n",
"            if prev_id is None:\n",
"                filler_id = 'filler_start'\n",
"            else:\n",
"                filler_id = f'filler_{prev_id}_{region_id}'\n",
"            new_regions.append({\n",
"                'id': filler_id,\n",
"                'start': covered_to,\n",
"                'stop': region['start']\n",
"            })\n",
"\n",
"        # Shallow-copy so the caller's dict keeps its original `regions` list\n",
"        filled = dict(region)\n",
"        if 'regions' in region:\n",
"            filled['regions'] = fill_gaps(region['regions'], region['start'], region['stop'])\n",
"        new_regions.append(filled)\n",
"\n",
"        covered_to = region['stop']\n",
"        prev_id = region_id\n",
"\n",
"    # Insert a filler at the end if necessary\n",
"    if covered_to < parent_stop:\n",
"        new_regions.append({\n",
"            'id': 'filler_end',\n",
"            'start': covered_to,\n",
"            'stop': parent_stop\n",
"        })\n",
"\n",
"    return new_regions\n",
"\n",
"# Example usage: fill the nested `gaps` example; compare the result with `nogaps`\n",
"filled_regions = fill_gaps(gaps)\n"
],
"metadata": {
"id": "7MzP1hzYDiUj"
},
"execution_count": 47,
"outputs": []
},
{
"cell_type": "code",
"source": [
"filled_regions"
],
"metadata": {
"id": "94mXCoOGELC6",
"outputId": "afc3de76-ab33-4f03-d074-7a8b3b59b03d",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 48,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[{'id': 'A',\n",
" 'start': 0,\n",
" 'stop': 100,\n",
" 'regions': [{'id': 'B',\n",
" 'start': 0,\n",
" 'stop': 25,\n",
" 'regions': [{'id': 'filler_start', 'start': 0, 'stop': 10},\n",
" {'id': 'C', 'start': 10, 'stop': 15},\n",
" {'id': 'filler_end', 'start': 15, 'stop': 25}]},\n",
" {'id': 'filler_B_D', 'start': 25, 'stop': 50},\n",
" {'id': 'D', 'start': 50, 'stop': 75},\n",
" {'id': 'E', 'start': 75, 'stop': 85},\n",
" {'id': 'filler_E_F', 'start': 85, 'stop': 90},\n",
" {'id': 'F', 'start': 90, 'stop': 100}]}]"
]
},
"metadata": {},
"execution_count": 48
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "yJJicbaX7wyf"
"id": "FG_eq8KPEOI8"
},
"execution_count": null,
"outputs": []
Expand Down

0 comments on commit 89d0186

Please sign in to comment.