ricklupton · stratus85 · Jul 12, 2024 · Jul 17, 2024 · Jul 17, 2024 · Jul 17, 2024
diff --git a/docs/cookbook/layout-optimisation.ipynb b/docs/cookbook/layout-optimisation.ipynb
@@ -0,0 +1,321 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "38a096d4-8ad1-4f2d-b8be-6affdcd71c21",
+   "metadata": {},
+   "source": [
+    "# Layout optimisation\n",
+    "\n",
+    "This example uses the same data as the [US energy consumption example](us-energy-consumption.ipynb) to demonstrate optimisation of node order and node positions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6c86326f-4c4b-4468-a583-049f503f1af7",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from attr import evolve\n",
+    "import pandas as pd\n",
+    "from floweaver import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "692e1ccf-c748-45b9-87f2-d7e8e4f5f021",
+   "metadata": {},
+   "source": [
+    "Load the data and set up the Sankey Diagram Definition, as in the previous example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "83d56948-5c13-4254-bf85-0c4a009627ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = Dataset.from_csv(\"us-energy-consumption.csv\", dim_process_filename=\"us-energy-consumption-processes.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "f490b5b0-ad19-48fc-bef3-0456e154e864",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sources = ['Solar', 'Nuclear', 'Hydro', 'Wind', 'Geothermal',\n",
+    "           'Natural_Gas', 'Coal', 'Biomass', 'Petroleum']\n",
+    "\n",
+    "uses = ['Residential', 'Commercial', 'Industrial', 'Transportation']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "9664a9bb-f21b-411d-8571-315cb58b42b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nodes = {\n",
+    "    'sources': ProcessGroup('type == \"source\"', Partition.Simple('process', sources), title='Sources'),\n",
+    "    'imports': ProcessGroup(['Net_Electricity_Import'], title='Net electricity imports'),\n",
+    "    'electricity': ProcessGroup(['Electricity_Generation'], title='Electricity Generation'),\n",
+    "    'uses': ProcessGroup('type == \"use\"', partition=Partition.Simple('process', uses)),\n",
+    "    \n",
+    "    'energy_services': ProcessGroup(['Energy_Services'], title='Energy services'),\n",
+    "    'rejected': ProcessGroup(['Rejected_Energy'], title='Rejected energy'),\n",
+    "    \n",
+    "    'direct_use': Waypoint(Partition.Simple('source', [\n",
+    "        # This is a hack to hide the labels of the partition: each label is a whitespace-only string of a different length. There should be a better way...\n",
+    "        (' '*i, [k]) for i, k in enumerate(sources)\n",
+    "    ])),\n",
+    "}\n",
+    "\n",
+    "ordering = [\n",
+    "    [[], ['sources'], []],\n",
+    "    [['imports'], ['electricity', 'direct_use'], []],\n",
+    "    [[], ['uses'], []],\n",
+    "    [[], ['rejected', 'energy_services'], []]\n",
+    "]\n",
+    "\n",
+    "bundles = [\n",
+    "    Bundle('sources', 'electricity'),\n",
+    "    Bundle('sources', 'uses', waypoints=['direct_use']),\n",
+    "    Bundle('electricity', 'uses'),\n",
+    "    Bundle('imports', 'uses'),\n",
+    "    Bundle('uses', 'energy_services'),\n",
+    "    Bundle('uses', 'rejected'),\n",
+    "    Bundle('electricity', 'rejected'),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "0c5cfcd8-9f3a-407f-8edf-00c9822e42de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "palette = {\n",
+    "    'Solar': 'gold',\n",
+    "    'Nuclear': 'red',\n",
+    "    'Hydro': 'blue',\n",
+    "    'Wind': 'purple',\n",
+    "    'Geothermal': 'brown',\n",
+    "    'Natural_Gas': 'steelblue',\n",
+    "    'Coal': 'black',\n",
+    "    'Biomass': 'lightgreen',\n",
+    "    'Petroleum': 'green',\n",
+    "    'Electricity': 'orange',\n",
+    "    'Rejected energy': 'lightgrey',\n",
+    "    'Energy services': 'dimgrey',\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a26aa4f7-e57e-48c6-b1f1-6f80ec294755",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sdd = SankeyDefinition(nodes, bundles, ordering,\n",
+    "                       flow_partition=dataset.partition('type'))\n",
+    "sankey_data = weave(sdd, dataset, palette=palette)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6bd354ef-b106-45ef-9333-e9abcdf2f8a7",
+   "metadata": {},
+   "source": [
+    "This is the default, un-optimised layout:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "d112fb8e-73d6-4b27-a15e-df481b4d4860",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "eb50567887e346688e066a44518bbaca",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "SankeyWidget(groups=[{'id': 'sources', 'type': 'process', 'title': 'Sources', 'nodes': ['sources^Solar', 'sour…"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sankey_data.to_widget(width=700, height=450, margins=dict(left=100, right=120))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "81d211d5-d8ee-4a3d-887c-898eb1fffc2c",
+   "metadata": {},
+   "source": [
+    "Optimise the node ordering:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4e7a3d95-6167-4175-9b34-17bd3c79ced8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<generator object optimise_node_order_model.<locals>.<genexpr> at 0x000001887C534900>\n"
+     ]
+    }
+   ],
+   "source": [
+    "sankey_data_evolved = optimise_node_order(sankey_data, group_nodes=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "93987910-ac87-49b3-a976-a4f132586901",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f1c2abb25a224f249ed40458d7a4590f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(SankeyWidget(groups=[{'id': 'sources', 'type': 'process', 'title': 'Sources', 'nodes': ['source…"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sankey_data_evolved.to_widget(width=700, height=450, margins=dict(left=100, right=120), debugging=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9ebd8d00-bc92-4858-88e4-214f48b6c6d4",
+   "metadata": {},
+   "source": [
+    "Optimise the node positions to make flows as straight as possible:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "28392d80-9ace-47a5-ad96-5b2f43b5f2e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f3c1e8d7998743a7a0691c36c565e3cc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "SankeyWidget(groups=[{'id': 'sources', 'type': 'process', 'title': 'Sources', 'nodes': ['sources^Solar', 'sour…"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sankey_data_evolved.to_widget(layout=optimise_node_positions(sankey_data_evolved, scale=1.5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "00dff780-210c-4ca4-9007-db8c54aa75c0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[('sources^Solar', 'electricity^*'), ('sources^Solar', 'direct_use^'), ('sources^Nuclear', 'electricity^*'), ('sources^Hydro', 'electricity^*'), ('sources^Wind', 'electricity^*'), ('sources^Geothermal', 'electricity^*'), ('sources^Geothermal', 'direct_use^    '), ('sources^Natural_Gas', 'electricity^*'), ('sources^Natural_Gas', 'direct_use^     '), ('sources^Coal', 'electricity^*'), ('sources^Coal', 'direct_use^      '), ('sources^Biomass', 'electricity^*'), ('sources^Biomass', 'direct_use^       '), ('sources^Petroleum', 'electricity^*'), ('sources^Petroleum', 'direct_use^        '), ('imports^*', 'uses^Residential'), ('electricity^*', 'uses^Commercial'), ('electricity^*', 'uses^Industrial'), ('electricity^*', 'uses^Residential'), ('electricity^*', 'uses^Transportation'), ('electricity^*', '__electricity_rejected_2^*'), ('direct_use^', 'uses^Commercial'), ('direct_use^', 'uses^Industrial'), ('direct_use^', 'uses^Residential'), ('direct_use^    ', 'uses^Commercial'), ('direct_use^    ', 'uses^Residential'), ('direct_use^     ', 'uses^Commercial'), ('direct_use^     ', 'uses^Industrial'), ('direct_use^     ', 'uses^Residential'), ('direct_use^     ', 'uses^Transportation'), ('direct_use^      ', 'uses^Commercial'), ('direct_use^      ', 'uses^Industrial'), ('direct_use^       ', 'uses^Commercial'), ('direct_use^       ', 'uses^Industrial'), ('direct_use^       ', 'uses^Residential'), ('direct_use^       ', 'uses^Transportation'), ('direct_use^        ', 'uses^Commercial'), ('direct_use^        ', 'uses^Industrial'), ('direct_use^        ', 'uses^Residential'), ('direct_use^        ', 'uses^Transportation'), ('__electricity_rejected_2^*', 'rejected^*'), ('uses^Residential', 'energy_services^*'), ('uses^Residential', 'rejected^*'), ('uses^Commercial', 'energy_services^*'), ('uses^Commercial', 'rejected^*'), ('uses^Industrial', 'energy_services^*'), ('uses^Industrial', 'rejected^*'), ('uses^Transportation', 'energy_services^*'), ('uses^Transportation', 
'rejected^*')]\n"
+     ]
+    }
+   ],
+   "source": [
+    "edges = []\n",
+    "for flow in sankey_data_evolved.links:\n",
+    "    edges.append((flow.source,flow.target))\n",
+    "print(edges)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "b8c12e3b-b130-4ac2-9f7b-9ae110cc9905",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Ordering( ; sources^Petroleum, sources^Biomass, sources^Natural_Gas, sources^Solar, sources^Geothermal, sources^Coal, sources^Wind, sources^Hydro, sources^Nuclear;  | imports^*; direct_use^        , direct_use^       , direct_use^     , direct_use^, direct_use^      , direct_use^    , electricity^*;  | ; uses^Transportation, uses^Industrial, uses^Commercial, uses^Residential, __electricity_rejected_2^*;  | ; energy_services^*, rejected^*;  )"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sankey_data_evolved.ordering"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venvsankey3.10.9",
+   "language": "python",
+   "name": "venvsankey3.10.9"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/installation.rst b/docs/installation.rst
@@ -10,6 +10,12 @@ install floweaver using pip:
 
     $ pip install floweaver
 
+To also install the additional dependencies for optimising diagram layout, use:
+
+.. code-block:: shell
+
+    $ pip install "floweaver[mip]"
+
 If you use Jupyter notebooks -- a good way to get started -- you will also want
 to install `ipysankeywidget <https://github.com/ricklupton/ipysankeywidget>`_,
 an IPython widget to interactively display Sankey diagrams::
@@ -62,3 +68,5 @@ To open Jupyter Notebook and begin to work on the Sankey. Write in the Command L
     $ jupyter notebook
 
 [not sure about this :D]
+
+To use the optimisation tools, ``gcc`` also needs to be installed, e.g. using Homebrew: ``brew install gcc``.
diff --git a/floweaver/__init__.py b/floweaver/__init__.py
@@ -9,11 +9,13 @@
 from .results_graph import results_graph
 from .augment_view_graph import elsewhere_bundles, augment
 from .hierarchy import Hierarchy
-from .sankey_data import SankeyData, SankeyLink, SankeyNode
+from .sankey_data import SankeyData, SankeyLink, SankeyNode, SankeyLayout
 from .color_scales import CategoricalScale, QuantitativeScale
 from .weave import weave
+from .diagram_optimisation import optimise_node_order, optimise_node_positions, optimise_hybrid_model
 
 __all__ = ['Dataset', 'Partition', 'Group', 'SankeyDefinition', 'ProcessGroup',
            'Waypoint', 'Bundle', 'Elsewhere', 'view_graph', 'results_graph',
            'elsewhere_bundles', 'augment', 'Hierarchy', 'weave', 'SankeyData',
-           'SankeyLink', 'SankeyNode', 'CategoricalScale', 'QuantitativeScale']
+           'SankeyLink', 'SankeyNode', 'SankeyLayout', 'CategoricalScale', 'QuantitativeScale',
+           "optimise_node_order", "optimise_node_positions", "optimise_hybrid_model"]