diff --git a/Code_CO_23_02_23.ipynb b/Code_CO_23_02_23.ipynb new file mode 100644 index 0000000..a2951a5 --- /dev/null +++ b/Code_CO_23_02_23.ipynb @@ -0,0 +1,505 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "gmnXNgLmRE9L" + }, + "source": [ + "# KH.HD" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "zWD8UbpQW9IY" + }, + "outputs": [], + "source": [ + "import datetime as dt \n", + "import requests as rq\n", + "import json\n", + "import pandas as pd \n", + "from pandasql import sqldf\n", + "import time\n", + "import numpy as np\n", + "from memory_profiler import profile" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "#url1=\"https://odre.opendatasoft.com/api/records/1.0/search/?dataset=consommation-quotidienne-brute-regionale&q=&sort=-consommation_brute_gaz_grtgaz&facet=date_heure&facet=code_insee_region&facet=region\"\n", + "#url2=\"https://opendata.agenceore.fr/api/records/1.0/search/?dataset=production-demi-horaire-agregee-par-region&q=&sort=-horodate&facet=horodate&facet=region&facet=grd\" \n", + "#url22='https://tinyurl.com/4nhvpv2m'\n", + "\n", + "list_api_1=[ 'region', 'code_insee_region','date_heure', 'date', 'heure', \n", + " 'consommation_brute_electricite_rte', 'consommation_brute_totale', 'consommation_brute_gaz_totale']\n", + "\n", + "list_api_2=['region','code','horodate',\n", + " 'energie_injectee', 'nb_points_injection' ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Adapters" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "ui3V4n1-XDJZ" + }, + "outputs": [], + "source": [ + "class EnergyAdapter1:\n", + " def __init__(self,list_of_tables):\n", + " \"\"\" The adapater for the first API Energy\"\"\"\n", + " self.base_url = 'https://odre.opendatasoft.com/api/records/1.0/search/?dataset=consommation-quotidienne-brute-regionale&q=&sort=-consommation_brute_gaz_grtgaz&facet=date_heure&facet=code_insee_region&facet=region'\n", + " self.list_of_tables = list_of_tables\n", + " def get_energy_production(self):\n", + " lista=self.list_of_tables\n", + " response = rq.get(self.base_url)\n", + " Repense = response.content\n", + " parse_json = json.loads(Repense)\n", + " n=parse_json['records'].__len__()\n", + " list_data=[x for x in parse_json['records']]\n", + " list_dic,df={},{}\n", + " for i in range(n):\n", + " list_dic[i] = {key: list_data[i][\"fields\"][key] for key in lista}\n", + " data=pd.DataFrame.from_dict(list_dic[0], orient='index').T\n", + " for i in range(1,n):\n", + " df[i]=pd.DataFrame.from_dict(list_dic[i], orient='index').T\n", + " data=pd.concat([data,df[i]], axis=0)\n", + " return data\n", + " \n", + "class EnergyAdapter2:\n", + " def __init__(self,list_of_tables):\n", + " \"\"\" The adapater for the second API of Energy \"\"\"\n", + " self.base_url = \"https://opendata.agenceore.fr/api/records/1.0/search/?dataset=production-demi-horaire-agregee-par-region&q=&sort=-horodate&facet=horodate&facet=region&facet=grd\"\n", + " self.list_of_tables = list_of_tables\n", + " def get_energy_production(self):\n", + " return EnergyAdapter1.get_energy_production(self)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Mediator" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "class InformationMediator:\n", + " def __init__(self, energy_adapter1, energy_adapter2):\n", + " self.energy_adapter1 = energy_adapter1\n", + " self.energy_adapter2 = energy_adapter2\n", + " \n", + " def get_combined_info(self,common):\n", + " energy_data1 = self.energy_adapter1.get_energy_production()\n", + " energy_data2 = self.energy_adapter2.get_energy_production()\n", + " return pd.merge(energy_data1, energy_data2, on = common)\n", + " \n", + " \n", + " def get_API1_info(self):\n", + " return pd.DataFrame(self.energy_adapter1.get_energy_production())\n", + " \n", + " def get_API2_info(self):\n", + " return pd.DataFrame(self.energy_adapter2.get_energy_production())\n", + " \n", + " def get_API1_API2_info(self):\n", + " API1_df=get_API1_info(self)\n", + " API2_df=get_API2_info(self)\n", + " return pd.merge(API1_df, API2_df, on = common)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# -------------------------------------------------------------------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$\\bowtie$ -> $\\Pi$ -> $\\sigma$ $\\hspace{1cm}$ $\\bowtie$ -> $\\sigma$ -> $\\Pi$\n", + "\n", + "$\\Pi$ -> $\\bowtie$ -> $\\sigma$ $\\hspace{1cm}$ $\\Pi$ -> $\\sigma$ -> $\\bowtie$\n", + "\n", + " La projection Π \n", + " La sélection 𝜎\n", + " La jointure ⋈ \n", + "\n", + " parfois on aura besoin de qlq cols et/ou api et pas la totalité donc il faut traiter aussi ça.\n", + " il faut trouver la bonne parmi ces méthodes " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1) Jointure first \n", + "## 1-1) $\\bowtie$ -> $\\Pi$ -> $\\sigma$" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "def method_11(Query):\n", + " # Π : La projection : all cols\n", + " energyAdapter1 = EnergyAdapter1(list_api_1)\n", + " energyAdapter2 = EnergyAdapter2(list_api_2)\n", + " # ⋈ : La jointure : all cols\n", + " informationMediator = InformationMediator(energyAdapter1,energyAdapter2)\n", + " df = informationMediator.get_combined_info('region')\n", + " sqldf(Query)\n", + "\n", + "Query=\"\"\" \n", + "SELECT region, consommation_brute_electricite_rte, energie_injectee\n", + "FROM df\n", + "WHERE df.region='Pays de la Loire'\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ERROR: Could not find file C:\\Users\\Utilisateur\\AppData\\Local\\Temp\\ipykernel_21200\\2226404414.py\n", + "Temps d'exécution : 3.06408953666687 secondes.\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "method_11(Query)\n", + "elapsed_time11 = time.time() - start_time\n", + "print(\"Temps d'exécution : \", elapsed_time11, \" secondes.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1-2) $\\bowtie$ -> $\\sigma$ -> $\\Pi$" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def method_12(Query):\n", + " # Π : La projection : all cols\n", + " energyAdapter1 = EnergyAdapter1(list_api_1)\n", + " energyAdapter2 = EnergyAdapter2(list_api_2)\n", + " # ⋈ : La jointure : all cols\n", + " informationMediator = InformationMediator(energyAdapter1,energyAdapter2)\n", + " dfff = informationMediator.get_combined_info('region')\n", + " informationMediator.get_combined_info('region').head(3)\n", + " # 𝜎 : La Selection \n", + " df_selection_step=dfff[dfff['region']=='Pays de la Loire']\n", + " sqldf(Query)\n", + "\n", + "Query=\"\"\" \n", + "SELECT region, consommation_brute_electricite_rte, energie_injectee\n", + "FROM df_selection_step \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Temps d'exécution : 4.1961143016815186 secondes.\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "method_12(Query)\n", + "elapsed_time12 = time.time() - start_time\n", + "print(\"Temps d'exécution : \", elapsed_time12, \" secondes.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2) Projection first (< $\\bowtie$ -> $\\sigma$ " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "def method_21(Query):\n", + " start, end = 'SELECT','FROM'\n", + " tables_in_query=Query[Query.find(start)+len(start):Query.rfind(end)].strip().split(', ')\n", + " list_api1, list_api2, common= [], [], []\n", + " for word in tables_in_query:\n", + " if (word in list_api_1) and (word in list_api_2):\n", + " common.append(word)\n", + " list_api1.append(word)\n", + " list_api2.append(word)\n", + " elif word in list_api_1:\n", + " list_api1.append(word)\n", + " else:\n", + " list_api2.append(word)\n", + " # Π : La projection : qlq cols\n", + " energyAdapter1 = EnergyAdapter1(list_api1)\n", + " energyAdapter2 = EnergyAdapter2(list_api2)\n", + " # ⋈ : La jointure : qlq cols\n", + " informationMediator = InformationMediator(energyAdapter1,energyAdapter2)\n", + " df=informationMediator.get_combined_info(common)\n", + " # 𝜎 : La sélection \n", + " x=['Pays de la Loire']\n", + " df.query(f\" region == {x}\")\n", + "\n", + "Query=\"\"\" \n", + "SELECT region, consommation_brute_electricite_rte, energie_injectee\n", + "FROM df1\n", + "INNER JOIN df2 ON df1.region = df2.region\n", + "WHERE df2.region='Pays de la Loire'\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Temps d'exécution : 1.1578257083892822 secondes.\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "method_21(Query)\n", + "elapsed_time21 = time.time() - start_time\n", + "print(\"Temps d'exécution : \", elapsed_time21, \" secondes.\")\n", + "#Temps d'exécution : 2.115198850631714 secondes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2-2) $\\Pi$ -> $\\sigma$ -> $\\bowtie$" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def method_22(Query):\n", + " # Π : La projection : qlq cols\n", + " start, end = 'SELECT', 'FROM'\n", + " tables_in_query=Query[Query.find(start)+len(start):Query.rfind(end)].strip().split(', ')\n", + " list_api1, list_api2, common= [], [], []\n", + " for word in tables_in_query:\n", + " if (word in list_api_1) and (word in list_api_2):\n", + " common.append(word)\n", + " list_api1.append(word)\n", + " list_api2.append(word)\n", + " elif word in list_api_1:\n", + " list_api1.append(word)\n", + " else:\n", + " list_api2.append(word)\n", + " energyAdapter1, energyAdapter2 = EnergyAdapter1(list_api1), EnergyAdapter2(list_api2)\n", + " informationMediator = InformationMediator(energyAdapter1,energyAdapter2)\n", + " df1, df2=informationMediator.get_API1_info() ,informationMediator.get_API2_info() \n", + " \n", + " # 𝜎 : La sélection \n", + " start, end = \"WHERE\", \"\"\n", + " tables_in_query=Query[Query.find(start)+len(start):Query.rfind(end)].strip().split('=')[1]\n", + " dff=df1[df1['region']==tables_in_query]\n", + " dfff=df2[df2['region']==tables_in_query]\n", + "\n", + " # ⋈ : La jointure : qlq cols\n", + " df=pd.merge(dff, dfff, on = \"region\")\n", + " df\n", + "\n", + "Query=\"\"\" \n", + "SELECT region, consommation_brute_electricite_rte, energie_injectee\n", + "FROM df1\n", + "INNER JOIN df2 ON df1.region = df2.region\n", + "WHERE df2.region='Pays de la Loire'\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Temps d'exécution : 1.2491710186004639 secondes.\n" + ] + } + ], + "source": [ + "start_time = time.time()\n", + "method_22(Query)\n", + "elapsed_time22 = time.time() - start_time\n", + "print(\"Temps d'exécution : \", elapsed_time22, \" secondes.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2.6496338844299316,\n", + " 4.1961143016815186,\n", + " 1.1578257083892822,\n", + " 1.2491710186004639)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(elapsed_time11, elapsed_time12, elapsed_time21, elapsed_time22)" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "Temps d'exécution :\n", + "\t⋈-> Π-> 𝜎 ⋈-> 𝜎-> Π Π-> ⋈-> 𝜎 Π-> 𝜎-> ⋈\n", + "(3.8648767471313477, 4.503273248672485, 2.081739664077759, 1.9406847953796387)\n", + "(2.583775520324707, 3.8232362270355225, 1.386244535446167, 1.8930981159210205)\n", + "(3.42537522315979, 4.277031660079956, 2.1051204204559326, 2.066295862197876)\n", + "(3.3437840938568115, 3.0091187953948975, 1.464475393295288, 2.062783718109131)\n", + "(3.1498332023620605, 4.417710304260254, 1.871737003326416, 2.104344367980957)\n", + "\n", + "(3.616210699081421, 3.5588901042938232, 1.476405143737793, 1.3976826667785645)\n", + "(2.8788163661956787, 4.071977138519287, 3.204068183898926, 1.9847311973571777)\n", + "(3.369443655014038, 3.1831319332122803, 1.8376247882843018, 1.6176726818084717)\n", + "(2.6496338844299316, 4.1961143016815186, 1.1578257083892822, 1.2491710186004639)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3-1) $\\sigma$ -> $\\Pi$ -> $\\bowtie$\n", + "\tto do " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3-2) $\\sigma$ -> $\\bowtie$ -> $\\Pi$\n", + "\tto do" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vq5K06PDbtnc" + }, + "source": [ + "Exemple de queries:\n", + "\n", + "'' Savoir la consommation d'énergie brute totale et la consommation brute d'électricité ''\n", + "\n", + "1) query = \"SELECT country, region, consommation_brute_totale, consommation_brute_electricite_rte FROM df\"\n", + "\n", + "'' Savoir la température et l'humidité sur Occitanie (Toulouse)''\n", + "\n", + "2) query = \"SELECT temp_celsius ,temperature, humidity, wind_speed FROM df\"\n", + "\n", + "'' Savoir la température et la consommation d'énergie brute totale sur Occitanie (Toulouse)''\n", + "\n", + "3) query = \"SELECT temp_celsius ,consommation_brute_totale FROM df\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dernier livrable \n", + "evaluation\n", + "methodes\n", + "difficulté " + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}