From d0a3b94a5fa6a3fb2fb93e5912ddae4a7b9d3646 Mon Sep 17 00:00:00 2001
From: Vincent Simon <vincent.simon@gadz.org>
Date: Thu, 26 Feb 2015 21:10:24 +0100
Subject: [PATCH] Version1

---
 ...1guli\303\250res (Regex)-checkpoint.ipynb" | 587 +++++++++++++++
 ...ns r\303\251guli\303\250res (Regex).ipynb" | 683 ++++++++++++++++++
 2 files changed, 1270 insertions(+)
 create mode 100644 "day3/.ipynb_checkpoints/03 - Focus sur les expressions r\303\251guli\303\250res (Regex)-checkpoint.ipynb"
 create mode 100644 "day3/03 - Focus sur les expressions r\303\251guli\303\250res (Regex).ipynb"
diff --git "a/day3/.ipynb_checkpoints/03 - Focus sur les expressions r\303\251guli\303\250res (Regex)-checkpoint.ipynb" "b/day3/.ipynb_checkpoints/03 - Focus sur les expressions r\303\251guli\303\250res (Regex)-checkpoint.ipynb"
new file mode 100644
index 0000000..fe7a080
--- /dev/null
+++ "b/day3/.ipynb_checkpoints/03 - Focus sur les expressions r\303\251guli\303\250res (Regex)-checkpoint.ipynb"	
@@ -0,0 +1,587 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:e03de3b803149d3aa073adb0234b1e547fe4a78754bd2d1f1ad55fe0f6a9af05"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "heading",
+     "level": 1,
+     "metadata": {},
+     "source": [
+      "Les expressions r\u00e9guli\u00e8res"
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Introduction"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Ce focus va vous permettre de toucher du doigt la notion d'expressions r\u00e9guli\u00e8res, cependant il serait trop long de tout voir en d\u00e9tail. <br>\n",
+      "\n",
+      "Pour ceux qui veulent en savoir plus : [Learn Regex The Hard Way](http://regex.learncodethehardway.org/book/)"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Les expressions r\u00e9guli\u00e8res sont des patterns que l'on cr\u00e9e afin de soit : <br>\n",
+      "- s\u00e9lectionner l'information <br>\n",
+      "- diviser l'information <br>\n",
+      "- effectuer une validation <br>"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Afin de tester et de cr\u00e9er son pattern, il est fortement conseill\u00e9 d'utiliser un testeur d'expressions r\u00e9guli\u00e8res tel que [RegexR](http://www.regexr.com/)"
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Regex sur iPython et exemple"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Afin de pouvoir utiliser les expressions r\u00e9guli\u00e8res, il faut importer le module ```re``` "
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "\n",
+      "import pandas as pd\n",
+      "import re"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Vous avez un DataFrame compos\u00e9 de noms et d'adresses emails, vous souhaiteriez n'avoir que les adresses emails."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = pd.DataFrame(['Martin Daniel',\n",
+      "                   'martin@gmail.com', \n",
+      "                   'Vincent Simon', \n",
+      "                   'vincent.simon@laposte.net', \n",
+      "                   'Bob', \n",
+      "                   'bobby@zimmerman.com'],columns=['noms&mails'])\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>noms&amp;mails</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>             Martin Daniel</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>          martin@gmail.com</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>             Vincent Simon</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> vincent.simon@laposte.net</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>                       Bob</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td>       bobby@zimmerman.com</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 40,
+       "text": [
+        "                  noms&mails\n",
+        "0              Martin Daniel\n",
+        "1           martin@gmail.com\n",
+        "2              Vincent Simon\n",
+        "3  vincent.simon@laposte.net\n",
+        "4                        Bob\n",
+        "5        bobby@zimmerman.com\n",
+        "\n",
+        "[6 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 40
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Ici nous allons d\u00e9finir le pattern de l'expression r\u00e9guli\u00e8re que nous allons utiliser.\n",
+      "pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}'\n",
+      "\n",
+      "# re.IGNORECASE permet d'ignorer la casse. La fonction .match() permet de renvoyer un bool\u00e9en True ou False.\n",
+      "resultat = df['noms&mails'].str.match(pattern, re.IGNORECASE)\n",
+      "print resultat"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    False\n",
+        "1     True\n",
+        "2    False\n",
+        "3     True\n",
+        "4    False\n",
+        "5     True\n",
+        "Name: noms&mails, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 43
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Construire une expression r\u00e9guli\u00e8re"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Construisons un DataFrame compos\u00e9 de diff\u00e9rents types de donn\u00e9es et regardons les diff\u00e9rents r\u00e9sultats :"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2 = pd.DataFrame(['Martin Daniel',\n",
+      "                   'martin@gmail.com', \n",
+      "                   '1234', \n",
+      "                   '0637687898', \n",
+      "                   'Bob'],columns=['donn\u00e9es'])\n",
+      "df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>donn\u00e9es</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>    Martin Daniel</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> martin@gmail.com</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>             1234</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>       0637687898</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>              Bob</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 45,
+       "text": [
+        "            donn\u00e9es\n",
+        "0     Martin Daniel\n",
+        "1  martin@gmail.com\n",
+        "2              1234\n",
+        "3        0637687898\n",
+        "4               Bob\n",
+        "\n",
+        "[5 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 45
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "1er pattern : (\\w)"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern1 = r'\\w'\n",
+      "\n",
+      "resultat1 = df2['donn\u00e9es'].str.match(pattern1, re.IGNORECASE)\n",
+      "print resultat1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    True\n",
+        "1    True\n",
+        "2    True\n",
+        "3    True\n",
+        "4    True\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 58
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern1 s\u00e9lectionne tous les caract\u00e8res alphanum\u00e9riques (lettres, chiffres et underscore), mais pas les caract\u00e8res sp\u00e9ciaux."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "2eme pattern : \\d"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern2 = r'\\d'\n",
+      "\n",
+      "resultat2 = df2['donn\u00e9es'].str.match(pattern2, re.IGNORECASE)\n",
+      "print resultat2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    False\n",
+        "1    False\n",
+        "2     True\n",
+        "3     True\n",
+        "4    False\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 60
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern2 n'a pris en compte que les donn\u00e9es num\u00e9riques."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "3eme pattern : \\D"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern3 = r'\\D'\n",
+      "\n",
+      "resultat3 = df2['donn\u00e9es'].str.match(pattern3, re.IGNORECASE)\n",
+      "print resultat3"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0     True\n",
+        "1     True\n",
+        "2    False\n",
+        "3    False\n",
+        "4     True\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 62
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern3 a pris en compte toutes les donn\u00e9es **n'\u00e9tant pas** num\u00e9riques"
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "4eme pattern : 06\\d{8}"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern4 = r'06\\d{8}'\n",
+      "\n",
+      "resultat4 = df2['donn\u00e9es'].str.match(pattern4, re.IGNORECASE)\n",
+      "print resultat4"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    False\n",
+        "1    False\n",
+        "2    False\n",
+        "3     True\n",
+        "4    False\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 69
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern4 a pris en compte le seul num\u00e9ro de t\u00e9l\u00e9phone portable."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Les fonctions"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Jusqu'ici, nous avons utilis\u00e9 la fonction .match() qui renvoie un bool\u00e9en ```True``` ou ```False```. <br>\n",
+      "Il y a plusieurs autres fonctions tr\u00e8s utiles :"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Reprenons notre DataFrame initial en ajoutant une deuxi\u00e8me adresse \u00e0 Bobby :\n",
+      "df3 = pd.DataFrame(['Martin Daniel',\n",
+      "                   'martin@gmail.com', \n",
+      "                   'Vincent Simon', \n",
+      "                   'vincent.simon@laposte.net', \n",
+      "                   'Bob', \n",
+      "                   'bobby@zimmerman.com, bobby@zimm.fr'],columns=['noms&mails'])\n",
+      "df3"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>noms&amp;mails</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>             Martin Daniel</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>          martin@gmail.com</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>             Vincent Simon</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> vincent.simon@laposte.net</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>                       Bob</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td>       bobby@zimmerman.com</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 70,
+       "text": [
+        "                  noms&mails\n",
+        "0              Martin Daniel\n",
+        "1           martin@gmail.com\n",
+        "2              Vincent Simon\n",
+        "3  vincent.simon@laposte.net\n",
+        "4                        Bob\n",
+        "5        bobby@zimmerman.com\n",
+        "\n",
+        "[6 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 70
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 72
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      ".count()"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "resultatcount = df['noms&mails'].str.count(pattern, re.IGNORECASE)\n",
+      "print resultatcount"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    0\n",
+        "1    1\n",
+        "2    0\n",
+        "3    1\n",
+        "4    0\n",
+        "5    1\n",
+        "Name: noms&mails, dtype: int64\n"
+       ]
+      }
+     ],
+     "prompt_number": 73
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "La m\u00e9thode .count() compte le nombre d'occurrences du pattern"
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git "a/day3/03 - Focus sur les expressions r\303\251guli\303\250res (Regex).ipynb" "b/day3/03 - Focus sur les expressions r\303\251guli\303\250res (Regex).ipynb"
new file mode 100644
index 0000000..dee9a15
--- /dev/null
+++ "b/day3/03 - Focus sur les expressions r\303\251guli\303\250res (Regex).ipynb"	
@@ -0,0 +1,683 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:83d5340cfc6187353dd41abead5a22c1c117ec3bf55e5a3d46574a2d709ed91d"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "heading",
+     "level": 1,
+     "metadata": {},
+     "source": [
+      "Les expressions r\u00e9guli\u00e8res"
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Introduction"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Ce focus va vous permettre de toucher du doigt la notion d'expressions r\u00e9guli\u00e8res, cependant il serait trop long de tout voir en d\u00e9tail. <br>\n",
+      "\n",
+      "Pour ceux qui veulent en savoir plus : [Learn Regex The Hard Way](http://regex.learncodethehardway.org/book/)"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Les expressions r\u00e9guli\u00e8res sont des patterns que l'on cr\u00e9e afin de soit : <br>\n",
+      "- s\u00e9lectionner l'information <br>\n",
+      "- diviser l'information <br>\n",
+      "- effectuer une validation <br>"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Afin de tester et de cr\u00e9er son pattern, il est fortement conseill\u00e9 d'utiliser un testeur d'expressions r\u00e9guli\u00e8res tel que [RegexR](http://www.regexr.com/)"
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Regex sur iPython et exemple"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Afin de pouvoir utiliser les expressions r\u00e9guli\u00e8res, il faut importer le module ```re``` "
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "\n",
+      "import pandas as pd\n",
+      "import re"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Vous avez un DataFrame compos\u00e9 de noms et d'adresses emails, vous souhaiteriez n'avoir que les adresses emails."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df = pd.DataFrame(['Martin Daniel',\n",
+      "                   'martin@gmail.com', \n",
+      "                   'Vincent Simon', \n",
+      "                   'vincent.simon@laposte.net', \n",
+      "                   'Bob', \n",
+      "                   'bobby@zimmerman.com'],columns=['noms&mails'])\n",
+      "df"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>noms&amp;mails</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>             Martin Daniel</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>          martin@gmail.com</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>             Vincent Simon</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td> vincent.simon@laposte.net</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>                       Bob</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td>       bobby@zimmerman.com</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 40,
+       "text": [
+        "                  noms&mails\n",
+        "0              Martin Daniel\n",
+        "1           martin@gmail.com\n",
+        "2              Vincent Simon\n",
+        "3  vincent.simon@laposte.net\n",
+        "4                        Bob\n",
+        "5        bobby@zimmerman.com\n",
+        "\n",
+        "[6 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 40
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Ici nous allons d\u00e9finir le pattern de l'expression r\u00e9guli\u00e8re que nous allons utiliser.\n",
+      "pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}'\n",
+      "\n",
+      "# re.IGNORECASE permet d'ignorer la casse. La fonction .match() permet de renvoyer un bool\u00e9en True ou False.\n",
+      "resultat = df['noms&mails'].str.match(pattern, re.IGNORECASE)\n",
+      "print resultat"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    False\n",
+        "1     True\n",
+        "2    False\n",
+        "3     True\n",
+        "4    False\n",
+        "5     True\n",
+        "Name: noms&mails, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 43
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Construire une expression r\u00e9guli\u00e8re"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Construisons un DataFrame compos\u00e9 de diff\u00e9rents types de donn\u00e9es et regardons les diff\u00e9rents r\u00e9sultats :"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "df2 = pd.DataFrame(['Martin Daniel',\n",
+      "                   'martin@gmail.com', \n",
+      "                   '1234', \n",
+      "                   '0637687898', \n",
+      "                   'Bob'],columns=['donn\u00e9es'])\n",
+      "df2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>donn\u00e9es</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>    Martin Daniel</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td> martin@gmail.com</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>             1234</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>       0637687898</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>              Bob</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>5 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 45,
+       "text": [
+        "            donn\u00e9es\n",
+        "0     Martin Daniel\n",
+        "1  martin@gmail.com\n",
+        "2              1234\n",
+        "3        0637687898\n",
+        "4               Bob\n",
+        "\n",
+        "[5 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 45
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "1er pattern : (\\w)"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern1 = r'\\w'\n",
+      "\n",
+      "resultat1 = df2['donn\u00e9es'].str.match(pattern1, re.IGNORECASE)\n",
+      "print resultat1"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    True\n",
+        "1    True\n",
+        "2    True\n",
+        "3    True\n",
+        "4    True\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 58
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern1 s\u00e9lectionne tous les caract\u00e8res alphanum\u00e9riques (lettres, chiffres et underscore), mais pas les caract\u00e8res sp\u00e9ciaux."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "2eme pattern : \\d"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern2 = r'\\d'\n",
+      "\n",
+      "resultat2 = df2['donn\u00e9es'].str.match(pattern2, re.IGNORECASE)\n",
+      "print resultat2"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    False\n",
+        "1    False\n",
+        "2     True\n",
+        "3     True\n",
+        "4    False\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 60
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern2 n'a pris en compte que les donn\u00e9es num\u00e9riques."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "3eme pattern : \\D"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern3 = r'\\D'\n",
+      "\n",
+      "resultat3 = df2['donn\u00e9es'].str.match(pattern3, re.IGNORECASE)\n",
+      "print resultat3"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0     True\n",
+        "1     True\n",
+        "2    False\n",
+        "3    False\n",
+        "4     True\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 62
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern3 a pris en compte toutes les donn\u00e9es **n'\u00e9tant pas** num\u00e9riques"
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      "4eme pattern : 06\\d{8}"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern4 = r'06\\d{8}'\n",
+      "\n",
+      "resultat4 = df2['donn\u00e9es'].str.match(pattern4, re.IGNORECASE)\n",
+      "print resultat4"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    False\n",
+        "1    False\n",
+        "2    False\n",
+        "3     True\n",
+        "4    False\n",
+        "Name: donn\u00e9es, dtype: bool\n"
+       ]
+      }
+     ],
+     "prompt_number": 69
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Notre pattern4 a pris en compte le seul num\u00e9ro de t\u00e9l\u00e9phone portable."
+     ]
+    },
+    {
+     "cell_type": "heading",
+     "level": 2,
+     "metadata": {},
+     "source": [
+      "Les fonctions"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Jusqu'ici, nous avons utilis\u00e9 la fonction .match() qui renvoie un bool\u00e9en ```True``` ou ```False```. <br>\n",
+      "Il y a plusieurs autres fonctions tr\u00e8s utiles :"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Reprenons notre DataFrame initial en ajoutant une deuxi\u00e8me adresse \u00e0 Bobby :\n",
+      "df3 = pd.DataFrame(['Martin Daniel',\n",
+      "                   'martin@gmail.com', \n",
+      "                   'Vincent Simon', \n",
+      "                   'vincent.simon@laposte.net', \n",
+      "                   'Bob', \n",
+      "                   'bobby@zimmerman.com, bobby@zimm.fr'],columns=['noms&mails'])\n",
+      "df3"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "html": [
+        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+        "<table border=\"1\" class=\"dataframe\">\n",
+        "  <thead>\n",
+        "    <tr style=\"text-align: right;\">\n",
+        "      <th></th>\n",
+        "      <th>noms&amp;mails</th>\n",
+        "    </tr>\n",
+        "  </thead>\n",
+        "  <tbody>\n",
+        "    <tr>\n",
+        "      <th>0</th>\n",
+        "      <td>                      Martin Daniel</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>1</th>\n",
+        "      <td>                   martin@gmail.com</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>2</th>\n",
+        "      <td>                      Vincent Simon</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>3</th>\n",
+        "      <td>          vincent.simon@laposte.net</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>4</th>\n",
+        "      <td>                                Bob</td>\n",
+        "    </tr>\n",
+        "    <tr>\n",
+        "      <th>5</th>\n",
+        "      <td> bobby@zimmerman.com, bobby@zimm.fr</td>\n",
+        "    </tr>\n",
+        "  </tbody>\n",
+        "</table>\n",
+        "<p>6 rows \u00d7 1 columns</p>\n",
+        "</div>"
+       ],
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 75,
+       "text": [
+        "                           noms&mails\n",
+        "0                       Martin Daniel\n",
+        "1                    martin@gmail.com\n",
+        "2                       Vincent Simon\n",
+        "3           vincent.simon@laposte.net\n",
+        "4                                 Bob\n",
+        "5  bobby@zimmerman.com, bobby@zimm.fr\n",
+        "\n",
+        "[6 rows x 1 columns]"
+       ]
+      }
+     ],
+     "prompt_number": 75
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}'"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 72
+    },
+    {
+     "cell_type": "heading",
+     "level": 3,
+     "metadata": {},
+     "source": [
+      ".count()"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "resultatcount = df3['noms&mails'].str.count(pattern, re.IGNORECASE)\n",
+      "print resultatcount"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0    0\n",
+        "1    1\n",
+        "2    0\n",
+        "3    1\n",
+        "4    0\n",
+        "5    2\n",
+        "Name: noms&mails, dtype: int64\n"
+       ]
+      }
+     ],
+     "prompt_number": 78
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "La m\u00e9thode .count() compte le nombre d'occurrences du pattern."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "resultatfindall = df3['noms&mails'].str.findall(pattern, re.IGNORECASE)\n",
+      "print resultatfindall"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0                                      []\n",
+        "1                      [martin@gmail.com]\n",
+        "2                                      []\n",
+        "3             [vincent.simon@laposte.net]\n",
+        "4                                      []\n",
+        "5    [bobby@zimmerman.com, bobby@zimm.fr]\n",
+        "Name: noms&mails, dtype: object\n"
+       ]
+      }
+     ],
+     "prompt_number": 95
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "La m\u00e9thode .findall() permet d'afficher les r\u00e9sultats li\u00e9s au pattern. <br>"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Pour afficher ces r\u00e9sultats de fa\u00e7on plus propre, \n",
+      "# nous pouvons utiliser la m\u00e9thode .str[0] pour avoir la premi\u00e8re colonne des r\u00e9sultats\n",
+      "resultatfindall.str[0]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 96,
+       "text": [
+        "0                          NaN\n",
+        "1             martin@gmail.com\n",
+        "2                          NaN\n",
+        "3    vincent.simon@laposte.net\n",
+        "4                          NaN\n",
+        "5          bobby@zimmerman.com\n",
+        "Name: noms&mails, dtype: object"
+       ]
+      }
+     ],
+     "prompt_number": 96
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# Pour avoir la deuxi\u00e8me colonne nous jouons sur l'index\n",
+      "resultatfindall.str[1]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 97,
+       "text": [
+        "0              NaN\n",
+        "1              NaN\n",
+        "2              NaN\n",
+        "3              NaN\n",
+        "4              NaN\n",
+        "5    bobby@zimm.fr\n",
+        "Name: noms&mails, dtype: object"
+       ]
+      }
+     ],
+     "prompt_number": 97
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file

	noms&mails
0	Martin Daniel
1	martin@gmail.com
2	Vincent Simon
3	vincent.simon@laposte.net
4	Bob
5	bobby@zimmerman.com