added to regex

Jason-M-Richards · Jason-M-Richards · commit 5f3017101184 · 2019-10-27T17:50:06.000-05:00
diff --git a/Python Toolkit.ipynb b/Python Toolkit.ipynb
@@ -380,12 +380,52 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### REGULAR EXPRESSIONS\n",
-    "import re - Import the Regular Expressions module\n",
-    "\n",
-    "re.search(\"abc\",s) - Returns a match object if the regex \"abc\" is found in s, otherwise None\n",
-    "\n",
-    "re.sub(\"abc\",\"xyz\",s) - Returns a string where all instances matching regex \"abc\" are replaced by \"xyz\""
+    "#### Regular Expressions\n",
+    "- import re - Import the Regular Expressions module\n",
+    "- re.search(r\"abc\",string) - Returns a match object if the regex \"abc\" is found in string, otherwise None\n",
+    "- re.match(r\"abc\",string) - same as search, but only matches at the beginning of the string\n",
+    "- re.split(r'delimiter',string) - splits string by delimiter provided\n",
+    "- re.sub(r\"abc\",\"xyz\",string) - Returns a string where all instances matching regex \"abc\" are replaced by \"xyz\"\n",
+    "\n",
+    "##### metacharacters\n",
+    "- \\d = digit (ex: 'User9' returns from re.findall(r'User\\d',string))\n",
+    "- \\D = non-digit (ex: 'UserN' returns from re.findall(r'User\\D',string))\n",
+    "- \\w = word character - any letter, digit, or underscore\n",
+    "- \\W = non-word character - anything that is not a letter, digit, or underscore\n",
+    "- \\s - whitespace\n",
+    "- \\S = non-whitespace - any non-whitespace character\n",
+    "- . - matches any character\n",
+    "- ^ - anchors the match to the start of the string (^string)\n",
+    "- $ - anchors the match to the end of the string (string$)\n",
+    "- \\ - put in front of a character that has a special meaning to match it literally\n",
+    "- | - basically an 'or' operator (ex: cat|dog|bird matches any one of the three)\n",
+    "- [] - can use to denote values [a-zA-Z] [0-9] and symbols [%^&!]\n",
+    "- [^] - negates a character set: [^0-9] = match any character that is not a digit\n",
+    "- () - groups regex terms\n",
+    "- (?:regex) - will match but not return what is in the parentheses (non-capturing)\n",
+    "##### quantifiers - applies only to the character on its left\n",
+    "- \\+ - shows up one or more times (ex: 04-13 = \\d+-\\d+)\n",
+    "- \\* - shows up zero or more times\n",
+    "- ? - shows up zero or one time (placed after another quantifier, as in .*?, it makes that quantifier lazy instead of greedy)\n",
+    "- {n,m} - shows up minimum n times to maximum m times\n",
+    "##### backreference groups\n",
+    "    for string in html_tags:\n",
+    "        #Complete the regex and find if it matches a closed HTML tags\n",
+    "        match_tag =  re.match(r\"<(\\w+)>.*?</\\1>\", string)\n",
+    " \n",
+    "        if match_tag:\n",
+    "            #If it matches print the first group capture\n",
+    "            print(\"Your tag {} is closed\".format(match_tag.group(1))) \n",
+    "        else:\n",
+    "            #If it doesn't match capture only the tag \n",
+    "            notmatch_tag = re.match(r\"<(\\w+)>\", string)\n",
+    "            #Print the first group capture\n",
+    "            print(\"Close your {} tag!\".format(notmatch_tag.group(1)))\n",
+    "##### lookaround\n",
+    "- regex(?=reference) = positive lookahead: match regex only if it is followed by reference\n",
+    "- regex(?!reference) = negative lookahead: match regex only if it is not followed by reference\n",
+    "- (?<=reference)regex = positive lookbehind: match regex only if it is preceded by reference\n",
+    "- (?<!reference)regex = negative lookbehind: match regex only if it is not preceded by reference"
    ]
   },
   {