|
380 | 380 | "cell_type": "markdown",
|
381 | 381 | "metadata": {},
|
382 | 382 | "source": [
|
383 |
| - "#### REGULAR EXPRESSIONS\n", |
384 |
| - "import re - Import the Regular Expressions module\n", |
385 |
| - "\n", |
386 |
| - "re.search(\"abc\",s) - Returns a match object if the regex \"abc\" is found in s, otherwise None\n", |
387 |
| - "\n", |
388 |
| - "re.sub(\"abc\",\"xyz\",s) - Returns a string where all instances matching regex \"abc\" are replaced by \"xyz\"" |
| 383 | + "#### Regular Expressions\n", |
| 384 | + "- import re - Import the Regular Expressions module\n", |
| 385 | + "- re.search(r\"abc\",string) - Returns a match object if the regex \"abc\" is found anywhere in string, otherwise None\n", |
| 386 | + "- re.match(r\"abc\",string) - same as search, but only matches at the beginning of string\n", |
| 387 | + "- re.split(r'delimiter',string) - splits string by the delimiter provided\n", |
| 388 | + "- re.sub(r\"abc\",\"xyz\",string) - Returns a string where all instances matching regex \"abc\" are replaced by \"xyz\"\n", |
| 389 | + "\n", |
| 390 | + "##### metacharacters\n", |
| 391 | + "- \\d = digit (ex: 'User9' returns from re.findall(r'User\\d',string))\n", |
| 392 | + "- \\D = non-digit (ex: 'UserN' returns from re.findall(r'User\\D',string))\n", |
| 393 | + "- \\w = word character - any single letter, digit, or underscore\n", |
| 394 | + "- \\W = non-word character - any single character that is not a letter, digit, or underscore\n", |
| 395 | + "- \\s - whitespace\n", |
| 396 | + "- \\S = non-whitespace - any non-whitespace character\n", |
| 397 | + "- . - matches any character\n", |
| 398 | + "- ^ - anchors the match to the start of the string (^string)\n", |
| 399 | + "- $ - anchors the match to the end of the string (string$)\n", |
| 400 | + "- \\ - put in front of a character that has a special regex meaning to match it literally (ex: a backslash before . matches a literal period)\n", |
| 401 | + "- | - an 'or' operator: cat|dog|bird matches cat, dog, or bird\n", |
| 402 | + "- [] - character set: matches any one of the listed characters, e.g. ranges [a-zA-Z] [0-9] and symbols [%^&!]\n", |
| 403 | + "- [^] - negated character set: [^0-9] = matches any character that is not a digit\n", |
| 404 | + "- () - groups regex terms\n", |
| 405 | + "- (?:regex) - will match but not return what is in the parentheses (non-capturing)\n", |
| 406 | + "##### quantifiers - apply only to the character, set, or group immediately to their left\n", |
| 407 | + "- + - shows up one or more times (ex: 04-13 = \\d+-\\d+)\n", |
| 408 | + "- * - shows up zero or more times\n", |
| 409 | + "- ? - shows up zero or one time (placed after another quantifier, e.g. +? or *?, it converts a greedy search to a lazy search)\n", |
| 410 | + "- {n,m} - shows up minimum n times to maximum m times\n", |
| 411 | + "##### backreference groups\n", |
| 412 | + " for string in html_tags:\n", |
| 413 | + " #Complete the regex and find if it matches a closed HTML tags\n", |
| 414 | + " match_tag = re.match(r\"<(\\w+)>.*?</\\1>\", string)\n", |
| 415 | + " \n", |
| 416 | + " if match_tag:\n", |
| 417 | + " #If it matches print the first group capture\n", |
| 418 | + " print(\"Your tag {} is closed\".format(match_tag.group(1))) \n", |
| 419 | + " else:\n", |
| 420 | + " #If it doesn't match capture only the tag \n", |
| 421 | + " notmatch_tag = re.match(r\"<(\\w+)>\", string)\n", |
| 422 | + " #Print the first group capture\n", |
| 423 | + " print(\"Close your {} tag!\".format(notmatch_tag.group(1)))\n", |
| 424 | + "##### lookaround\n", |
| 425 | + "- regex(?=reference) = positive lookahead: matches regex only if it is followed by reference\n", |
| 426 | + "- regex(?!reference) = negative lookahead: matches regex only if it is not followed by reference\n", |
| 427 | + "- (?<=reference)regex = positive lookbehind: matches regex only if it is preceded by reference\n", |
| 428 | + "- (?<!reference)regex = negative lookbehind: matches regex only if it is not preceded by reference" |
389 | 429 | ]
|
390 | 430 | },
|
391 | 431 | {
|
|
0 commit comments