Skip to content

Commit

Permalink
add new regex based searching
Browse files Browse the repository at this point in the history
  • Loading branch information
Bharat23 committed Nov 2, 2021
1 parent e474456 commit 673a6f0
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 3 deletions.
31 changes: 31 additions & 0 deletions README.MD
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,37 @@
# keylevel11.keylevel23.[{name=Awesome}]
# return {name: Awesome}
```

- #### [{key~regex}]
- When you have an unordered list of objects and you want to extract a specific object from the list by matching a regex against the value stored under a given key inside each object
- For extraction from beyond first level, append keys with a separator and provide `key_delimiter` for the program to recognize the start of next level.
- The regex search is case sensitive. You do not need to add `//` or `r''` to write your regex.
- Example:
```
"""
{
keylevel11: {
keylevel21: [
1, 2, 3
],
keylevel22: {
keylevel31: value
},
keylevel23: [
{
name: Awesome123,
},
{
name: Package
}
]
}
}
"""
# key to extract second level, and from that extract the object whose name matches the regex Awesome
# keylevel11.keylevel23.[{name~Awesome}]
# return {name: Awesome123}
```

### Available Methods

Expand Down
3 changes: 2 additions & 1 deletion WPTParser/Constants/RegexConstants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
class RegexConstants:
    """Regular expressions that recognise the special key forms in a lookup path.

    Every pattern expects the key to be wrapped in square brackets and
    captures the part between them.
    """

    # [N] -- a single list index; captures the digits.
    INDEXED_ARRAY = r'\[(\d+)\]'
    # [{key=value}] -- captures the whole "key=value" text.
    # NOTE: a regex-search key whose pattern contains '=' (e.g. [{name~a=b}])
    # also matches this pattern, so the order in which callers test the
    # patterns matters.
    DICT_ARRAY_SEARCH = r'\[\{(.+\=.+)\}\]'
    # [N:M] -- an index range; captures the two numbers as separate groups.
    RANGE_INDEXED_ARRAY = r'\[(\d+)\:(\d+)\]'
    # [{key~regex}] -- captures the whole "key~regex" text.
    DICT_ARRAY_REGEX_SEARCH = r'\[\{(.+\~.+)\}\]'
30 changes: 30 additions & 0 deletions WPTParser/JSONParser/ObjectListDataRegexExtracter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import re

from WPTParser.JSONParser.DataExtracter import DataExtracter

class ObjectListDataRegexExtracter(DataExtracter):
    """Pick the first object in a list whose value under a key matches a regex."""

    def __init__(self):
        super().__init__()

    def extract(self, obj_list: list, key: str):
        """Extract the object addressed by a key of format [{key~regex}].

        Keyword Arguments:
            obj_list {list} -- list of objects received from the previous key
            key {str} -- the text between the brackets, i.e. ``key~regex``
        Returns:
            {} -- The first dict whose string value under ``key`` matches
                  ``regex`` (via ``re.search``), or None when nothing matches
                  or the key / regex cannot be processed
        """
        try:
            # All spaces are removed from the key, so the regex part cannot
            # contain a literal space either.
            key = key.replace(' ', '')
            # Split on the first '~' only, so the regex itself may contain '~'.
            dict_key, dict_value = key.split('~', 1)
            # The pattern does not depend on the loop variable: compile once.
            pattern = re.compile(dict_value)
            for obj in obj_list:
                # Skip elements that cannot be searched instead of aborting
                # the whole lookup on the first odd one.
                if not isinstance(obj, dict):
                    continue
                value = obj.get(dict_key)
                if isinstance(value, str) and pattern.search(value):
                    return obj
            return None
        except Exception:
            # Deliberately best-effort: a malformed key, an invalid regex or
            # an unusable obj_list yields "not found" rather than an error.
            return None
7 changes: 6 additions & 1 deletion WPTParser/JSONParser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# TODO: ADD support for array type fields while parsing eg: data.Median.[0].value
import re
import logging

from WPTParser.Constants import RegexConstants
from WPTParser.JSONParser.KeyDataExtracter import KeyDataExtracter
from WPTParser.JSONParser.ListDataExtracter import ListDataExtracter
from WPTParser.JSONParser.ObjectListDataExtracter import ObjectListDataExtracter
from WPTParser.JSONParser.ListRangeDataExtractor import ListRangeDataExtractor
from WPTParser.JSONParser.ObjectListDataRegexExtracter import ObjectListDataRegexExtracter

class JSONParser():

Expand Down Expand Up @@ -55,7 +57,7 @@ def _recursive_find(self, obj: dict = {}, level_list: list = [], index: int = 0)
else:
return None
except Exception as ex:
print('error:', ex)
logging.error(ex)
return None

def _process_key(self, key: str):
Expand All @@ -74,6 +76,9 @@ def _process_key(self, key: str):
elif re.match(RegexConstants.DICT_ARRAY_SEARCH, key):
key = re.findall(RegexConstants.DICT_ARRAY_SEARCH, key)[0]
extracter = ObjectListDataExtracter()
elif re.match(RegexConstants.DICT_ARRAY_REGEX_SEARCH, key):
key = re.findall(RegexConstants.DICT_ARRAY_REGEX_SEARCH, key)[0]
extracter = ObjectListDataRegexExtracter()
# TODO: add support for range of array index
# elif re.match(RegexConstants.RANGE_INDEXED_ARRAY, key):
# key = re.findall(RegexConstants.RANGE_INDEXED_ARRAY, key)[0]
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def read(fname):
setuptools.setup(
name="wpt-parser",
package=['wpt-parser'],
version="0.0.4",
version="0.0.5",
author="Bharat Sinha",
author_email="bharat.sinha.2307@gmail.com",
description=description,
Expand Down

0 comments on commit 673a6f0

Please sign in to comment.