Skip to content

Commit 5b44ef9

Browse files
Sumesh Murali authored and Ashwin Rajeev committed
fixed flake8 issues
1 parent feaabb9 commit 5b44ef9

File tree

4 files changed

+30
-10
lines changed

4 files changed

+30
-10
lines changed

selectorlib/formatter.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ class Formatter:
44
"""
55

66
def format(self, text: str):
7-
"""Override this function in inherited subclass. return text after formatting"""
7+
"""Override this function in inherited subclass.
8+
return text after formatting"""
89
return text
910

1011
@property

selectorlib/selectorlib.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
def extract_field(element, item_type, attribute=None, formatter=None):
1010
if item_type == 'Text':
11-
texts = [i.strip() for i in element.xpath('.//text()').getall() if i.strip()]
11+
texts = [
12+
i.strip() for i in element.xpath('.//text()').getall() if i.strip()
13+
]
1214
content = " ".join(texts)
1315
elif item_type == 'Link':
1416
content = element.xpath('.//@href').get()
@@ -68,7 +70,8 @@ def extract(self, html: str, base_url: str = None):
6870
"""
6971
Args:
7072
html: html string
71-
base_url (str, optional): specifying the base_url will make all extracted Links absolute
73+
base_url (str, optional): specifying the base_url will make all
74+
extracted Links absolute
7275
Returns:
7376
dict: extracted data from given html string
7477
@@ -80,7 +83,9 @@ def extract(self, html: str, base_url: str = None):
8083
sel.root.make_links_absolute()
8184
fields_data = {}
8285
for selector_name, selector_config in self.config.items():
83-
fields_data[selector_name] = self._extract_selector(selector_config, sel)
86+
fields_data[selector_name] = self._extract_selector(
87+
selector_config, sel
88+
)
8489
return fields_data
8590

8691
def _extract_selector(self, field_config, parent_parser):
@@ -105,7 +110,9 @@ def _extract_selector(self, field_config, parent_parser):
105110
if 'attribute' in field_config:
106111
kwargs['attribute'] = field_config['attribute']
107112
if 'format' in field_config:
108-
kwargs['formatter'] = self.formatters[field_config['format']]
113+
kwargs['formatter'] = self.formatters[
114+
field_config['format']
115+
]
109116
value = extract_field(element, item_type, **kwargs)
110117

111118
if field_config.get('multiple') is not True:
@@ -119,7 +126,10 @@ def _get_child_item(self, field_config, element):
119126
children_config = field_config['children']
120127
child_item = {}
121128
for field in children_config:
122-
child_value = self._extract_selector(children_config[field], element)
129+
child_value = self._extract_selector(
130+
children_config[field],
131+
element
132+
)
123133
child_item[field] = child_value
124134
return child_item
125135

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
'Programming Language :: Python :: 3.6',
3535
'Programming Language :: Python :: 3.7',
3636
],
37-
description="A library to read a YML file with Xpath or CSS Selectors and extract data from HTML pages using them",
37+
description="A library to read a YML file with Xpath or CSS Selectors"
38+
" and extract data from HTML pages using them",
3839
entry_points={
3940
'console_scripts': [
4041
'selectorlib=selectorlib.cli:main',

tests/test_selectorlib.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ def extract_field_test_html():
6565
def test_content(html, input_yaml, output_yaml):
6666
base_url = "https://scrapeme.live/shop/Bulbasaur/"
6767
formatters = formatter.Formatter.get_all()
68-
extractor = selectorlib.Extractor.from_yaml_string(input_yaml, formatters=formatters)
68+
extractor = selectorlib.Extractor.from_yaml_string(
69+
input_yaml,
70+
formatters=formatters
71+
)
6972
output = extractor.extract(html, base_url=base_url)
7073
assert output == yaml.safe_load(output_yaml)
7174

@@ -90,8 +93,13 @@ def test_empty_selector_in_children(
9093
"/B004K4CIKC/ref=sr_1_3?qid=1563864262&refinements=p_89:NIKE" \
9194
"&s=apparel&sr=1-3"
9295
formatters = formatter.Formatter.get_all()
93-
extractor = selectorlib.Extractor.from_yaml_string(empty_selector_yaml, formatters=formatters)
94-
output = extractor.extract(amazon_nike_product_page_html, base_url=base_url)
96+
extractor = selectorlib.Extractor.from_yaml_string(
97+
empty_selector_yaml, formatters=formatters
98+
)
99+
output = extractor.extract(
100+
amazon_nike_product_page_html,
101+
base_url=base_url
102+
)
95103
assert output == json.loads(amazon_nike_product_page_output)
96104

97105

0 commit comments

Comments
 (0)