Skip to content

Commit

Permalink
Merge branch 'hotfix/custom-finder-example'
Browse files Browse the repository at this point in the history
  • Loading branch information
vinta committed Oct 4, 2013
2 parents 83454ea + fca7991 commit 33bc50c
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 3 deletions.
2 changes: 1 addition & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
History
-------
=======

1.1.0 (2013-10-04)
++++++++++++++++++
Expand Down
29 changes: 27 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,35 @@ Custom finder / extender pipeline:
.. code-block:: python
from haul import Haul
from haul.utils import in_ignorecase
IMAGE_FINDER_PIPELINE = (
def img_data_src_finder(pipeline_index,
                        soup,
                        finder_image_urls=None,
                        *args, **kwargs):
    """
    Find image URLs in the ``data-src`` attribute of <img> tags.

    :param pipeline_index: accepted for the finder call signature; not used
        by this finder.
    :param soup: parsed document. Must provide ``find_all('img')`` returning
        items that support ``.get('data-src', None)``.
    :param finder_image_urls: URLs already collected before this finder runs.
        Defaults to an empty list; the list passed in is never mutated.
    :returns: dict with a single ``'finder_image_urls'`` key holding the
        incoming URLs followed by the newly found ones. A URL is skipped when
        ``in_ignorecase`` reports it as already present.
    """
    # A ``None`` sentinel replaces the former ``[]`` default so that no list
    # object is shared between calls.
    if finder_image_urls is None:
        finder_image_urls = []

    now_finder_image_urls = []
    for img in soup.find_all('img'):
        src = img.get('data-src', None)
        if not src:
            # Missing or empty data-src: nothing to collect from this tag.
            continue
        # Skip URLs already known, either from the incoming list or from an
        # earlier <img> tag in this same document.
        if in_ignorecase(src, finder_image_urls) or \
                in_ignorecase(src, now_finder_image_urls):
            continue
        now_finder_image_urls.append(src)

    # Concatenation builds a new list, leaving the caller's list untouched.
    return {'finder_image_urls': finder_image_urls + now_finder_image_urls}
# Finder pipeline handed to Haul(finder_pipeline=...) below. Entries may be
# dotted-path strings (here two finders under haul.finders.pipeline) or a
# callable such as the img_data_src_finder defined above.
# NOTE(review): presumably the finders run in the order listed -- confirm.
MY_FINDER_PIPELINE = (
'haul.finders.pipeline.html.img_src_finder',
'haul.finders.pipeline.css.background_image_finder',
img_data_src_finder,
)
GOOGLE_SITES_EXTENDER_PIEPLINE = (
Expand All @@ -93,7 +118,7 @@ Custom finder / extender pipeline:
url = 'http://fashion-fever.nl/dressing-up/'
h = Haul(parser='lxml',
finder_pipeline=IMAGE_FINDER_PIPELINE,
finder_pipeline=MY_FINDER_PIPELINE,
extender_pipeline=GOOGLE_SITES_EXTENDER_PIEPLINE)
result = h.find_images(url, extend=True)
Expand Down

0 comments on commit 33bc50c

Please sign in to comment.