Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
zaid-kamil committed Jun 30, 2024
1 parent 51d23ed commit c15ee33
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 209 deletions.
5 changes: 3 additions & 2 deletions .idea/dputils.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/scraper.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def extract(dom_item, tags, data, errors):
Here's a complete example of using the `scraper` module to extract data from a webpage:

```python
- from dputils.scraper import Scraper, Tag
+ from dputils.scrape import Scraper, Tag

url = "https://www.example.com"
scraper = Scraper(url)
Expand Down
163 changes: 1 addition & 162 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@ docx2txt = "^0.8"
"pdfminer.six" = "^20220524"
fpdf2 = "^2.5.4"
bs4 = "^0.0.1"
requests = "^2.27.1"
python-docx = "^0.8.11"
httpx = {extras = ["http2"], version = "^0.25.1"}

[tool.poetry.dev-dependencies]
pytest = "^5.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
25 changes: 25 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from setuptools import setup, find_packages

# Packaging script for the dputils distribution: the metadata below is
# handed to setuptools.setup() as keyword arguments.

# Requirement specifiers passed as install_requires.
_install_requires = [
    "docx2txt>=0.8",
    "pdfminer.six>=20220524",
    "fpdf2>=2.5.4",
    "bs4>=0.0.1",
    "python-docx>=0.8.11",
    "httpx[http2]>=0.25.1",  # httpx requested together with its "http2" extra
]

# Trove classifier strings passed as classifiers.
_classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
]

_metadata = {
    "name": "dputils",
    "version": "1.0.1",
    "description": "This library is utility library from digipodium",
    "author": "Team Digipodium, Zaid Kamil, AkulS1008",
    "author_email": "xaidmetamorphos@gmail.com",
    "url": "https://github.com/digipodium/dputils",
    # Package list is discovered by find_packages() rather than written out.
    "packages": find_packages(),
    "install_requires": _install_requires,
    "classifiers": _classifiers,
}

setup(**_metadata)
41 changes: 0 additions & 41 deletions test.py

This file was deleted.

44 changes: 44 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest
from dputils.scrape import Scraper, Tag

class TestScrapeModule:
    """Shape checks for the ``Scraper`` / ``Tag`` API from ``dputils.scrape``.

    The tests only verify the container types and the keys of what the
    scraper returns; they do not assert on any extracted values.
    """

    @pytest.fixture
    def scraper(self):
        """Provide a ``Scraper`` constructed with the example.com URL."""
        # NOTE(review): whether constructing or using the Scraper performs a
        # network request is not visible from this file - confirm.
        return Scraper("https://www.example.com")

    @pytest.fixture
    def title_tag(self):
        """Provide a ``Tag`` for ``h1`` elements with class ``title``, text output."""
        return Tag(name='h1', cls='title', output='text')

    @pytest.fixture
    def price_tag(self):
        """Provide a ``Tag`` for ``span`` elements with class ``price``, text output."""
        return Tag(name='span', cls='price', output='text')

    def test_get_data_from_page(self, scraper, title_tag, price_tag):
        """``get_data_from_page`` returns a dict keyed by the keyword names used."""
        result = scraper.get_data_from_page(title=title_tag, price=price_tag)

        assert isinstance(result, dict)
        for key in ('title', 'price'):
            assert key in result

    def test_get_repeating_data_from_page(self, scraper):
        """``get_repeating_data_from_page`` returns a list of dicts, one key per field tag."""
        container = Tag(name='div', cls='product-list')
        entry = Tag(name='div', cls='product-item')
        # Field name -> Tag; the names double as the keys expected in each row.
        fields = {
            'title': Tag(name='h2', cls='product-title', output='text'),
            'price': Tag(name='span', cls='product-price', output='text'),
            'link': Tag(name='a', cls='product-link', output='href'),
        }

        rows = scraper.get_repeating_data_from_page(
            target=container,
            items=entry,
            **fields,
        )

        assert isinstance(rows, list)
        # An empty list passes: the per-row checks simply never run.
        for row in rows:
            assert isinstance(row, dict)
            for key in fields:
                assert key in row

0 comments on commit c15ee33

Please sign in to comment.