updated

digipodium · Jun 30, 2024 · c15ee33 · c15ee33
1 parent 51d23ed
commit c15ee33
Show file tree

Hide file tree

Showing 8 changed files with 78 additions and 209 deletions.
diff --git a/.idea/dputils.iml b/.idea/dputils.iml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/docs/scraper.md b/docs/scraper.md
@@ -150,7 +150,7 @@ def extract(dom_item, tags, data, errors):
 Here's a complete example of using the `scraper` module to extract data from a webpage:
 
 ```python
-from dputils.scraper import Scraper, Tag
+from dputils.scrape import Scraper, Tag
 
 url = "https://www.example.com"
 scraper = Scraper(url)

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -17,12 +17,9 @@ docx2txt = "^0.8"
 "pdfminer.six" = "^20220524"
 fpdf2 = "^2.5.4"
 bs4 = "^0.0.1"
-requests = "^2.27.1"
 python-docx = "^0.8.11"
 httpx = {extras = ["http2"], version = "^0.25.1"}
 
-[tool.poetry.dev-dependencies]
-pytest = "^5.2"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]

diff --git a/setup.py b/setup.py
@@ -0,0 +1,25 @@
+from setuptools import setup, find_packages
+
+setup(  # setuptools packaging metadata for the dputils distribution
+    name='dputils',
+    version='1.0.1',
+    description='This library is utility library from digipodium',
+    author='Team Digipodium, Zaid Kamil, AkulS1008',
+    author_email='xaidmetamorphos@gmail.com',
+    url='https://github.com/digipodium/dputils',
+    packages=find_packages(),  # called with no arguments, so package discovery starts from the directory holding setup.py
+    install_requires=[  # mirrors the dependencies visible in this commit's pyproject.toml hunk; `requests` is absent here as well
+        'docx2txt>=0.8',  # NOTE(review): every entry is an open-ended ">=" floor, while pyproject.toml pins the same packages with "^" ranges
+        'pdfminer.six>=20220524',
+        'fpdf2>=2.5.4',
+        'bs4>=0.0.1',
+        'python-docx>=0.8.11',
+        'httpx[http2]>=0.25.1',  # the "http2" extra matches extras = ["http2"] in pyproject.toml
+    ],
+    classifiers=[  # NOTE(review): confirm the declared maturity and license match the repository's actual status and LICENSE file
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 3',
+    ],
+)
diff --git a/test.py b/test.py
diff --git a/tests/test.py b/tests/test.py
@@ -0,0 +1,44 @@
+import pytest
+from dputils.scrape import Scraper, Tag
+
+class TestScrapeModule:  # pytest test class exercising Scraper and Tag imported from dputils.scrape
+    @pytest.fixture  # NOTE(review): this same commit removes pytest from pyproject.toml's dev-dependencies, yet this module imports it
+    def scraper(self):  # fixture: a Scraper built for the example.com URL
+        url = "https://www.example.com"
+        return Scraper(url)  # NOTE(review): presumably the Scraper fetches this URL over the network — confirm; if so these tests need connectivity
+
+    @pytest.fixture
+    def title_tag(self):  # fixture: Tag for an h1 element with class 'title'
+        return Tag(name='h1', cls='title', output='text')  # output='text' presumably selects the element's text content — confirm against Tag
+
+    @pytest.fixture
+    def price_tag(self):  # fixture: Tag for a span element with class 'price'
+        return Tag(name='span', cls='price', output='text')
+
+    def test_get_data_from_page(self, scraper, title_tag, price_tag):  # single-record extraction: one Tag per keyword argument
+        data = scraper.get_data_from_page(title=title_tag, price=price_tag)
+        assert isinstance(data, dict)
+        assert 'title' in data  # only key presence is checked; the extracted values themselves are never asserted
+        assert 'price' in data
+
+    def test_get_repeating_data_from_page(self, scraper):  # list extraction: a container Tag, an item Tag, and per-field Tags
+        target_tag = Tag(name='div', cls='product-list')  # passed as target=
+        item_tag = Tag(name='div', cls='product-item')  # passed as items=
+        title_tag = Tag(name='h2', cls='product-title', output='text')  # local Tag; the title_tag fixture is not requested by this test
+        price_tag = Tag(name='span', cls='product-price', output='text')
+        link_tag = Tag(name='a', cls='product-link', output='href')
+
+        products = scraper.get_repeating_data_from_page(
+            target=target_tag,
+            items=item_tag,
+            title=title_tag,
+            price=price_tag,
+            link=link_tag
+        )
+
+        assert isinstance(products, list)
+        for product in products:  # NOTE(review): an empty list skips this loop entirely, so the per-item checks pass vacuously
+            assert isinstance(product, dict)
+            assert 'title' in product
+            assert 'price' in product
+            assert 'link' in product