forked from Ryuk-me/Torrent-Api-py
Showing 4 changed files with 151 additions and 2 deletions.

@@ -0,0 +1,126 @@
import asyncio
import time

import aiohttp
from bs4 import BeautifulSoup

from helper.asyncioPoliciesFix import decorator_asyncio_fix
from helper.html_scraper import Scraper

class YourBittorrent:
    def __init__(self):
        self.BASE_URL = 'https://yourbittorrent.com'
        self.LIMIT = None

    @decorator_asyncio_fix
    async def _individual_scrap(self, session, url, obj):
        # Fetch a result's detail page and fill in its poster image and
        # torrent link on the given result object.
        try:
            async with session.get(url) as res:
                html = await res.text(encoding='ISO-8859-1')
                soup = BeautifulSoup(html, 'lxml')
                try:
                    container = soup.select_one('div.card-body.container')
                    poster = (
                        container.find('div')
                        .find_all('div')[0]
                        .find('picture')
                        .find('img')['src']
                    )
                    clearfix = soup.find('div', class_='clearfix')
                    torrent = (
                        clearfix.find('div').find_all('div')[1].find('a')['href']
                    )
                    obj['torrent'] = torrent
                    obj['poster'] = poster
                except Exception:
                    # Detail page layout differs or fields are missing;
                    # leave the result object unenriched.
                    pass
        except Exception:
            return None

    async def _get_torrent(self, result, session, urls):
        # Scrape all detail pages concurrently to enrich each result.
        tasks = []
        for url in urls:
            for obj in result['data']:
                if obj['url'] == url:
                    task = asyncio.create_task(
                        self._individual_scrap(session, url, obj)
                    )
                    tasks.append(task)
        await asyncio.gather(*tasks)
        return result

    def _parser(self, htmls, idx=1):
        # Parse the listing table rows into a results dict, collecting each
        # result's detail-page URL for later enrichment. idx is the number
        # of leading non-result rows to skip.
        try:
            for html in htmls:
                soup = BeautifulSoup(html, 'lxml')
                list_of_urls = []
                my_dict = {'data': []}

                for tr in soup.find_all('tr')[idx:]:
                    td = tr.find_all('td')
                    name = td[1].find('a').get_text(strip=True)
                    url = self.BASE_URL + td[1].find('a')['href']
                    list_of_urls.append(url)
                    size = td[2].text
                    date = td[3].text
                    seeders = td[4].text
                    leechers = td[5].text
                    my_dict['data'].append({
                        'name': name,
                        'size': size,
                        'date': date,
                        'seeders': seeders,
                        'leechers': leechers,
                        'url': url,
                    })
                    if len(my_dict['data']) == self.LIMIT:
                        break
                # Only the first HTML document is parsed; return after it.
                return my_dict, list_of_urls
        except Exception:
            return None, None

    async def search(self, query, page, limit):
        # Note: `page` is accepted for API parity but not used by this site.
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            url = self.BASE_URL + '/?v=&c=&q={}'.format(query)
            # Skip the non-result rows at the top of the search table.
            return await self.parser_result(start_time, url, session, idx=6)

    async def parser_result(self, start_time, url, session, idx=1):
        htmls = await Scraper().get_all_results(session, url)
        result, urls = self._parser(htmls, idx)
        if result is not None:
            results = await self._get_torrent(result, session, urls)
            results['time'] = time.time() - start_time
            results['total'] = len(results['data'])
            return results
        return result

    async def trending(self, category, page, limit):
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            if not category:
                url = self.BASE_URL + '/top.html'
                idx = 1
            else:
                # The site lists ebooks under /ebooks.html, while the API
                # accepts 'books'.
                if category == 'books':
                    category = 'ebooks'
                url = self.BASE_URL + f'/{category}.html'
                idx = 4
            return await self.parser_result(start_time, url, session, idx)

    async def recent(self, category, page, limit):
        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            self.LIMIT = limit
            if not category:
                url = self.BASE_URL + '/new.html'
                idx = 1
            else:
                if category == 'books':
                    category = 'ebooks'
                url = self.BASE_URL + f'/{category}/latest.html'
                idx = 4
            return await self.parser_result(start_time, url, session, idx)