2 changes: 1 addition & 1 deletion dosagelib/cmd.py
@@ -115,7 +115,7 @@ def setup_options(console: console.Console) -> ArgumentParser:
     parser.add_argument('--list-all', action='store_true',
         help=argparse.SUPPRESS)
     comic_arg = parser.add_argument('comic', nargs='*',
-        help='comic module name (including case insensitive substrings)')
+        help='comic module name (including case insensitive substrings). Also accepts URLs on certain websites (ComicFury, WebToons).')
     comic_arg.completer = scraper_completion
     with contextlib.suppress(ImportError):
         completers = importlib.import_module('argcomplete.completers')
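With this change the positional `comic` argument doubles as a URL: an invocation like `dosage https://comicfury.com/read/somecomic` (URL illustrative, not taken from the PR) is routed through the new `Cache.findbyurl()` introduced below, instead of the usual case-insensitive name lookup.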
16 changes: 10 additions & 6 deletions dosagelib/director.py
@@ -210,13 +210,17 @@ def getScrapers(comics: Collection[str], basepath: str, adult=True, listing=False
             # make the following command work:
             # find Comics -type d | xargs -n1 -P10 dosage -b Comics
             comic = comic[len(basepath) + 1:].lstrip(os.sep)
-        if ':' in comic:
-            name, index = comic.split(':', 1)
-            indexes = index.split(',')
+        if comic.startswith("http:") or comic.startswith("https:"):
+            scraper = scrapercache.findbyurl(comic)
+            indexes = None
         else:
-            name = comic
-            indexes = None
-        scraper = scrapercache.find(name)
+            if ':' in comic:
+                name, index = comic.split(':', 1)
+                indexes = index.split(',')
+            else:
+                name = comic
+                indexes = None
+            scraper = scrapercache.find(name)
         if shouldRunScraper(scraper, adult, listing):
            # FIXME: Find a better way to work with indexes
            scraper.indexes = indexes
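The ordering here matters: URLs contain `':'`, so the scheme check has to run before the `name:index` split, otherwise an `https://…` argument would be mangled into a bogus name plus index list. A minimal stand-alone sketch of the new dispatch (the `resolve` helper is hypothetical; `find`/`findbyurl` mirror the interface added in this PR):

```python
# Sketch of the comic-argument dispatch introduced above. `scrapercache`
# is any object exposing find(name) and findbyurl(url) as in this PR.
def resolve(comic: str, scrapercache):
    if comic.startswith(("http:", "https:")):
        # URL arguments never carry a ':'-separated index list.
        return scrapercache.findbyurl(comic), None
    if ':' in comic:
        name, index = comic.split(':', 1)
        indexes = index.split(',')
    else:
        name, indexes = comic, None
    return scrapercache.find(name), indexes
```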
20 changes: 20 additions & 0 deletions dosagelib/plugins/comicfury.py
@@ -88,6 +88,26 @@ def shouldSkipUrl(self, url, data):
         return (self.match(data, '//div[@id="comicimagewrap"]//video') and
                 not self.match(data, '//div[@id="comicimagewrap"]//img'))

+    @classmethod
+    def handleurl(cls, url) -> list[ParserScraper]:
+        import re
+        rs = [
+            r"^http.*comicfury\.com/read/([^/]+)/?.*",
+            r"^http.*://(.+?)\.thecomicseries\.com/?.*",
+            r"^http.*://(.+?)\.the-comic\.org/?.*",
+            r"^http.*://(.+?)\.thecomicstrip\.org/?.*",
+            r"^http.*://(.+?)\.cfw\.me/?.*",
+            r"^http.*://(.+?)\.webcomic\.ws/?.*",
+        ]
+        for r in rs:
+            m = re.match(r, url.lower())
+            if m is not None:
+                name = m.group(1)
+                ps = cls(name, name)
+                ps.multipleImagesPerStrip = True
+                return [ps]
+        return []
+
     @classmethod
     def getmodules(cls):
         return (
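All six patterns capture the comic's subdomain (or, for comicfury.com itself, the path segment after `/read/`) as the module name, matched against the lowercased URL. A quick check of the first two patterns with hypothetical URLs:

```python
import re

# Two of the patterns from handleurl above; matching runs on the
# lowercased URL, so the captured name is already lowercase.
patterns = [
    r"^http.*comicfury\.com/read/([^/]+)/?.*",
    r"^http.*://(.+?)\.thecomicseries\.com/?.*",
]

for url in ("https://comicfury.com/read/SomeComic/comics/1",    # hypothetical
            "https://somecomic.thecomicseries.com/comics/5"):   # hypothetical
    for pattern in patterns:
        m = re.match(pattern, url.lower())
        if m:
            print(m.group(1))  # prints "somecomic" for both URLs
            break
```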
10 changes: 10 additions & 0 deletions dosagelib/plugins/webtoons.py
@@ -47,6 +47,16 @@ def namer(self, imageUrl, pageUrl):
         imageExt = pageUrl.rsplit('.', 1)[-1].split('?', 1)[0]
         return "%s-%03d.%s" % (episodeNum, imageNum, imageExt)

+    @classmethod
+    def handleurl(cls, url) -> list[ParserScraper]:
+        import re
+        m = re.match(r"^http.*webtoons\.com/.+?/(.+?/.+?)/.+title_no=(\d+)", url.lower())
+        if m is not None:
+            path = m.group(1)
+            number = m.group(2)
+            return [cls(path, path, number)]
+        return []
+
     @classmethod
     def getmodules(cls):
         return (
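The WebToons pattern captures two groups: the `genre/title` path (the leading `.+?/` skips the language segment), which becomes the module name, and the numeric `title_no` query parameter. Against a hypothetical listing URL:

```python
import re

# The pattern from handleurl above, applied to a hypothetical URL.
m = re.match(
    r"^http.*webtoons\.com/.+?/(.+?/.+?)/.+title_no=(\d+)",
    "https://www.webtoons.com/en/fantasy/some-title/list?title_no=123".lower(),
)
assert m is not None
print(m.group(1), m.group(2))  # -> fantasy/some-title 123
```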
20 changes: 20 additions & 0 deletions dosagelib/scraper.py
@@ -54,6 +54,7 @@ class Scraper:
     # Stop search for previous URLs at this URL
     firstStripUrl: Optional[str] = None

+    # THINK: Is there harm in defaulting this to True?
     # if more than one image per URL is expected
     multipleImagesPerStrip: bool = False

Expand Down Expand Up @@ -95,6 +96,10 @@ class Scraper:
# HTTP session for configuration & cookies
session: http.Session = http.default_session

@classmethod
def handleurl(cls, url) -> list[Scraper]:
return []

@classmethod
def getmodules(cls) -> Collection[Scraper]:
if cls.url is None:
@@ -537,6 +542,7 @@ class Cache:
     """
     def __init__(self) -> None:
         self.data: List[Scraper] = []
+        self.plugins: list[type[Scraper]] = []
         self.userdirs: set[pathlib.Path] = set()

     def find(self, comic: str) -> Scraper:
@@ -547,6 +553,7 @@ def find(self, comic: str) -> Scraper:
         if not comic:
             raise ValueError("empty comic name")
         candidates = []
+
         cname = comic.lower()
         for scraper in self.all(include_removed=True):
             lname = scraper.name.lower()
@@ -600,6 +607,7 @@ def addmodule(self, module) -> int:
         classes = 0
         for plugin in loader.get_module_plugins(module, Scraper):
             classes += 1
+            self.plugins.append(plugin)
             self.data.extend(plugin.getmodules())
         return classes

@@ -615,6 +623,18 @@ def all(self, include_removed=False) -> list[Scraper]:
         else:
             return [x for x in self.data if x.url]

+    def findbyurl(self, url) -> Scraper:
+        candidates = []
+        for plugin in self.plugins:
+            candidates.extend(plugin.handleurl(url))
+
+        if len(candidates) > 1:
+            comics = ", ".join(x.name for x in candidates)
+            raise ValueError('multiple comics found: %s' % comics)
+        elif not candidates:
+            raise ValueError('no comic found for URL %r' % url)
+        return candidates[0]
+
     def validate(self) -> None:
         """Check for duplicate scraper names."""
         d: Dict[str, Scraper] = {}
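Taken together: `addmodule()` now records every plugin class, and `findbyurl()` fans the URL out to each class's `handleurl()`, insisting on exactly one candidate. A self-contained sketch of that fan-out with hypothetical stub classes (only the `handleurl` interface mirrors the PR):

```python
# Stand-alone sketch of the findbyurl fan-out: every registered plugin
# class is asked whether it can handle the URL, and exactly one scraper
# instance must remain.
class StubScraper:
    def __init__(self, name: str):
        self.name = name

    @classmethod
    def handleurl(cls, url: str) -> list:
        return []  # default: this plugin is not responsible for the URL


class StubComicFury(StubScraper):
    @classmethod
    def handleurl(cls, url: str) -> list:
        if "comicfury.com/read/" in url.lower():
            return [cls("SomeComic")]
        return []


def findbyurl(plugins, url):
    candidates = []
    for plugin in plugins:
        candidates.extend(plugin.handleurl(url))
    if len(candidates) > 1:
        raise ValueError('multiple comics found: %s'
                         % ", ".join(x.name for x in candidates))
    if not candidates:
        raise ValueError('no comic found for URL %r' % url)
    return candidates[0]


scraper = findbyurl([StubScraper, StubComicFury],
                    "https://comicfury.com/read/somecomic")
print(scraper.name)  # -> SomeComic
```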