Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions dosagelib/comic.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,10 @@ def _exist_err(self, fn):
def _fnbase(self, basepath):
    '''Determine the target base name of this comic file and make sure the
    directory exists.

    The directory is derived from the *full* target path rather than from
    the scraper's download directory alone, because ``self.filename`` may
    itself contain path separators (sub-directories chosen by a namer).

    @param basepath: root directory under which comics are stored
    @return: full path (directory + filename) for this comic file
    '''
    comicpath = os.path.join(
        self.scraper.get_download_dir(basepath), self.filename
    )
    comicdir = os.path.dirname(comicpath)
    # exist_ok avoids a race if another worker creates the same
    # directory between a would-be isdir() check and makedirs().
    os.makedirs(comicdir, exist_ok=True)
    return comicpath
77 changes: 70 additions & 7 deletions dosagelib/plugins/d.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,17 +329,80 @@ class DreamKeepersPrelude(_ParserScraper):


class DresdenCodak(_ParserScraper):
    """Scraper for dresdencodak.com.

    The archive mixes three distinct series (Dark Science, Hob and
    One-Offs); namer() sorts downloaded strips into one sub-directory
    per series.
    """
    from datetime import datetime

    url = "https://dresdencodak.com/"
    firstStripUrl = url + "2005/06/08/the-tomorrow-man/"
    imageSearch = '(//section[d:class("entry-content")]//img[d:class("size-full") and not (contains(@alt, "revious") or contains(@alt,"irst") or contains(@alt,"ext"))])[1]'
    textSearch = '//section[d:class("entry-content")]//p[(4 < position()) and (position() < (last() - 1))]'
    textOptional = True
    prevSearch = '//a[img[contains(@src, "prev")]]'
    latestSearch = '//a[d:class("tc-grid-bg-link")]'
    starter = indirectStarter

    # Blog and comic are mixed...
    def shouldSkipUrl(self, url, data):
        return not data.xpath(self.imageSearch)

    # Haven't found a better way to distinguish whether or not a page is part
    # of Hob than by the date prefix.
    date_format = "%Y-%m-%d"
    hob_start = datetime.strptime("2007-02-08", date_format)
    hob_end = datetime.strptime("2008-10-22", date_format)

    # Captures the page number from Dark Science image file names
    # (raw string: "\." must not be a bare string escape).
    pagenumber_re = compile(
        r"(?:[0-9]+-)*[^0-9]+_([0-9]+)(?:a|b|-1|_001|-[0-9]+x[0-9]+)?\.jpg$"
    )

    def getPrevUrl(self, url, data):
        # Fix skipping newest One-Off: from the first Dark Science page,
        # jump to the latest entry of the one-offs category instead of
        # following the on-page "previous" link.
        if url == self.url + "2010/06/03/dark-science-01/":
            newurl = self.url + "category/oneoffs/"
            return self.fetchUrl(
                newurl, self.getPage(newurl), self.latestSearch
            )
        return super(DresdenCodak, self).getPrevUrl(url, data)

    def namer(self, image_url, page_url):
        import os.path

        filename = image_url.rsplit("/", 1)[-1]
        # The archives are divided into three parts:
        # Dark Science, Hob and One-Offs
        if filename.startswith("ds"):
            filename = filename[:2] + "_" + filename[2:]
        elif filename == "84_new.jpg":
            # Single anomalous page
            filename = "ds_84.jpg"
        elif filename == "cyborg_time.jpg":
            filename = os.path.join("Dark Science", "84b.jpg")
        elif "act_4" in filename:
            filename = os.path.join("Dark Science", "80b.jpg")
        elif "act_3" in filename:
            filename = os.path.join("Dark Science", "38b.jpg")
        elif "act_2" in filename:
            filename = os.path.join("Dark Science", "18b.jpg")

        if filename.startswith("ds_") or "-dark_science_" in filename:
            # Dark Science: keep only the zero-padded page number.
            # (NOTE(review): this drops the ".jpg" suffix — presumably the
            # download machinery re-adds an extension; confirm.)
            pagenumber = self.pagenumber_re.match(filename).group(1)
            filename = os.path.join(
                "Dark Science", "{0:0>3}".format(pagenumber)
            )
        elif "/" not in filename:
            from datetime import datetime

            # Page URLs end in /YYYY/MM/DD/slug/; pick the date parts.
            date_prefix = page_url.rsplit("/", 5)[-5:-2]
            date = datetime(*(int(i) for i in date_prefix))
            if self.hob_start <= date <= self.hob_end:
                # Hob: identified purely by publication date range.
                filename = os.path.join("Hob", filename)
            else:
                # One-Offs
                year_day_prefix = date.strftime("%Y-%m-%d")
                filename = os.path.join(
                    "One-Offs", "{0}-{1}".format(year_day_prefix, filename)
                )

        return filename


class DrFun(_ParserScraper):
Expand Down
2 changes: 1 addition & 1 deletion dosagelib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def getFilename(name):
"""Get a filename from given name without dangerous or incompatible
characters."""
# first replace all illegal chars
name = re.sub(r"[^0-9a-zA-Z_\-\.]", "_", name)
name = re.sub(r"[^0-9a-zA-Z_ /\-\.\\]", "_", name)
# then remove double dots and underscores
while ".." in name:
name = name.replace('..', '.')
Expand Down