|
| 1 | +package com.rarchives.ripme.ripper.rippers; |
| 2 | + |
| 3 | +import java.io.IOException; |
| 4 | +import java.net.MalformedURLException; |
| 5 | +import java.net.URL; |
| 6 | +import java.util.regex.Matcher; |
| 7 | +import java.util.regex.Pattern; |
| 8 | + |
| 9 | +import org.apache.log4j.Logger; |
| 10 | +import org.jsoup.Connection.Response; |
| 11 | +import org.jsoup.Jsoup; |
| 12 | +import org.jsoup.nodes.Document; |
| 13 | +import org.jsoup.nodes.Element; |
| 14 | + |
| 15 | +import com.rarchives.ripme.ripper.AbstractRipper; |
| 16 | + |
| 17 | +public class EightmusesRipper extends AbstractRipper { |
| 18 | + |
| 19 | + private static final String DOMAIN = "8muses.com", |
| 20 | + HOST = "8muses"; |
| 21 | + private static final Logger logger = Logger.getLogger(EightmusesRipper.class); |
| 22 | + |
| 23 | + public EightmusesRipper(URL url) throws IOException { |
| 24 | + super(url); |
| 25 | + } |
| 26 | + |
| 27 | + @Override |
| 28 | + public boolean canRip(URL url) { |
| 29 | + return url.getHost().endsWith(DOMAIN); |
| 30 | + } |
| 31 | + |
| 32 | + @Override |
| 33 | + public URL sanitizeURL(URL url) throws MalformedURLException { |
| 34 | + return url; |
| 35 | + } |
| 36 | + |
| 37 | + @Override |
| 38 | + public void rip() throws IOException { |
| 39 | + logger.info(" Retrieving " + this.url); |
| 40 | + Response resp = Jsoup.connect(this.url.toExternalForm()) |
| 41 | + .userAgent(USER_AGENT) |
| 42 | + .execute(); |
| 43 | + Document doc = resp.parse(); |
| 44 | + int index = 0; |
| 45 | + for (Element thumb : doc.select("img")) { |
| 46 | + if (!thumb.hasAttr("data-cfsrc")) { |
| 47 | + continue; |
| 48 | + } |
| 49 | + String image = thumb.attr("data-cfsrc"); |
| 50 | + if (image.contains("-cu_")) { |
| 51 | + image = image.replaceAll("-cu_[^.]+", "-me"); |
| 52 | + } |
| 53 | + if (image.startsWith("//")) { |
| 54 | + image = "http:" + image; |
| 55 | + } |
| 56 | + //image = image.replace(" ", "%20"); |
| 57 | + URL imageURL = new URL(image); |
| 58 | + index += 1; |
| 59 | + addURLToDownload(imageURL, String.format("%03d_", index)); |
| 60 | + } |
| 61 | + waitForThreads(); |
| 62 | + } |
| 63 | + |
| 64 | + @Override |
| 65 | + public String getHost() { |
| 66 | + return HOST; |
| 67 | + } |
| 68 | + |
| 69 | + @Override |
| 70 | + public String getGID(URL url) throws MalformedURLException { |
| 71 | + Pattern p = Pattern.compile("^https?://(www\\.)?8muses\\.com/index/category/([a-zA-Z0-9\\-_]+).*$"); |
| 72 | + Matcher m = p.matcher(url.toExternalForm()); |
| 73 | + if (!m.matches()) { |
| 74 | + throw new MalformedURLException("Expected URL format: http://www.8muses.com/index/category/albumname, got: " + url); |
| 75 | + } |
| 76 | + return m.group(m.groupCount()); |
| 77 | + } |
| 78 | + |
| 79 | +} |
0 commit comments