|
| 1 | +package com.rarchives.ripme.ripper.rippers; |
| 2 | + |
| 3 | +import java.io.IOException; |
| 4 | +import java.net.MalformedURLException; |
| 5 | +import java.net.URL; |
| 6 | +import java.util.ArrayList; |
| 7 | +import java.util.List; |
| 8 | +import java.util.regex.Matcher; |
| 9 | +import java.util.regex.Pattern; |
| 10 | + |
| 11 | +import org.json.JSONArray; |
| 12 | +import org.json.JSONObject; |
| 13 | +import org.jsoup.nodes.Document; |
| 14 | +import org.jsoup.nodes.Element; |
| 15 | +import org.jsoup.select.Elements; |
| 16 | + |
| 17 | +import com.rarchives.ripme.ripper.AbstractHTMLRipper; |
| 18 | +import com.rarchives.ripme.utils.Http; |
| 19 | + |
/**
 * Value holder for one episode of a Tapastic series.
 *
 * <p>Besides the raw data handed to the constructor it keeps a
 * {@code filename} variant of the title from which every character that
 * Windows forbids in file names has been removed.
 */
class TapasticEpisode {
    /** Characters that may not appear in a Windows file name. */
    private static final String FORBIDDEN_FILENAME_CHARS = "\\/:*?\"<>|";

    int index;
    int id;
    String title;
    String filename;

    /**
     * @param index position of the episode as supplied by the caller
     * @param id    numeric episode id
     * @param title episode title; also the basis for {@link #filename}
     */
    public TapasticEpisode(int index, int id, String title) {
        this.index = index;
        this.id = id;
        this.title = title;
        this.filename = stripForbiddenChars(title);
    }

    /** Returns {@code raw} with every forbidden file-name character dropped. */
    private static String stripForbiddenChars(String raw) {
        StringBuilder cleaned = new StringBuilder(raw.length());
        for (int pos = 0; pos < raw.length(); pos++) {
            char current = raw.charAt(pos);
            if (FORBIDDEN_FILENAME_CHARS.indexOf(current) < 0) {
                cleaned.append(current);
            }
        }
        return cleaned.toString();
    }
}
| 41 | + |
| 42 | +public class TapasticRipper extends AbstractHTMLRipper { |
| 43 | + |
| 44 | + private List<TapasticEpisode> episodes=new ArrayList<TapasticEpisode>(); |
| 45 | + |
| 46 | + public TapasticRipper(URL url) throws IOException { |
| 47 | + super(url); |
| 48 | + } |
| 49 | + |
| 50 | + @Override |
| 51 | + public String getDomain() { |
| 52 | + return "tapastic.com"; |
| 53 | + } |
| 54 | + |
| 55 | + @Override |
| 56 | + public String getHost() { |
| 57 | + return "tapastic"; |
| 58 | + } |
| 59 | + |
| 60 | + @Override |
| 61 | + public Document getFirstPage() throws IOException { |
| 62 | + return Http.url(url).get(); |
| 63 | + } |
| 64 | + |
| 65 | + @Override |
| 66 | + public List<String> getURLsFromPage(Document page) { |
| 67 | + List<String> urls = new ArrayList<String>(); |
| 68 | + Elements scripts=page.select("script"); |
| 69 | + for(Element script: scripts) { |
| 70 | + String text=script.data(); |
| 71 | + if(text.contains("var _data")) { |
| 72 | + String[] lines=text.split("\n"); |
| 73 | + for(String line:lines) { |
| 74 | + String trimmed=line.trim(); |
| 75 | + if(trimmed.startsWith("episodeList : ")) { |
| 76 | + JSONArray json_episodes=new JSONArray(trimmed.substring("episodeList : ".length())); |
| 77 | + for(int i=0;i<json_episodes.length();i++) { |
| 78 | + JSONObject obj=json_episodes.getJSONObject(i); |
| 79 | + TapasticEpisode episode=new TapasticEpisode(i, obj.getInt("id"), obj.getString("title")); |
| 80 | + episodes.add(episode); |
| 81 | + urls.add("http://tapastic.com/episode/"+episode.id); |
| 82 | + } |
| 83 | + } |
| 84 | + } |
| 85 | + break; |
| 86 | + } |
| 87 | + } |
| 88 | + return urls; |
| 89 | + } |
| 90 | + |
| 91 | + @Override |
| 92 | + public void downloadURL(URL url, int index) { |
| 93 | + try { |
| 94 | + Document doc = Http.url(url).get(); |
| 95 | + Elements images=doc.select("article.ep-contents img"); |
| 96 | + for(int i=0;i<images.size();i++) { |
| 97 | + String link=images.get(i).attr("src"); |
| 98 | + String postfix=String.format(" %d-%d ", i+1,images.size()); |
| 99 | + TapasticEpisode episode=episodes.get(index-1); |
| 100 | + addURLToDownload(new URL(link), getPrefix(index)+episode.filename+postfix); |
| 101 | + } |
| 102 | + } catch (IOException e) { |
| 103 | + logger.error("[!] Exception while loading/parsing " + this.url,e); |
| 104 | + } |
| 105 | + |
| 106 | + } |
| 107 | + |
| 108 | + @Override |
| 109 | + public String getGID(URL url) throws MalformedURLException { |
| 110 | + Pattern p = Pattern.compile("^http://tapastic.com/series/(.*)$"); |
| 111 | + Matcher m = p.matcher(url.toExternalForm()); |
| 112 | + if (m.matches()) { |
| 113 | + return m.group(1); |
| 114 | + } |
| 115 | + throw new MalformedURLException("Expected tapastic.com URL format: " |
| 116 | + + "tapastic.com/series/name - got " + url + " instead"); |
| 117 | + } |
| 118 | +} |
0 commit comments