|
| 1 | +package com.darkprograms.speech.synthesiser; |
| 2 | + |
| 3 | +import com.darkprograms.speech.translator.GoogleTranslate; |
| 4 | + |
| 5 | +import java.io.IOException; |
| 6 | +import java.io.InputStream; |
| 7 | +import java.io.SequenceInputStream; |
| 8 | +import java.util.ArrayList; |
| 9 | +import java.util.Collections; |
| 10 | +import java.util.LinkedHashSet; |
| 11 | +import java.util.List; |
| 12 | +import java.util.Set; |
| 13 | +import java.util.concurrent.Callable; |
| 14 | +import java.util.concurrent.ExecutionException; |
| 15 | +import java.util.concurrent.ExecutorService; |
| 16 | +import java.util.concurrent.Executors; |
| 17 | +import java.util.concurrent.Future; |
| 18 | + |
| 19 | +/******************************************************************************* |
| 20 | + * Synthesiser class that connects to Google's unoffical API to retrieve data |
| 21 | + * |
| 22 | + * @author Luke Kuza, Aaron Gokaslan (Skylion) |
| 23 | + *******************************************************************************/ |
| 24 | +public abstract class BaseSynthsiser { |
| 25 | + |
| 26 | + /** |
| 27 | + * Gets an input stream to MP3 data for the returned information from a request |
| 28 | + * |
| 29 | + * @param synthText Text you want to be synthesized into MP3 data |
| 30 | + * @return Returns an input stream of the MP3 data that is returned from Google |
| 31 | + * @throws IOException Throws exception if it can not complete the request |
| 32 | + */ |
| 33 | + public abstract InputStream getMP3Data(String synthText) throws IOException; |
| 34 | + |
| 35 | + /** |
| 36 | + * Gets an InputStream to MP3Data for the returned information from a request |
| 37 | + * @param synthText List of Strings you want to be synthesized into MP3 data |
| 38 | + * @return Returns an input stream of all the MP3 data that is returned from Google |
| 39 | + * @throws IOException Throws exception if it cannot complete the request |
| 40 | + */ |
| 41 | + public InputStream getMP3Data(List<String> synthText) throws IOException { |
| 42 | + //Uses an executor service pool for concurrency. Limit to 1000 threads max. |
| 43 | + ExecutorService pool = Executors.newFixedThreadPool(1000); |
| 44 | + //Stores the Future (Data that will be returned in the future) |
| 45 | + Set<Future<InputStream>> set = new LinkedHashSet<Future<InputStream>>(synthText.size()); |
| 46 | + for(String part: synthText){ //Iterates through the list |
| 47 | + Callable<InputStream> callable = new MP3DataFetcher(part);//Creates Callable |
| 48 | + Future<InputStream> future = pool.submit(callable);//Begins to run Callable |
| 49 | + set.add(future);//Adds the response that will be returned to a set. |
| 50 | + } |
| 51 | + List<InputStream> inputStreams = new ArrayList<InputStream>(set.size()); |
| 52 | + for(Future<InputStream> future: set){ |
| 53 | + try { |
| 54 | + inputStreams.add(future.get());//Gets the returned data from the future. |
| 55 | + } catch (ExecutionException e) {//Thrown if the MP3DataFetcher encountered an error. |
| 56 | + Throwable ex = e.getCause(); |
| 57 | + if(ex instanceof IOException){ |
| 58 | + throw (IOException)ex;//Downcasts and rethrows it. |
| 59 | + } |
| 60 | + } catch (InterruptedException e){//Will probably never be called, but just in case... |
| 61 | + Thread.currentThread().interrupt();//Interrupts the thread since something went wrong. |
| 62 | + } |
| 63 | + } |
| 64 | + return new SequenceInputStream(Collections.enumeration(inputStreams));//Sequences the stream. |
| 65 | + } |
| 66 | + |
| 67 | + /** |
| 68 | + * Separates a string into smaller parts so that Google will not reject the request. |
| 69 | + * @param input The string you want to separate |
| 70 | + * @return A List<String> of the String fragments from your input.. |
| 71 | + */ |
| 72 | + protected List<String> parseString(String input){ |
| 73 | + return parseString (input, new ArrayList<String>()); |
| 74 | + } |
| 75 | + |
| 76 | + /** |
| 77 | + * Separates a string into smaller parts so that Google will not reject the request. |
| 78 | + * @param input The string you want to break up into smaller parts |
| 79 | + * @param fragments List<String> that you want to add stuff too. |
| 80 | + * If you don't have a List<String> already constructed "new ArrayList<String>()" works well. |
| 81 | + * @return A list of the fragments of the original String |
| 82 | + */ |
| 83 | + private List<String> parseString(String input, List<String> fragments){ |
| 84 | + if(input.length()<=100){//Base Case |
| 85 | + fragments.add(input); |
| 86 | + return fragments; |
| 87 | + } |
| 88 | + else{ |
| 89 | + int lastWord = findLastWord(input);//Checks if a space exists |
| 90 | + if(lastWord<=0){ |
| 91 | + fragments.add(input.substring(0,100));//In case you sent gibberish to Google. |
| 92 | + return parseString(input.substring(100), fragments); |
| 93 | + }else{ |
| 94 | + fragments.add(input.substring(0,lastWord)); |
| 95 | + //Otherwise, adds the last word to the list for recursion. |
| 96 | + return parseString(input.substring(lastWord), fragments); |
| 97 | + } |
| 98 | + } |
| 99 | + } |
| 100 | + |
| 101 | + /** |
| 102 | + * Finds the last word in your String (before the index of 99) by searching for spaces and ending punctuation. |
| 103 | + * Will preferably parse on punctuation to alleviate mid-sentence pausing |
| 104 | + * @param input The String you want to search through. |
| 105 | + * @return The index of where the last word of the string ends before the index of 99. |
| 106 | + */ |
| 107 | + private int findLastWord(String input){ |
| 108 | + if(input.length()<100) |
| 109 | + return input.length(); |
| 110 | + int space = -1; |
| 111 | + for(int i = 99; i>0; i--){ |
| 112 | + char tmp = input.charAt(i); |
| 113 | + if(isEndingPunctuation(tmp)){ |
| 114 | + return i+1; |
| 115 | + } |
| 116 | + if(space==-1 && tmp == ' '){ |
| 117 | + space = i; |
| 118 | + } |
| 119 | + } |
| 120 | + if(space>0){ |
| 121 | + return space; |
| 122 | + } |
| 123 | + return -1; |
| 124 | + } |
| 125 | + |
| 126 | + /** |
| 127 | + * Checks if char is an ending character |
| 128 | + * Ending punctuation for all languages according to Wikipedia (Except for Sanskrit non-unicode) |
| 129 | + * @param input The char you want check |
| 130 | + * @return True if it is, false if not. |
| 131 | + */ |
| 132 | + private boolean isEndingPunctuation(char input){ |
| 133 | + return input == '.' || input == '!' || input == '?' || input == ';' || input == ':' || input == '|'; |
| 134 | + } |
| 135 | + |
| 136 | + /** |
| 137 | + * Automatically determines the language of the original text |
| 138 | + * @param text represents the text you want to check the language of |
| 139 | + * @return the languageCode in ISO-639 |
| 140 | + * @throws IOException if it cannot complete the request |
| 141 | + */ |
| 142 | + public String detectLanguage(String text) throws IOException{ |
| 143 | + return GoogleTranslate.detectLanguage(text); |
| 144 | + } |
| 145 | + |
| 146 | + /** |
| 147 | + * This class is a callable. |
| 148 | + * A callable is like a runnable except that it can return data and throw exceptions. |
| 149 | + * Useful when using futures. Dramatically improves the speed of execution. |
| 150 | + * @author Aaron Gokaslan (Skylion) |
| 151 | + */ |
| 152 | + private class MP3DataFetcher implements Callable<InputStream>{ |
| 153 | + private String synthText; |
| 154 | + |
| 155 | + public MP3DataFetcher(String synthText){ |
| 156 | + this.synthText = synthText; |
| 157 | + } |
| 158 | + |
| 159 | + public InputStream call() throws IOException{ |
| 160 | + return getMP3Data(synthText); |
| 161 | + } |
| 162 | + } |
| 163 | +} |
0 commit comments