@@ -50,14 +50,18 @@ public abstract class LanguageDetector {
5050
5151 private static final ServiceLoader DEFAULT_SERVICE_LOADER = new ServiceLoader ();
5252
53+ // When addText(CharSequence) is given a large input, the input is split into chunks of
54+ // about this many chars, which are added one after another until hasEnoughText() is true.
55+ private static final int BUFFER_LENGTH = 4096 ;
56+
5357 // True if text is expected to be a mix of languages, and thus higher-resolution
5458 // detection must be done to avoid under-sampling the text.
5559 protected boolean mixedLanguages = false ;
5660
5761 // True if the text is expected to be 'short' (typically less than 100 chars), and
5862 // thus a different algorithm and/or set of profiles should be used.
5963 protected boolean shortText = false ;
60-
64+
6165 public static LanguageDetector getDefaultLanguageDetector () {
6266 List <LanguageDetector > detectors = getLanguageDetectors ();
6367 if (detectors .isEmpty ()) {
@@ -183,8 +187,19 @@ public LanguageDetector setShortText(boolean shortText) {
183187 * @param text Characters to add to current statistics.
184188 */
185189 public void addText (CharSequence text ) {
186- char [] chars = text .toString ().toCharArray ();
187- addText (chars , 0 , chars .length );
190+ int len = text .length ();
191+ if (len < BUFFER_LENGTH ) {
192+ char [] chars = text .toString ().toCharArray ();
193+ addText (chars , 0 , chars .length );
194+ return ;
195+ }
196+ int start = 0 ;
197+ while (! hasEnoughText () && start < len ) {
198+ int end = Math .min (start + BUFFER_LENGTH , len );
199+ char [] chars = text .subSequence (start , end ).toString ().toCharArray ();
200+ addText (chars , 0 , chars .length );
201+ start += BUFFER_LENGTH ;
202+ }
188203 }
189204
190205
0 commit comments