diff --git a/wordSegment/org/tinos/engine/nlp/imp/NLPControllerImp.java b/wordSegment/org/tinos/engine/nlp/imp/NLPControllerImp.java index 9274642..9eaa876 100644 --- a/wordSegment/org/tinos/engine/nlp/imp/NLPControllerImp.java +++ b/wordSegment/org/tinos/engine/nlp/imp/NLPControllerImp.java @@ -13,7 +13,7 @@ public int doSlangPartAndPOSCheckForTwoChar(int countInputStringLength, List outputList , Map wordsForest, StringBuilder stringBuilder, StringBuilder[] prefixWord , POSController posUtils, int charPosition, String textInputString){ String inputString= stringBuilder.toString(); - if (wordsForest.containsKey(inputString)){ + if (StableMaps.CiThree.containsKey(inputString)){ prefixWord[StableData.INT_ZERO].delete(StableData.INT_ZERO, prefixWord[StableData.INT_ZERO].length()); prefixWord[StableData.INT_ZERO].append(inputString); outputList.add(inputString); @@ -73,7 +73,7 @@ public int doPOSAndEMMCheckOfThree(int countInputLength, List outputList + inputString.charAt(StableData.INT_TWO); strings[StableData.INT_THREE]= String.valueOf(inputString.charAt(StableData.INT_TWO)); if (null== prefixWord[StableData.INT_ZERO]){ - if (wordsForest.containsKey(inputString)){ + if (StableMaps.CiThree.containsKey(inputString)){ prefixWord[StableData.INT_ZERO].delete(StableData.INT_ZERO, prefixWord[StableData.INT_ZERO].length()); prefixWord[StableData.INT_ZERO].append(inputString); outputList.add(inputString); @@ -135,7 +135,7 @@ public int doPOSAndEMMCheckOfThree(int countInputLength, List outputList countInputLength= posUtils.chuLiFuCiOfThree(wordsForest, outputList, countInputLength, strings, prefixWord); return countInputLength; } - if(wordsForest.containsKey(strings[StableData.INT_ONE])) { + if(StableMaps.CiTwo.containsKey(strings[StableData.INT_ONE])) { StringBuilder stringsBuilder= new StringBuilder(); countInputLength= doSlangPartAndPOSCheckForTwoChar(--countInputLength, outputList, stringsBuilder.append(strings[StableData.INT_ONE]) , wordsForest, prefixWord, posUtils, charPosition, textInputString); @@ -157,7 +157,7 @@ public int doPOSAndEMMCheckOfThree(int countInputLength, List outputList public int doSlangCheck(int countInputStringLength, List output, StringBuilder stringBuilder, Map wordsForest, StringBuilder[] prefixWord, POSController posUtils, int charPosition, String textInputString){ String inputString = stringBuilder.toString(); - if (wordsForest.containsKey(inputString)){ + if (StableMaps.CiFour.containsKey(inputString)){ output.add(inputString); prefixWord[StableData.INT_ZERO].delete(StableData.INT_ZERO, prefixWord[StableData.INT_ZERO].length()); prefixWord[StableData.INT_ZERO].append(inputString); @@ -172,8 +172,8 @@ public int doSlangCheck(int countInputStringLength, List output, StringB , stringBuilder.delete(StableData.INT_THREE, StableData.INT_FOUR), prefixWord, posUtils, charPosition, textInputString); return countInputStringLength; } - if (wordsForest.containsKey(preRegister)){ - if (wordsForest.containsKey(postRegister)){ + if (StableMaps.CiTwo.containsKey(preRegister)){ + if (StableMaps.CiTwo.containsKey(postRegister)){ String string= StableData.EMPTY_STRING+ inputString.charAt(StableData.INT_ZERO); if(StableMaps.xingWeiCi.containsKey(prefixWord[StableData.INT_ZERO].toString()) &&StableMaps.shiTaiCi.containsKey(string)) { @@ -199,18 +199,18 @@ public int doSlangCheck(int countInputStringLength, List output, StringB return countInputStringLength-StableData.INT_TWO; } } - if(wordsForest.containsKey(preRegister+ inputString.charAt(StableData.INT_TWO))&& !wordsForest.containsKey(postRegister)) { + if(StableMaps.CiThree.containsKey(preRegister+ inputString.charAt(StableData.INT_TWO))&& !StableMaps.CiTwo.containsKey(postRegister)) { prefixWord[StableData.INT_ZERO].delete(StableData.INT_ZERO, prefixWord[StableData.INT_ZERO].length()); prefixWord[StableData.INT_ZERO].append(preRegister+ inputString.charAt(StableData.INT_TWO)); output.add(preRegister+ inputString.charAt(StableData.INT_TWO)); return countInputStringLength- StableData.INT_ONE ; } - if(wordsForest.containsKey(preRegister)&& wordsForest.containsKey(inRegister)) { + if(StableMaps.CiTwo.containsKey(preRegister)&& StableMaps.CiTwo.containsKey(inRegister)) { countInputStringLength= doPOSAndEMMCheckOfThree(--countInputStringLength, output, wordsForest , stringBuilder.delete(StableData.INT_THREE, StableData.INT_FOUR), prefixWord, posUtils, charPosition, textInputString); return countInputStringLength; } - if(wordsForest.containsKey(preRegister)) { + if(StableMaps.CiTwo.containsKey(preRegister)) { countInputStringLength= doSlangPartAndPOSCheckForTwoChar(countInputStringLength- StableData.INT_TWO, output , stringBuilder.delete(StableData.INT_TWO, StableData.INT_FOUR), wordsForest, prefixWord, posUtils, charPosition, textInputString); return countInputStringLength; diff --git a/wordSegment/org/tinos/ortho/fhmm/imp/FMHMMListOneTimeImp.java b/wordSegment/org/tinos/ortho/fhmm/imp/FMHMMListOneTimeImp.java index 0bf111f..08b7866 100644 --- a/wordSegment/org/tinos/ortho/fhmm/imp/FMHMMListOneTimeImp.java +++ b/wordSegment/org/tinos/ortho/fhmm/imp/FMHMMListOneTimeImp.java @@ -124,6 +124,22 @@ public void indexMixed() throws IOException { && cInputString.split(StableData.NLP_SYMBO_SLASH).length > StableData.INT_ONE )) { continue Here; } + if(StableData.INT_ONE== cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO].length()) { + StableMaps.CiOne.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO],cInputString + .split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]); + } + if(StableData.INT_TWO== cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO].length()) { + StableMaps.CiTwo.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO],cInputString + .split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]); + } + if(StableData.INT_THREE== cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO].length()) { + StableMaps.CiThree.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO],cInputString + .split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]); + } + if(StableData.INT_FOUR== cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO].length()) { + StableMaps.CiFour.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO],cInputString + .split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE]); + } if(!StableMaps.fuCi.containsKey(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO]) && cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ONE].contains(StableData.NLP_CI_FU)) { StableMaps.fuCi.put(cInputString.split(StableData.NLP_SYMBO_SLASH)[StableData.INT_ZERO], cInputString @@ -530,6 +546,10 @@ public void indexMixed() throws IOException { StableMaps.xingRongCi.remove(""); StableMaps.xingWeiCi.remove(""); StableMaps.zhuCi.remove(""); + System.out.println(StableMaps.CiOne.size()); + System.out.println(StableMaps.CiTwo.size()); + System.out.println(StableMaps.CiThree.size()); + System.out.println(StableMaps.CiFour.size()); } public void index() throws IOException {