Change the project.

lulupango · May 23, 2015 · b70e580 · b70e580
1 parent e0062dd
commit b70e580
Show file tree

Hide file tree

Showing 22 changed files with 509 additions and 563 deletions.
diff --git a/README.md b/README.md
@@ -13,35 +13,33 @@ In the project "SpellCorrectorBuild" root directory, use Ant command to build th
 
 + ## How to use
 2. Prepare the dictionary file "words.txt", original spell data file "final.out" and parameter file "parameter", and put "SpellCorrectorBuild.jar" in the same directory.
-3. Create a "/tmp/" directory to save the middle file: "train_data.txt", "corpus_data.txt", "error_data.txt", "count_data.txt".
-4. Use command "java -jar SpellCorrectorBuild.jar" to run the project.
-5. You can get a "test_result.txt" file to save the test infomation, and "channle_data.txt" to save the noisy channel model parameter.
+3. Use command "java -jar SpellCorrectorBuild.jar" to run the project.
+4. The run produces a "test_result.txt" file containing the test information, and a "channel_data.txt" file containing the noisy channel model parameters.
 
 + ## File descriptions
 - ### "parameter"
 A json format file:
+
+```
 {
-    "tmp_dir": "tmp/",
-	"channel_file": "channel_data.txt",
-	"input_file": "final.out",
-	"words_file": "words.txt",
-	"test_file": "tmp/train_data.txt",
-	"equal_prob": 1.0,
-	"smooth_prob": 0.00000005,
+	"model_file": "channel_data.txt",
+	"train_file": "final.out",
+	"dic_file": "words.txt",
+	"equal_prob": 0.9,
+	"smooth_prob": -1,
 	"most_dis": 2,
 	"context_num": 2,
-	"top_num": 3
+	"top_num": 3,
+	"train": "yes"
 }
+```
 
 - ### "channel_data.txt"
 The file has the following format:
 (word_slice \t key_slice \t log_probability)
 
 + ## Tips
-+ If you don't change the original spell data, you can reuse the middle file in tmp directory so that the train process can be fast.
 + The noisy channel model parameter file "channel_data.txt" can be reused at any time.
 
 + ## Reference
-The main method is based on Noisy Channel Model and an improved method from Microsoft Research
-http://ucrel.lancs.ac.uk/acl/P/P00/P00-1037.pdf
-
++ The main method is based on the Noisy Channel Model and an improved method from Microsoft Research: http://ucrel.lancs.ac.uk/acl/P/P00/P00-1037.pdf
diff --git a/build.xml b/build.xml
@@ -5,7 +5,7 @@
     <property name="classes.dir" value="bin" />
     <property name="output.dir" value="out" />
     <property name="jarname" value="SpellCorrector.jar" />
-    <property name="mainclass" value="cootek.spell.main.Run" />
+    <property name="mainclass" value="main.Run" />
     <!-- 第三方jar包的路径 -->
     <path id="lib-classpath">
         <fileset dir="${lib.dir}">

diff --git a/parameter b/parameter
@@ -0,0 +1,11 @@
+{
+	"model_file": "channel_data.txt",
+	"train_file": "final.out",
+	"dic_file": "words.txt",
+	"equal_prob": 0.9,
+	"smooth_prob": -1,
+	"most_dis": 2,
+	"context_num": 2,
+	"top_num": 3,
+	"train": "yes"
+}
diff --git a/src/cootek/spell/main/PredictModel.java b/src/cootek/spell/main/PredictModel.java
diff --git a/src/cootek/spell/main/TrainModel.java b/src/cootek/spell/main/TrainModel.java
diff --git a/src/cootek/spell/model/CountModel.java b/src/cootek/spell/model/CountModel.java
diff --git a/src/cootek/spell/model/ErrorModel.java b/src/cootek/spell/model/ErrorModel.java