diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a37d209
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+/bin
+/classes
+/.externalToolBuilders
+*.jar
+/target/
+.classpath
+.project
+/.settings/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a154f42
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# gateplugin-LanguageDetection
+Integrates [optimaize/language-detector](https://github.com/optimaize/language-detector) into GATE as a Processing Resource.
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..b989491
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,62 @@
+
+
+ 4.0.0
+
+
+ uk.ac.gate
+ gate-plugin-base
+
+
+ 8.5.1
+
+
+
+
+
+ gate.language-detection
+ language-detection
+ 1.0-SNAPSHOT
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+
+
+
+
+ com.optimaize.languagedetector
+ language-detector
+ 0.6
+
+
+
diff --git a/src/main/java/gate/languagedetection/LanguageDetection.java b/src/main/java/gate/languagedetection/LanguageDetection.java
new file mode 100644
index 0000000..5bc6afb
--- /dev/null
+++ b/src/main/java/gate/languagedetection/LanguageDetection.java
@@ -0,0 +1,224 @@
+package gate.languagedetection;
+
+import java.io.IOException;
+import java.util.List;
+
+import com.optimaize.langdetect.DetectedLanguage;
+import com.optimaize.langdetect.LanguageDetector;
+import com.optimaize.langdetect.LanguageDetectorBuilder;
+import com.optimaize.langdetect.ngram.NgramExtractors;
+import com.optimaize.langdetect.profiles.LanguageProfile;
+import com.optimaize.langdetect.profiles.LanguageProfileReader;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.FeatureMap;
+import gate.Resource;
+import gate.creole.AbstractLanguageAnalyser;
+import gate.creole.ExecutionException;
+import gate.creole.ResourceInstantiationException;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.Optional;
+import gate.creole.metadata.RunTime;
+
+/**
+ * GATE processing resource that runs optimaize/language-detector over a
+ * document and records the detected languages as a feature.
+ * <p>
+ * When neither {@code inputASName} nor {@code inputAnnotation} is set the whole
+ * document text is analysed and the result is stored on the document's feature
+ * map. Otherwise each selected annotation is analysed separately and the result
+ * is stored on that annotation's own feature map.
+ * <p>
+ * The feature value is a list of {@code language:probability} entries joined by
+ * {@code ", "}; a value already present under the feature name is appended to.
+ */
+@CreoleResource(name = "LanguageDetection", comment = "Integrate optimaize/language-detector (https://github.com/optimaize/language-detector) as a Processing Resource")
+public class LanguageDetection extends AbstractLanguageAnalyser {
+
+    private static final long serialVersionUID = 4531104124991700665L;
+
+    /** Separator between two detected-language entries in the feature value. */
+    private static final String DETECTEDLANGUAGE_SPLIT = ", ";
+    /** Separator between the language code and its probability. */
+    private static final String PROBABILITY_SPLIT = ":";
+
+    /** Profile names handed to the profile reader; null or empty loads all built-in ones. */
+    private List<String> languageFilter;
+
+    private String featureName;
+    private String inputASName;
+    private String inputAnnotation;
+
+    /** Minimum probability a language needs to be recorded; null records all. */
+    private Double threshold;
+
+    /** Detector built by {@link #init()} from the selected language profiles. */
+    private LanguageDetector detector;
+
+    /**
+     * Loads the language profiles and builds the detector.
+     *
+     * @return this resource
+     * @throws ResourceInstantiationException if the profiles cannot be read or
+     *         the detector cannot be built
+     */
+    @Override
+    public Resource init() throws ResourceInstantiationException {
+        try {
+            LanguageProfileReader profileReader = new LanguageProfileReader();
+            List<LanguageProfile> languageProfiles;
+            if (languageFilter == null || languageFilter.isEmpty()) {
+                languageProfiles = profileReader.readAllBuiltIn();
+            } else {
+                languageProfiles = profileReader.read(languageFilter);
+            }
+            detector = LanguageDetectorBuilder.create(NgramExtractors.standard())
+                    .withProfiles(languageProfiles)
+                    .build();
+        } catch (IllegalStateException | IOException e) {
+            throw new ResourceInstantiationException(e);
+        }
+        return this;
+    }
+
+    /** Rebuilds the detector by running {@link #init()} again. */
+    @Override
+    public void reInit() throws ResourceInstantiationException {
+        init();
+    }
+
+    /**
+     * Detects the language of the whole document or of each selected annotation.
+     *
+     * @throws ExecutionException if no document is set or detection fails
+     */
+    @Override
+    public void execute() throws ExecutionException {
+        if (document == null) {
+            throw new ExecutionException("No document to process");
+        }
+        try {
+            if (isEmpty(inputASName) && isEmpty(inputAnnotation)) {
+                // No annotation selection configured: analyse the whole document.
+                String text = document.getContent().toString();
+                FeatureMap featureMap = document.getFeatures();
+                detectLanguage(text, featureMap);
+            } else {
+                AnnotationSet inputAnnotationSet = document.getAnnotations(inputASName);
+                if (!isEmpty(inputAnnotation)) {
+                    inputAnnotationSet = inputAnnotationSet.get(inputAnnotation);
+                }
+                for (Annotation annotation : inputAnnotationSet) {
+                    String text = document.getContent()
+                            .getContent(annotation.getStartNode().getOffset(), annotation.getEndNode().getOffset())
+                            .toString();
+                    FeatureMap featureMap = annotation.getFeatures();
+                    detectLanguage(text, featureMap);
+                }
+            }
+        } catch (Exception e) {
+            throw new ExecutionException(e);
+        }
+    }
+
+    /**
+     * Runs the detector on {@code text} and records every language that passes
+     * the threshold in {@code featureMap}.
+     */
+    private void detectLanguage(String text, FeatureMap featureMap) {
+        List<DetectedLanguage> probabilities = detector.getProbabilities(text);
+        for (DetectedLanguage detectedLanguage : probabilities) {
+            if (threshold == null || detectedLanguage.getProbability() >= threshold) {
+                appendLanguageToFeatureMap(featureMap, detectedLanguage.getLocale().getLanguage(),
+                        detectedLanguage.getProbability());
+            }
+        }
+    }
+
+    /**
+     * Appends one {@code language:probability} entry to the feature named
+     * {@link #featureName} in {@code featureMap}.
+     */
+    private void appendLanguageToFeatureMap(FeatureMap featureMap, String language, double probability) {
+        // Read the previous value from the map being written to. Looking it up on
+        // the document instead would, in annotation mode, overwrite the
+        // annotation's earlier entries or mix in the document-level value.
+        Object object = featureMap.get(featureName);
+        if (object != null) {
+            featureMap.put(featureName, object.toString() + DETECTEDLANGUAGE_SPLIT + asString(language, probability));
+        } else {
+            featureMap.put(featureName, asString(language, probability));
+        }
+    }
+
+    /** Formats one entry as {@code language:probability}. */
+    private String asString(String language, double probability) {
+        return language + PROBABILITY_SPLIT + probability;
+    }
+
+    /** Returns true for a null or zero-length string. */
+    private boolean isEmpty(String string) {
+        return string == null || string.length() == 0;
+    }
+
+    public List<String> getLanguageFilter() {
+        return languageFilter;
+    }
+
+    /** Sets the profile names read by {@link #init()}; null or empty loads all built-in profiles. */
+    @Optional
+    @CreoleParameter(comment = "Only detect following languages")
+    public void setLanguageFilter(List<String> languageFilter) {
+        this.languageFilter = languageFilter;
+    }
+
+    public String getFeatureName() {
+        return featureName;
+    }
+
+    /** Sets the name of the feature that receives the detection result. */
+    @RunTime
+    @CreoleParameter(comment = "Name of the feature to store detected language, document or annotation", defaultValue = "lang")
+    public void setFeatureName(String featureName) {
+        this.featureName = featureName;
+    }
+
+    public String getInputASName() {
+        return inputASName;
+    }
+
+    /** Sets the annotation set whose annotations are analysed instead of the whole document. */
+    @Optional
+    @RunTime
+    @CreoleParameter(comment = "analyse specific annotation instead of whole document")
+    public void setInputASName(String inputASName) {
+        this.inputASName = inputASName;
+    }
+
+    public String getInputAnnotation() {
+        return inputAnnotation;
+    }
+
+    /** Sets the annotation type to analyse instead of the whole document. */
+    @Optional
+    @RunTime
+    @CreoleParameter(comment = "analyse specific annotation instead of whole document")
+    public void setInputAnnotation(String inputAnnotation) {
+        this.inputAnnotation = inputAnnotation;
+    }
+
+    /** Sets the minimum probability for a language to be recorded; null disables the filter. */
+    @Optional
+    @RunTime
+    @CreoleParameter(comment = "Only annotate languages with threshold")
+    public void setThreshold(Double threshold) {
+        this.threshold = threshold;
+    }
+
+    public Double getThreshold() {
+        return threshold;
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/resources/creole.xml b/src/main/resources/creole.xml
new file mode 100644
index 0000000..ebeeccf
--- /dev/null
+++ b/src/main/resources/creole.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
diff --git a/src/test/java/gate/languagedetection/GappLoadingTest.java b/src/test/java/gate/languagedetection/GappLoadingTest.java
new file mode 100644
index 0000000..f6065ca
--- /dev/null
+++ b/src/test/java/gate/languagedetection/GappLoadingTest.java
@@ -0,0 +1,14 @@
+package gate.languagedetection;
+
+import gate.test.GappLoadingTestCase;
+
+/**
+ * Smoke test that checks the plugin's saved pipelines load properly.
+ *
+ * The class body is intentionally empty: per the original note, extending
+ * GappLoadingTestCase is what tries to load all pipelines (any file with an
+ * extension ".gapp" or ".xgapp") in the main/resources/resources directory tree.
+ */
+public class GappLoadingTest extends GappLoadingTestCase {
+
+}
diff --git a/src/test/java/gate/languagedetection/TestingClass.java b/src/test/java/gate/languagedetection/TestingClass.java
new file mode 100644
index 0000000..95bbcdc
--- /dev/null
+++ b/src/test/java/gate/languagedetection/TestingClass.java
@@ -0,0 +1,22 @@
+package gate.languagedetection;
+
+import gate.test.GATEPluginTests;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+
+/**
+ * Home for unit tests that need GATE and this plugin ready before they run.
+ *
+ * Per the original note, extending GATEPluginTests initializes GATE and loads
+ * the plugin, so any method in this class with the "@Test" annotation is
+ * run with the plugin already properly loaded.
+ *
+ */
+public class TestingClass extends GATEPluginTests {
+
+ @Test
+ public void testSomething() {
+ // Placeholder: contains no assertions yet, so it verifies nothing by itself.
+ }
+}
diff --git a/src/test/resources/creole.properties b/src/test/resources/creole.properties
new file mode 100644
index 0000000..951752f
--- /dev/null
+++ b/src/test/resources/creole.properties
@@ -0,0 +1,3 @@
+groupId=${project.groupId}
+artifactId=${project.artifactId}
+version=${project.version}