oracle
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AbstractAnalyzer.java
Lines changed: 167 additions & 0 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AbstractAnalyzer.java
Lines changed: 167 additions & 0 deletions
diff --git a/‎opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerFactory.java
Lines changed: 159 additions & 0 deletions b/‎opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerFactory.java
Lines changed: 159 additions & 0 deletions
@@ -0,0 +1,167 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ */
+package org.opengrok.indexer.analysis;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.Writer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.opengrok.indexer.configuration.Project;
+
+/**
+ * Abstract base for analyzers: a Lucene {@link Analyzer} that also carries
+ * the state and operations shared by concrete analyzers.
+ */
+public abstract class AbstractAnalyzer extends Analyzer {
+    /** Shared reader over the empty string. */
+    public static final Reader DUMMY_READER = new StringReader("");
+    /** Factory handed out by {@link #getFactory()}; not assigned in this class. */
+    protected AnalyzerFactory factory;
+    // Your analyzer HAS to override this to get proper symbols in results.
+    protected JFlexTokenizer symbolTokenizer;
+    /** Stored by {@link #setProject(Project)}. */
+    protected Project project;
+    /** Stored by {@link #setCtags(Ctags)}. */
+    protected Ctags ctags;
+    /** Effective scopes flag, see {@link #setScopesEnabled(boolean)}. */
+    protected boolean scopesEnabled;
+    /** Effective folding flag, see {@link #setFoldingEnabled(boolean)}. */
+    protected boolean foldingEnabled;
+
+    /**
+     * Creates the analyzer, passing the reuse strategy on to {@link Analyzer}.
+     *
+     * @param reuseStrategy strategy forwarded to the superclass constructor
+     */
+    public AbstractAnalyzer(ReuseStrategy reuseStrategy) {
+        super(reuseStrategy);
+    }
+
+    /**
+     * Gets the version number of this analyzer.
+     *
+     * @return the version number, as defined by the subclass
+     */
+    public abstract long getVersionNo();
+
+    /**
+     * Gets a value that subclasses should override so that it tracks the
+     * evolution of their own analysis from release to release.
+     *
+     * @return 0, because this base class is not specialized
+     */
+    protected int getSpecializedVersionNo() {
+        return 0;
+    }
+
+    /**
+     * Stores the ctags instance for this analyzer.
+     *
+     * @param ctags the instance to keep
+     */
+    public void setCtags(Ctags ctags) {
+        this.ctags = ctags;
+    }
+
+    /**
+     * Stores the project for this analyzer.
+     *
+     * @param project the project to keep
+     */
+    public void setProject(Project project) {
+        this.project = project;
+    }
+
+    /**
+     * Enables scopes only if requested and {@link #supportsScopes()} allows
+     * it; otherwise scopes are switched off.
+     *
+     * @param scopesEnabled the requested setting
+     */
+    public void setScopesEnabled(boolean scopesEnabled) {
+        // The capability check is evaluated first, whatever was requested.
+        final boolean supported = supportsScopes();
+        this.scopesEnabled = supported && scopesEnabled;
+    }
+
+    /**
+     * Enables folding only if requested and {@link #supportsScopes()} allows
+     * it; folding is gated on the same check that is used for scopes.
+     *
+     * @param foldingEnabled the requested setting
+     */
+    public void setFoldingEnabled(boolean foldingEnabled) {
+        // The capability check is evaluated first, whatever was requested.
+        final boolean supported = supportsScopes();
+        this.foldingEnabled = supported && foldingEnabled;
+    }
+
+    /**
+     * Tells whether this analyzer supports scopes.
+     *
+     * @return {@code true} if scopes (and thereby folding) may be enabled
+     */
+    protected abstract boolean supportsScopes();
+
+    /**
+     * Gets the factory associated with this analyzer.
+     *
+     * @return the {@code AnalyzerFactory} which created this analyzer
+     */
+    public final AnalyzerFactory getFactory() {
+        return this.factory;
+    }
+
+    /**
+     * Gets the genre reported by this analyzer's factory. The factory must
+     * be set, as it is dereferenced without a {@code null} check.
+     *
+     * @return the result of {@link AnalyzerFactory#getGenre()}
+     */
+    public AbstractAnalyzer.Genre getGenre() {
+        return getFactory().getGenre();
+    }
+
+    /**
+     * Gets the file type name of this analyzer.
+     *
+     * @return the file type name, as defined by the subclass
+     */
+    public abstract String getFileTypeName();
+
+    /**
+     * Analyzes the given source. How {@code doc} and {@code xrefOut} are
+     * used is defined by the implementation.
+     *
+     * @param doc the Lucene document taking part in the analysis
+     * @param src the source to analyze
+     * @param xrefOut the writer handed to the implementation for xref output
+     * @throws IOException if the implementation signals an I/O failure
+     * @throws InterruptedException if the implementation signals interruption
+     */
+    public abstract void analyze(Document doc, StreamSource src, Writer xrefOut)
+            throws IOException, InterruptedException;
+
+    /**
+     * Writes a cross-reference as described by the given arguments.
+     *
+     * @param args the arguments for the xref operation
+     * @return the {@link Xrefer} supplied by the implementation
+     * @throws IOException if the implementation signals an I/O failure
+     */
+    public abstract Xrefer writeXref(WriteXrefArgs args) throws IOException;
+
+    /**
+     * Creates the token stream components for the given field, as required
+     * by {@link Analyzer}.
+     *
+     * @param fieldName the name of the field to build components for
+     * @return the components for {@code fieldName}
+     */
+    @Override
+    protected abstract TokenStreamComponents createComponents(String fieldName);
+
+    // Presumably records the number of lines on the document; the exact
+    // field and encoding are up to the implementation.
+    protected abstract void addNumLines(Document doc, int value);
+
+    // Presumably records the lines-of-code count on the document; the exact
+    // field and encoding are up to the implementation.
+    protected abstract void addLOC(Document doc, int value);
+
+    /**
+     * Normalizes the given token stream for the given field, as required by
+     * {@link Analyzer}.
+     *
+     * @param fieldName the name of the field being normalized
+     * @param in the token stream to normalize
+     * @return the normalized token stream
+     */
+    @Override
+    protected abstract TokenStream normalize(String fieldName, TokenStream in);
+
+    /**
+     * Classifies files by whether they are xrefed and which kind of context
+     * they provide.
+     */
+    public enum Genre {
+        /** Xrefed, with line-numbered context. */
+        PLAIN("p"),
+        /** Xrefed, with summarizer context. */
+        XREFABLE("x"),
+        /** Not xrefed and without context; used by diff/list. */
+        IMAGE("i"),
+        /** Not xrefed and without context. */
+        DATA("d"),
+        /** Not xrefed; summarizer context from the original file. */
+        HTML("h");
+
+        private final String typeName;
+
+        Genre(String typeName) {
+            this.typeName = typeName;
+        }
+
+        /**
+         * Gets the type name value used to tag Lucene documents.
+         *
+         * @return a non-null string
+         */
+        public String typeName() {
+            return this.typeName;
+        }
+
+        /**
+         * Looks up the genre whose {@link #typeName()} equals the given name.
+         *
+         * @param typeName name to look up; may be {@code null}
+         * @return the matching genre, or {@code null} when {@code typeName}
+         * is {@code null} or matches no genre
+         * @see #typeName()
+         */
+        public static Genre get(String typeName) {
+            Genre match = null;
+            if (typeName != null) {
+                for (Genre candidate : values()) {
+                    if (typeName.equals(candidate.typeName)) {
+                        match = candidate;
+                        break;
+                    }
+                }
+            }
+            return match;
+        }
+    }
+}
@@ -0,0 +1,159 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ */
+package org.opengrok.indexer.analysis;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Abstract base for factories of {@link AbstractAnalyzer} objects. It holds
+ * the criteria (names, prefixes, suffixes, magic strings, matchers) together
+ * with the content type and genre that the accessors below expose.
+ */
+public abstract class AnalyzerFactory {
+    /**
+     * Per-thread cached analyzer object (analyzer objects can be expensive
+     * to allocate).
+     */
+    protected final ThreadLocal<AbstractAnalyzer> cachedAnalyzer = new ThreadLocal<>();
+    /**
+     * File names on which this kind of analyzer should be used. Not assigned
+     * by this class, so it stays {@code null} until a subclass sets it.
+     */
+    protected List<String> names;
+    /**
+     * File prefixes on which this kind of analyzer should be used. Not
+     * assigned by this class, so it stays {@code null} until a subclass sets
+     * it.
+     */
+    protected List<String> prefixes;
+    /**
+     * File extensions on which this kind of analyzer should be used. Not
+     * assigned by this class, so it stays {@code null} until a subclass sets
+     * it.
+     */
+    protected List<String> suffixes;
+    /**
+     * Magic strings used to recognize files on which this kind of analyzer
+     * should be used. Not assigned by this class, so it stays {@code null}
+     * until a subclass sets it.
+     */
+    protected List<String> magics;
+    /**
+     * Matchers which delegate files to different types of analyzers; empty
+     * or holding exactly the one matcher given to the constructor.
+     */
+    protected final List<FileAnalyzerFactory.Matcher> matchers;
+    /**
+     * The content type for the files recognized by this kind of analyzer.
+     */
+    protected final String contentType;
+    /**
+     * The genre for files recognized by this kind of analyzer. Not assigned
+     * by this class, so it stays {@code null} until a subclass sets it.
+     */
+    protected AbstractAnalyzer.Genre genre;
+
+    /**
+     * Creates a factory with at most one matcher and the given content type.
+     *
+     * @param matcher the only matcher of this factory, or {@code null} for
+     * an empty matcher list
+     * @param contentType the content type to report; stored as given
+     */
+    public AnalyzerFactory(FileAnalyzerFactory.Matcher matcher, String contentType) {
+        this.contentType = contentType;
+        this.matchers = (matcher != null)
+                ? Collections.singletonList(matcher)
+                : Collections.<FileAnalyzerFactory.Matcher>emptyList();
+    }
+
+    /**
+     * Gets the file names recognized by this analyzer (names must match
+     * exactly, ignoring case).
+     *
+     * @return list of file names
+     */
+    final List<String> getFileNames() {
+        return this.names;
+    }
+
+    /**
+     * Gets the file prefixes recognized by this analyzer.
+     *
+     * @return list of prefixes
+     */
+    final List<String> getPrefixes() {
+        return this.prefixes;
+    }
+
+    /**
+     * Gets the file extensions recognized by this analyzer.
+     *
+     * @return list of suffixes
+     */
+    final List<String> getSuffixes() {
+        return this.suffixes;
+    }
+
+    /**
+     * Gets the magic strings recognized by this analyzer. If a file starts
+     * with one of these strings, an analyzer created by this factory should
+     * be used to analyze it.
+     *
+     * <p><b>Note:</b> Currently this assumes that the file is encoded with
+     * UTF-8 unless a BOM is detected.
+     *
+     * @return list of magic strings
+     */
+    final List<String> getMagicStrings() {
+        return this.magics;
+    }
+
+    /**
+     * Gets the matchers that map file contents to analyzer factories
+     * programmatically.
+     *
+     * @return list of matchers (never {@code null}; possibly empty)
+     */
+    final List<FileAnalyzerFactory.Matcher> getMatchers() {
+        return this.matchers;
+    }
+
+    /**
+     * Gets the content type (MIME type) for analyzers returned by this
+     * factory.
+     *
+     * @return content type (could be {@code null} if it is unknown)
+     */
+    final String getContentType() {
+        return this.contentType;
+    }
+
+    /**
+     * Gets the genre this analyzer factory belongs to.
+     *
+     * @return a genre
+     */
+    public final AbstractAnalyzer.Genre getGenre() {
+        return this.genre;
+    }
+
+    /**
+     * Gets the user friendly name of this analyzer.
+     *
+     * @return a name
+     */
+    public abstract String getName();
+
+    /**
+     * Gets an analyzer from this factory. Whether the per-thread cache is
+     * consulted is up to the implementation.
+     *
+     * @return an analyzer
+     */
+    public abstract AbstractAnalyzer getAnalyzer();
+
+    // Presumably hands back the analyzer obtained via getAnalyzer(); the
+    // exact contract is defined by implementations.
+    public abstract void returnAnalyzer();
+
+    /**
+     * Creates a new analyzer instance.
+     *
+     * @return the analyzer built by the implementation
+     */
+    protected abstract AbstractAnalyzer newAnalyzer();
+}