Skip to content

Commit 57eefa4

Browse files
committed
Extracting base classes for analyzer factory and analyzers
- some base class is needed as a reference for the framework approaches #2588
1 parent d1e826f commit 57eefa4

File tree

162 files changed

+949
-847
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

162 files changed

+949
-847
lines changed
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.analysis;
24+
25+
import java.io.IOException;
26+
import java.io.Reader;
27+
import java.io.StringReader;
28+
import java.io.Writer;
29+
import org.apache.lucene.analysis.Analyzer;
30+
import org.apache.lucene.analysis.TokenStream;
31+
import org.apache.lucene.document.Document;
32+
import org.opengrok.indexer.configuration.Project;
33+
34+
public abstract class AbstractAnalyzer extends Analyzer {
35+
public static final Reader DUMMY_READER = new StringReader("");
36+
protected AnalyzerFactory factory;
37+
// you analyzer HAS to override this to get proper symbols in results
38+
protected JFlexTokenizer symbolTokenizer;
39+
protected Project project;
40+
protected Ctags ctags;
41+
protected boolean scopesEnabled;
42+
protected boolean foldingEnabled;
43+
44+
public AbstractAnalyzer(ReuseStrategy reuseStrategy) {
45+
super(reuseStrategy);
46+
}
47+
48+
public abstract long getVersionNo();
49+
50+
/**
51+
* Subclasses should override to produce a value relevant for the evolution
52+
* of their analysis in each release.
53+
*
54+
* @return 0
55+
*/
56+
protected int getSpecializedVersionNo() {
57+
return 0; // FileAnalyzer is not specialized.
58+
}
59+
60+
public void setCtags(Ctags ctags) {
61+
this.ctags = ctags;
62+
}
63+
64+
public void setProject(Project project) {
65+
this.project = project;
66+
}
67+
68+
public void setScopesEnabled(boolean scopesEnabled) {
69+
this.scopesEnabled = supportsScopes() && scopesEnabled;
70+
}
71+
72+
public void setFoldingEnabled(boolean foldingEnabled) {
73+
this.foldingEnabled = supportsScopes() && foldingEnabled;
74+
}
75+
76+
protected abstract boolean supportsScopes();
77+
78+
/**
79+
* Get the factory which created this analyzer.
80+
*
81+
* @return the {@code FileAnalyzerFactory} which created this analyzer
82+
*/
83+
public final AnalyzerFactory getFactory() {
84+
return factory;
85+
}
86+
87+
public AbstractAnalyzer.Genre getGenre() {
88+
return factory.getGenre();
89+
}
90+
91+
public abstract String getFileTypeName();
92+
93+
public abstract void analyze(Document doc, StreamSource src, Writer xrefOut)
94+
throws IOException, InterruptedException;
95+
96+
public abstract Xrefer writeXref(WriteXrefArgs args) throws IOException;
97+
98+
@Override
99+
protected abstract TokenStreamComponents createComponents(String fieldName);
100+
101+
protected abstract void addNumLines(Document doc, int value);
102+
103+
protected abstract void addLOC(Document doc, int value);
104+
105+
@Override
106+
protected abstract TokenStream normalize(String fieldName, TokenStream in);
107+
108+
/**
109+
* What kind of file is this?
110+
*/
111+
public enum Genre {
112+
/**
113+
* xrefed - line numbered context
114+
*/
115+
PLAIN("p"),
116+
/**
117+
* xrefed - summarizer context
118+
*/
119+
XREFABLE("x"),
120+
/**
121+
* not xrefed - no context - used by diff/list
122+
*/
123+
IMAGE("i"),
124+
/**
125+
* not xrefed - no context
126+
*/
127+
DATA("d"),
128+
/**
129+
* not xrefed - summarizer context from original file
130+
*/
131+
HTML("h");
132+
private final String typeName;
133+
134+
Genre(String typename) {
135+
this.typeName = typename;
136+
}
137+
138+
/**
139+
* Get the type name value used to tag lucene documents.
140+
*
141+
* @return a none-null string.
142+
*/
143+
public String typeName() {
144+
return typeName;
145+
}
146+
147+
/**
148+
* Get the Genre for the given type name.
149+
*
150+
* @param typeName name to check
151+
* @return {@code null} if it doesn't match any genre, the genre
152+
* otherwise.
153+
* @see #typeName()
154+
*/
155+
public static Genre get(String typeName) {
156+
if (typeName == null) {
157+
return null;
158+
}
159+
for (Genre g : values()) {
160+
if (g.typeName.equals(typeName)) {
161+
return g;
162+
}
163+
}
164+
return null;
165+
}
166+
}
167+
}
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.analysis;
24+
25+
import java.util.Collections;
26+
import java.util.List;
27+
28+
public abstract class AnalyzerFactory {
29+
/**
30+
* Cached analyzer object for the current thread (analyzer objects can be
31+
* expensive to allocate).
32+
*/
33+
protected final ThreadLocal<AbstractAnalyzer> cachedAnalyzer;
34+
/**
35+
* List of file names on which this kind of analyzer should be used.
36+
*/
37+
protected List<String> names;
38+
/**
39+
* List of file prefixes on which this kind of analyzer should be
40+
* used.
41+
*/
42+
protected List<String> prefixes;
43+
/**
44+
* List of file extensions on which this kind of analyzer should be
45+
* used.
46+
*/
47+
protected List<String> suffixes;
48+
/**
49+
* List of magic strings used to recognize files on which this kind of
50+
* analyzer should be used.
51+
*/
52+
protected List<String> magics;
53+
/**
54+
* List of matchers which delegate files to different types of
55+
* analyzers.
56+
*/
57+
protected final List<FileAnalyzerFactory.Matcher> matchers;
58+
/**
59+
* The content type for the files recognized by this kind of analyzer.
60+
*/
61+
protected final String contentType;
62+
/**
63+
* The genre for files recognized by this kind of analyzer.
64+
*/
65+
protected AbstractAnalyzer.Genre genre;
66+
67+
public AnalyzerFactory(FileAnalyzerFactory.Matcher matcher, String contentType) {
68+
cachedAnalyzer = new ThreadLocal<>();
69+
if (matcher == null) {
70+
this.matchers = Collections.emptyList();
71+
} else {
72+
this.matchers = Collections.singletonList(matcher);
73+
}
74+
this.contentType = contentType;
75+
}
76+
77+
/**
78+
* Get the list of file names recognized by this analyzer (names must
79+
* match exactly, ignoring case).
80+
*
81+
* @return list of file names
82+
*/
83+
final List<String> getFileNames() {
84+
return names;
85+
}
86+
87+
/**
88+
* Get the list of file prefixes recognized by this analyzer.
89+
*
90+
* @return list of prefixes
91+
*/
92+
final List<String> getPrefixes() {
93+
return prefixes;
94+
}
95+
96+
/**
97+
* Get the list of file extensions recognized by this analyzer.
98+
*
99+
* @return list of suffixes
100+
*/
101+
final List<String> getSuffixes() {
102+
return suffixes;
103+
}
104+
105+
/**
106+
* Get the list of magic strings recognized by this analyzer. If a file
107+
* starts with one of these strings, an analyzer created by this factory
108+
* should be used to analyze it.
109+
*
110+
* <p><b>Note:</b> Currently this assumes that the file is encoded with
111+
* UTF-8 unless a BOM is detected.
112+
*
113+
* @return list of magic strings
114+
*/
115+
final List<String> getMagicStrings() {
116+
return magics;
117+
}
118+
119+
/**
120+
* Get matchers that map file contents to analyzer factories
121+
* programmatically.
122+
*
123+
* @return list of matchers
124+
*/
125+
final List<FileAnalyzerFactory.Matcher> getMatchers() {
126+
return matchers;
127+
}
128+
129+
/**
130+
* Get the content type (MIME type) for analyzers returned by this factory.
131+
*
132+
* @return content type (could be {@code null} if it is unknown)
133+
*/
134+
final String getContentType() {
135+
return contentType;
136+
}
137+
138+
/**
139+
* The genre this analyzer factory belongs to.
140+
*
141+
* @return a genre
142+
*/
143+
public final AbstractAnalyzer.Genre getGenre() {
144+
return genre;
145+
}
146+
147+
/**
148+
* The user friendly name of this analyzer
149+
*
150+
* @return a genre
151+
*/
152+
public abstract String getName();
153+
154+
public abstract AbstractAnalyzer getAnalyzer();
155+
156+
public abstract void returnAnalyzer();
157+
158+
protected abstract AbstractAnalyzer newAnalyzer();
159+
}

0 commit comments

Comments
 (0)