Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions src/main/java/com/worksap/nlp/sudachi/Config.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,7 +29,6 @@
import java.lang.reflect.InvocationTargetException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
Expand Down Expand Up @@ -242,7 +241,10 @@ public static Config fromClasspathMerged(String name) throws IOException {
*/
public static <T> Config fromResource(Resource<T> resource, PathAnchor anchor) throws IOException {
if (resource instanceof Resource.Classpath) {
return fromClasspath((URL) resource.repr(), anchor);
try (InputStream stream = resource.asInputStream()) {
String data = StringUtil.readFully(stream);
return fromSettings(Settings.parse(data, anchor));
}
}
if (resource instanceof Resource.Filesystem) {
return fromFile((Path) resource.repr(), anchor);
Expand Down Expand Up @@ -945,13 +947,28 @@ public int hashCode() {
*/
public static class Classpath<T> extends Resource<T> {
private final URL url;
private final ClassLoader loader;
private final String resourceName;

/**
 * Creates a classpath resource that is known only by its URL.
 * <p>
 * No class loader or resource name is recorded (both are passed as
 * {@code null}), so {@link #asInputStream()} opens the URL directly.
 *
 * @param url
 *            location of the resource
 */
Classpath(URL url) {
this(url, null, null);
}

/**
 * Creates a classpath resource that also records how it was looked up.
 * <p>
 * When both {@code loader} and {@code resourceName} are non-null,
 * {@link #asInputStream()} reads the resource through the loader instead of
 * opening {@code url}.
 *
 * @param url
 *            location of the resource
 * @param loader
 *            class loader used to re-open the resource, may be {@code null}
 * @param resourceName
 *            name handed to {@code loader} when re-opening the resource, may
 *            be {@code null}
 */
Classpath(URL url, ClassLoader loader, String resourceName) {
this.url = url;
this.loader = loader;
this.resourceName = resourceName;
}

/**
 * Opens the resource for reading.
 * <p>
 * The recorded class loader is used when both it and the resource name are
 * available; otherwise the stream is opened from the URL.
 *
 * @return a freshly opened stream; the caller is responsible for closing it
 * @throws FileNotFoundException
 *             if the class loader cannot find the recorded resource name
 * @throws IOException
 *             if the URL stream cannot be opened
 */
@Override
public InputStream asInputStream() throws IOException {
    // Without a complete (loader, name) pair only the URL can be used.
    if (loader == null || resourceName == null) {
        return url.openStream();
    }

    InputStream opened = loader.getResourceAsStream(resourceName);
    if (opened != null) {
        return opened;
    }
    // getResourceAsStream signals a missing resource with null, not an exception.
    throw new FileNotFoundException(resourceName);
}

Expand All @@ -960,7 +977,9 @@ public ByteBuffer asByteBuffer() throws IOException {
if (Objects.equals(url.getProtocol(), "file")) {
return MMap.map(url.getPath());
}
return StringUtil.readAllBytes(url);
try (InputStream stream = asInputStream()) {
return StringUtil.readAllBytes(stream);
}
}

@Override
Expand All @@ -980,12 +999,13 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass())
return false;
Classpath<?> classpath = (Classpath<?>) o;
return Objects.equals(url, classpath.url);
return Objects.equals(url, classpath.url) && Objects.equals(loader, classpath.loader)
&& Objects.equals(resourceName, classpath.resourceName);
}

@Override
public int hashCode() {
return Objects.hash(url);
return Objects.hash(url, loader, resourceName);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,7 +68,7 @@ public Dictionary create(Config config) throws IOException {
@Deprecated()
public Dictionary create(String settings) throws IOException {
Config defaults = Config.defaultConfig();
Config passed = Config.fromJsonString(settings, PathAnchor.classpath().andThen(PathAnchor.none()));
Config passed = Config.fromJsonString(settings, PathAnchor.classpath().andThen(PathAnchor.filesystem()));
Config merged = passed.withFallback(defaults);
return create(merged);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -26,7 +26,6 @@
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -134,7 +133,7 @@ LatticeNodeImpl getOOVNode(String text, OOV oov, int length) {

<T> void readCharacterProperty(Config.Resource<T> charDef) throws IOException {
if (charDef == null) {
charDef = settings.base.toResource(Paths.get("char.def"));
charDef = settings.base.resource("char.def");
}
try (InputStream input = charDef.asInputStream();
InputStreamReader isReader = new InputStreamReader(input, StandardCharsets.UTF_8);
Expand Down Expand Up @@ -170,7 +169,7 @@ <T> void readCharacterProperty(Config.Resource<T> charDef) throws IOException {

<T> void readOOV(Config.Resource<T> unkDef, Grammar grammar, String userPosMode) throws IOException {
if (unkDef == null) {
unkDef = settings.base.toResource(Paths.get("unk.def"));
unkDef = settings.base.resource("unk.def");
}
try (InputStream input = unkDef.asInputStream();
InputStreamReader isReader = new InputStreamReader(input, StandardCharsets.UTF_8);
Expand Down
66 changes: 59 additions & 7 deletions src/main/java/com/worksap/nlp/sudachi/PathAnchor.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,13 +34,13 @@
* <p>
* There are three types of anchors:
* <ul>
* <li>{@link None} which will resolve paths as filesystem, relative to the
* CWD</li>
* <li>{@link None} which will not search for resources</li>
* <li>{@link Filesystem} which will resolve paths relative to a provided
* directory</li>
* <li>{@link Classpath} which will resolve classpath resources</li>
* </ul>
* Use static methods for their creation.
* Use static methods for their creation. To resolve paths relative to the
* current working directory, use {@link #filesystem()}.
* <p>
* One more utility of this class is to capture multiple classloaders in case of
* complex environment with multiple classloaders (e.g. ElasticSearch plugins).
Expand Down Expand Up @@ -128,10 +128,19 @@ public static PathAnchor filesystem(String path) {
}

/**
* Create a filesystem anchor relative to the current directory
* Create a filesystem anchor relative to the current working directory
*
* @return filesystem anchor
*/
public static PathAnchor filesystem() {
// Delegates to filesystem(String) with an empty base path; per this method's
// Javadoc the resulting anchor resolves relative to the current working
// directory.
return filesystem("");
}

/**
* Create an anchor which does not search for resources
*
* @return non-searching anchor
*/
public static PathAnchor none() {
// None has a private constructor and a single static final INSTANCE, so every
// call returns the same object.
return None.INSTANCE;
}
Expand Down Expand Up @@ -164,6 +173,12 @@ public boolean exists(Path path) {

/**
* Create a resource for the fully resolved path.
* <p>
* For chained anchors, this method may behave differently from
* {@code resource(part)} when passed a path returned by {@code resolve(part)}.
* Another child anchor may accept the already resolved path and reinterpret it
* through a different base. Use {@link #resource(String)} when resolution and
* resource lookup must stay on the same child anchor.
*
* @param path
* fully resolved path
Expand All @@ -181,6 +196,9 @@ public <T> Config.Resource<T> toResource(Path path) {
/**
* Create a resource for the passed string path.
* <p>
* This is the preferred API for chained anchors because the same child anchor
* performs both resolution and resource lookup.
*
* @param path
* path to the resource
* @return resource, encapsulating the path
Expand All @@ -204,6 +222,12 @@ public PathAnchor andThen(PathAnchor other) {
if (this.equals(other)) {
return this;
}
if (other instanceof None) {
return this;
}
if (this instanceof None) {
return other;
}
return new Chain(this, other);
}

Expand Down Expand Up @@ -285,11 +309,12 @@ public boolean exists(Path path) {

@Override
public <T> Config.Resource<T> toResource(Path path) {
URL resource = loader.getResource(resourceName(path));
String name = resourceName(path);
URL resource = loader.getResource(name);
if (resource == null) {
return new Config.Resource.NotFound<>(path, this);
}
return new Config.Resource.Classpath<>(resource);
return new Config.Resource.Classpath<>(resource, loader, name);
}

@Override
Expand Down Expand Up @@ -357,6 +382,8 @@ public boolean exists(Path path) {

@Override
public <T> Config.Resource<T> toResource(Path path) {
// Note: Another child may also accept an already-resolved path and
// reinterpret it through a different base.
for (PathAnchor child : children) {
if (child.exists(path)) {
return child.toResource(path);
Expand All @@ -366,6 +393,21 @@ public <T> Config.Resource<T> toResource(Path path) {
return new Config.Resource.NotFound<>(path, this);
}

/**
 * Looks the path up through each child anchor in order.
 * <p>
 * Every child resolves the raw path itself, and the same child is then asked
 * whether the resolved path exists, so resolution and lookup never cross
 * between children.
 *
 * @param path
 *            path to the resource
 * @return resource created by the first child for which the resolved path
 *         exists; otherwise a not-found resource holding the path resolved by
 *         the last child (or the raw path when there are no children)
 */
@Override
public <T> Config.Resource<T> resource(String path) {
    // Remembers the most recent resolution so the not-found result can report it.
    Path fallback = null;
    for (PathAnchor anchor : children) {
        fallback = anchor.resolve(path);
        if (!anchor.exists(fallback)) {
            logger.fine(() -> String.format("%s: %s does not exist, skipping", anchor, path));
            continue;
        }
        return anchor.toResource(fallback);
    }

    Path missing = fallback != null ? fallback : Paths.get(path);
    return new Config.Resource.NotFound<>(missing, this);
}

@Override
public Class<?> lookupClass(String name) throws ClassNotFoundException {
for (PathAnchor child : children) {
Expand Down Expand Up @@ -413,6 +455,16 @@ private None() {

private static final None INSTANCE = new None();

/**
 * Reports every path as missing.
 *
 * @param path
 *            path to check; not inspected
 * @return always {@code false}
 */
@Override
public boolean exists(Path path) {
return false;
}

/**
 * Produces a not-found resource for any path.
 *
 * @param path
 *            path that was requested
 * @return a {@code NotFound} resource carrying {@code path} and this anchor
 */
@Override
public <T> Config.Resource<T> toResource(Path path) {
return new Config.Resource.NotFound<>(path, this);
}

@Override
public String toString() {
return "None{}";
Expand Down
9 changes: 4 additions & 5 deletions src/main/java/com/worksap/nlp/sudachi/Settings.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -161,7 +161,7 @@ public Settings read(URL resource) throws IOException {
*/
@Deprecated
public static Settings parseSettings(String path, String json) {
PathAnchor anchor = path == null ? PathAnchor.none() : PathAnchor.filesystem(Paths.get(path));
PathAnchor anchor = path == null ? PathAnchor.filesystem() : PathAnchor.filesystem(Paths.get(path));
anchor = anchor.andThen(PathAnchor.classpath());
return parse(json, anchor);
}
Expand Down Expand Up @@ -193,7 +193,7 @@ public static Settings parse(String json, PathAnchor resolver) {
}
return new Settings(root, resolver);
} else {
PathAnchor pathResolver = PathAnchor.filesystem(Paths.get(basePath));
PathAnchor pathResolver = PathAnchor.filesystem(basePath);
if (resolver != null) {
pathResolver = pathResolver.andThen(resolver);
}
Expand Down Expand Up @@ -461,8 +461,7 @@ public <T> Config.Resource<T> getResource(String setting) {
}

private <T> Config.Resource<T> extractResource(String path) {
Path obj = base.resolve(path);
return base.toResource(obj);
return base.resource(path);
}

/**
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/com/worksap/nlp/sudachi/SudachiCommandLine.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -192,7 +192,7 @@ public static void main(String[] args) throws IOException {
}

Tokenizer.SplitMode mode = Tokenizer.SplitMode.C;
PathAnchor anchor = PathAnchor.classpath().andThen(PathAnchor.none());
PathAnchor anchor = PathAnchor.classpath().andThen(PathAnchor.filesystem());
Settings current = Settings.resolvedBy(anchor)
.read(SudachiCommandLine.class.getClassLoader().getResource("sudachi.json"));
Config additional = Config.empty();
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/com/worksap/nlp/sudachi/ConfigTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class ConfigTest {
fun anchoredWith() {
val cfg = Config.empty()
cfg.anchoredWith(PathAnchor.filesystem("test"))
assertIs<PathAnchor.Chain>(cfg.anchor)
assertIs<PathAnchor.Filesystem>(cfg.anchor)
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2026 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,7 +44,7 @@ public void setUp() {
plugin = new DefaultInputTextPlugin();
try {
PathAnchor anchor = PathAnchor.classpath();
plugin.rewriteDef = anchor.toResource(anchor.resolve("rewrite.def"));
plugin.rewriteDef = anchor.resource("rewrite.def");
plugin.setUp(new MockGrammar());
} catch (IOException ex) {
ex.printStackTrace();
Expand Down Expand Up @@ -112,23 +112,23 @@ public void setUpWithNull() throws IOException {
public void invalidFormatOfIgnoreList() throws IOException {
plugin = new DefaultInputTextPlugin();
PathAnchor anchor = PathAnchor.classpath();
plugin.rewriteDef = anchor.toResource(anchor.resolve("rewrite_error_ignorelist.def"));
plugin.rewriteDef = anchor.resource("rewrite_error_ignorelist.def");
plugin.setUp(new MockGrammar());
}

@Test(expected = IllegalArgumentException.class)
public void invalidFormatOfReplaceList() throws IOException {
plugin = new DefaultInputTextPlugin();
PathAnchor anchor = PathAnchor.classpath();
plugin.rewriteDef = anchor.toResource(anchor.resolve("rewrite_error_replacelist.def"));
plugin.rewriteDef = anchor.resource("rewrite_error_replacelist.def");
plugin.setUp(new MockGrammar());
}

@Test(expected = IllegalArgumentException.class)
public void duplicatedLinesInReplaceList() throws IOException {
plugin = new DefaultInputTextPlugin();
PathAnchor anchor = PathAnchor.classpath();
plugin.rewriteDef = anchor.toResource(anchor.resolve("rewrite_error_dup.def"));
plugin.rewriteDef = anchor.resource("rewrite_error_dup.def");
plugin.setUp(new MockGrammar());
}
}
Loading