-
Notifications
You must be signed in to change notification settings - Fork 222
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(DiscoveryV1): Tokenization dictionaries for collections
Adds the methods createTokenizationDictionary(), getTokenizationDictionaryStatus(), and deleteTokenizationDictionary(). This feature is currently only supported for Japanese-language collections.
- Loading branch information
Anthony Oliveri
committed
Nov 2, 2018
1 parent
54d1cca
commit d274371
Showing
5 changed files
with
365 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/** | ||
* Copyright IBM Corporation 2018 | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
**/ | ||
|
||
import Foundation | ||
|
||
/**
 A tokenization dictionary: the set of rules describing how words are tokenized during ingestion and at query time.
 */
internal struct TokenDict: Encodable {

    /**
     The tokenization rules that make up this dictionary. Every rule carries the original `text` string, its
     component `tokens`, any alternate character set `readings`, and the `part_of_speech` the text is from.
     */
    public var tokenizationRules: [TokenDictRule]?

    // JSON key used for each property when the value is encoded.
    private enum CodingKeys: String, CodingKey {
        case tokenizationRules = "tokenization_rules"
    }

    /**
     Create a `TokenDict` from its member values.

     - parameter tokenizationRules: The tokenization rules for the dictionary. Every rule carries the original
       `text` string, its component `tokens`, any alternate character set `readings`, and the `part_of_speech`
       the text is from. Defaults to `nil`.
     - returns: An initialized `TokenDict`.
     */
    public init(tokenizationRules: [TokenDictRule]? = nil) {
        self.tokenizationRules = tokenizationRules
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
/** | ||
* Copyright IBM Corporation 2018 | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
**/ | ||
|
||
import Foundation | ||
|
||
/**
 An object defining a single tokenization rule.
 */
public struct TokenDictRule: Encodable {

    /**
     The string to tokenize.
     */
    public var text: String?

    /**
     Array of tokens that the `text` field is split into when found.
     */
    public var tokens: [String]?

    /**
     Array of tokens that represent the content of the `text` field in an alternate character set.
     */
    public var readings: [String]?

    /**
     The part of speech that the `text` string belongs to. For example `noun`. Custom parts of speech can be
     specified.
     */
    public var partOfSpeech: String?

    // Map each property name to the key that shall be used for encoding.
    // A case without an explicit raw value uses its own name as the key, so only
    // the snake_case key has to be spelled out.
    private enum CodingKeys: String, CodingKey {
        case text
        case tokens
        case readings
        case partOfSpeech = "part_of_speech"
    }

    /**
     Initialize a `TokenDictRule` with member variables.

     - parameter text: The string to tokenize.
     - parameter tokens: Array of tokens that the `text` field is split into when found.
     - parameter readings: Array of tokens that represent the content of the `text` field in an alternate
       character set.
     - parameter partOfSpeech: The part of speech that the `text` string belongs to. For example `noun`. Custom
       parts of speech can be specified.
     - returns: An initialized `TokenDictRule`.
     */
    public init(
        text: String? = nil,
        tokens: [String]? = nil,
        readings: [String]? = nil,
        partOfSpeech: String? = nil
    ) {
        self.text = text
        self.tokens = tokens
        self.readings = readings
        self.partOfSpeech = partOfSpeech
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/** | ||
* Copyright IBM Corporation 2018 | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
**/ | ||
|
||
import Foundation | ||
|
||
/**
 Object describing the current status of the tokenization dictionary.
 */
public struct TokenDictStatusResponse: Decodable {

    /**
     Current tokenization dictionary status for the specified collection.

     The raw value of each case is the status string itself, so a decoded `status` string can be converted with
     `Status(rawValue:)`.
     */
    public enum Status: String {
        case active
        case pending
        case notFound = "not found"
    }

    /**
     Current tokenization dictionary status for the specified collection.

     Kept as a plain string. NOTE(review): presumably always one of the `Status` raw values — confirm against the
     service documentation before relying on `Status(rawValue:)` succeeding.
     */
    public var status: String?

    /**
     The type for this dictionary. Always returns `tokenization_dictionary`.
     */
    public var type: String?

    // Map each property name to the key that shall be used for decoding.
    // Both keys are identical to the property names, so no explicit raw values are needed.
    private enum CodingKeys: String, CodingKey {
        case status
        case type
    }

}
Oops, something went wrong.