Skip to content

Commit d274371

Browse files
author
Anthony Oliveri
committed
feat(DiscoveryV1): Tokenization dictionaries for collections
Adds the methods createTokenizationDictionary(), getTokenizationDictionaryStatus(), and deleteTokenizationDictionary(). This feature is currently only supported for Japanese-language collections.
1 parent 54d1cca commit d274371

File tree

5 files changed

+365
-0
lines changed

5 files changed

+365
-0
lines changed

Source/DiscoveryV1/Discovery.swift

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,6 +1432,183 @@ public class Discovery {
14321432
}
14331433
}
14341434

1435+
/**
 Get tokenization dictionary status.

 Returns the current status of the tokenization dictionary for the specified collection.

 - parameter environmentID: The ID of the environment.
 - parameter collectionID: The ID of the collection.
 - parameter headers: A dictionary of request headers to be sent with this request.
 - parameter failure: A function executed if an error occurs.
 - parameter success: A function executed with the successful result.
 */
public func getTokenizationDictionaryStatus(
    environmentID: String,
    collectionID: String,
    headers: [String: String]? = nil,
    failure: ((Error) -> Void)? = nil,
    success: @escaping (TokenDictStatusResponse) -> Void)
{
    // construct query parameters
    let queryParameters = [URLQueryItem(name: "version", value: version)]

    // construct header parameters; caller-supplied headers override the defaults
    var requestHeaders = defaultHeaders
    headers?.forEach { requestHeaders[$0.key] = $0.value }
    requestHeaders["Accept"] = "application/json"

    // construct REST request
    let path = "/v1/environments/\(environmentID)/collections/\(collectionID)/word_lists/tokenization_dictionary"
    guard let encodedPath = path.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
        failure?(RestError.encodingError)
        return
    }
    let request = RestRequest(
        session: session,
        authMethod: authMethod,
        errorResponseDecoder: errorResponseDecoder,
        method: "GET",
        url: serviceURL + encodedPath,
        headerParameters: requestHeaders,
        queryItems: queryParameters
    )

    // execute REST request
    request.responseObject { (response: RestResponse<TokenDictStatusResponse>) in
        switch response.result {
        case .success(let status): success(status)
        case .failure(let error): failure?(error)
        }
    }
}
1489+
1490+
/**
 Create tokenization dictionary.

 Upload a custom tokenization dictionary to use with the specified collection.

 - parameter environmentID: The ID of the environment.
 - parameter collectionID: The ID of the collection.
 - parameter tokenizationRules: An array of tokenization rules. Each rule contains the original `text` string,
   component `tokens`, any alternate character set `readings`, and which `part_of_speech` the text is from.
 - parameter headers: A dictionary of request headers to be sent with this request.
 - parameter failure: A function executed if an error occurs.
 - parameter success: A function executed with the successful result.
 */
public func createTokenizationDictionary(
    environmentID: String,
    collectionID: String,
    tokenizationRules: [TokenDictRule]? = nil,
    headers: [String: String]? = nil,
    failure: ((Error) -> Void)? = nil,
    success: @escaping (TokenDictStatusResponse) -> Void)
{
    // construct body
    let requestBody = TokenDict(tokenizationRules: tokenizationRules)
    guard let body = try? JSONEncoder().encodeIfPresent(requestBody) else {
        failure?(RestError.serializationError)
        return
    }

    // construct query parameters
    let queryParameters = [URLQueryItem(name: "version", value: version)]

    // construct header parameters; caller-supplied headers override the defaults
    var requestHeaders = defaultHeaders
    headers?.forEach { requestHeaders[$0.key] = $0.value }
    requestHeaders["Accept"] = "application/json"
    requestHeaders["Content-Type"] = "application/json"

    // construct REST request
    let path = "/v1/environments/\(environmentID)/collections/\(collectionID)/word_lists/tokenization_dictionary"
    guard let encodedPath = path.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
        failure?(RestError.encodingError)
        return
    }
    let request = RestRequest(
        session: session,
        authMethod: authMethod,
        errorResponseDecoder: errorResponseDecoder,
        method: "POST",
        url: serviceURL + encodedPath,
        headerParameters: requestHeaders,
        queryItems: queryParameters,
        messageBody: body
    )

    // execute REST request
    request.responseObject { (response: RestResponse<TokenDictStatusResponse>) in
        switch response.result {
        case .success(let status): success(status)
        case .failure(let error): failure?(error)
        }
    }
}
1556+
1557+
/**
 Delete tokenization dictionary.

 Delete the tokenization dictionary from the collection.

 - parameter environmentID: The ID of the environment.
 - parameter collectionID: The ID of the collection.
 - parameter headers: A dictionary of request headers to be sent with this request.
 - parameter failure: A function executed if an error occurs.
 - parameter success: A function executed with the successful result.
 */
public func deleteTokenizationDictionary(
    environmentID: String,
    collectionID: String,
    headers: [String: String]? = nil,
    failure: ((Error) -> Void)? = nil,
    success: @escaping () -> Void)
{
    // construct query parameters
    let queryParameters = [URLQueryItem(name: "version", value: version)]

    // construct header parameters; caller-supplied headers override the defaults
    var requestHeaders = defaultHeaders
    headers?.forEach { requestHeaders[$0.key] = $0.value }
    requestHeaders["Accept"] = "application/json"

    // construct REST request
    let path = "/v1/environments/\(environmentID)/collections/\(collectionID)/word_lists/tokenization_dictionary"
    guard let encodedPath = path.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) else {
        failure?(RestError.encodingError)
        return
    }
    let request = RestRequest(
        session: session,
        authMethod: authMethod,
        errorResponseDecoder: errorResponseDecoder,
        method: "DELETE",
        url: serviceURL + encodedPath,
        headerParameters: requestHeaders,
        queryItems: queryParameters
    )

    // execute REST request; the service returns no body on success
    request.responseVoid { (response: RestResponse) in
        switch response.result {
        case .success: success()
        case .failure(let error): failure?(error)
        }
    }
}
1611+
14351612
/**
14361613
Add a document.
14371614

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
2+
* Copyright IBM Corporation 2018
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
**/
16+
17+
import Foundation
18+
19+
/**
 A custom tokenization dictionary, describing how words are tokenized during ingestion and at query time.
 */
internal struct TokenDict: Encodable {

    /**
     An array of tokenization rules. Each rule contains the original `text` string, component `tokens`, any
     alternate character set `readings`, and which `part_of_speech` the text is from.
     */
    public var tokenizationRules: [TokenDictRule]?

    // Map each property name to the key that shall be used for encoding/decoding.
    private enum CodingKeys: String, CodingKey {
        case tokenizationRules = "tokenization_rules"
    }

    /**
     Initialize a `TokenDict` with member variables.

     - parameter tokenizationRules: An array of tokenization rules. Each rule contains the original `text` string,
       component `tokens`, any alternate character set `readings`, and which `part_of_speech` the text is from.

     - returns: An initialized `TokenDict`.
     */
    public init(tokenizationRules: [TokenDictRule]? = nil) {
        self.tokenizationRules = tokenizationRules
    }

}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/**
2+
* Copyright IBM Corporation 2018
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
**/
16+
17+
import Foundation
18+
19+
/**
 An object defining a single tokenization rule.
 */
public struct TokenDictRule: Encodable {

    /**
     The string to tokenize.
     */
    public var text: String?

    /**
     Array of tokens that the `text` field is split into when found.
     */
    public var tokens: [String]?

    /**
     Array of tokens that represent the content of the `text` field in an alternate character set.
     */
    public var readings: [String]?

    /**
     The part of speech that the `text` string belongs to. For example `noun`. Custom parts of speech can be specified.
     */
    public var partOfSpeech: String?

    // Map each property name to the key that shall be used for encoding/decoding.
    // Cases without an explicit raw value use the property name as the key.
    private enum CodingKeys: String, CodingKey {
        case text, tokens, readings
        case partOfSpeech = "part_of_speech"
    }

    /**
     Initialize a `TokenDictRule` with member variables.

     - parameter text: The string to tokenize.
     - parameter tokens: Array of tokens that the `text` field is split into when found.
     - parameter readings: Array of tokens that represent the content of the `text` field in an alternate character
       set.
     - parameter partOfSpeech: The part of speech that the `text` string belongs to. For example `noun`. Custom parts
       of speech can be specified.

     - returns: An initialized `TokenDictRule`.
     */
    public init(
        text: String? = nil,
        tokens: [String]? = nil,
        readings: [String]? = nil,
        partOfSpeech: String? = nil)
    {
        self.text = text
        self.tokens = tokens
        self.readings = readings
        self.partOfSpeech = partOfSpeech
    }

}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/**
2+
* Copyright IBM Corporation 2018
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
**/
16+
17+
import Foundation
18+
19+
/**
 Object describing the current status of the tokenization dictionary.
 */
public struct TokenDictStatusResponse: Decodable {

    /**
     Current tokenization dictionary status for the specified collection.
     */
    public enum Status: String {
        case active = "active"
        case pending = "pending"
        case notFound = "not found"
    }

    /**
     Current tokenization dictionary status for the specified collection.
     */
    public var status: String?

    /**
     The type for this dictionary. Always returns `tokenization_dictionary`.
     */
    public var type: String?

    // Map each property name to the key that shall be used for encoding/decoding.
    // Both keys match the property names, so the property names are used as raw values.
    private enum CodingKeys: String, CodingKey {
        case status, type
    }

}

0 commit comments

Comments
 (0)