Skip to content

Add trigram tokenizer #1068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ package.dependencies = [.package(url: "https://github.com/stephencelis/CSQLite.g
package.targets = [
.target(name: "SQLite", exclude: ["Extensions/FTS4.swift", "Extensions/FTS5.swift"]),
.testTarget(name: "SQLiteTests", dependencies: ["SQLite"], path: "Tests/SQLiteTests", exclude: [
"FTSIntegrationTests.swift",
"FTS4Tests.swift",
"FTS5Tests.swift"
])
Expand Down
8 changes: 8 additions & 0 deletions SQLite.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
19A17152E32A9585831E3FE0 /* DateAndTimeFunctions.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A17BA55DABB480F9020C8A /* DateAndTimeFunctions.swift */; };
19A1717B10CC941ACB5533D6 /* FTS5.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A1730E4390C775C25677D1 /* FTS5.swift */; };
19A171967CC511C4F6F773C9 /* RowTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A175C1F9CB3BBAB8FCEC7B /* RowTests.swift */; };
19A171BE056457F13BFBC4C3 /* FTSIntegrationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A171FDE4D67879B14ACBDF /* FTSIntegrationTests.swift */; };
19A171E6FA242F72A308C594 /* FTS5Tests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A1721B8984686B9963B45D /* FTS5Tests.swift */; };
19A171F12AB8B07F2FD7201A /* Cipher.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A178A39ACA9667A62663CC /* Cipher.swift */; };
19A1720B67ED13E6150C6A3D /* RowTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A175C1F9CB3BBAB8FCEC7B /* RowTests.swift */; };
Expand All @@ -80,8 +81,10 @@
19A179B59450FE7C4811AB8A /* Connection+Aggregation.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A175A9CB446640AE6F2200 /* Connection+Aggregation.swift */; };
19A179CCF9671E345E5A9811 /* Cipher.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A178A39ACA9667A62663CC /* Cipher.swift */; };
19A179E76EA6207669B60C1B /* Cipher.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A178A39ACA9667A62663CC /* Cipher.swift */; };
19A17C013B00682B70D53DB8 /* FTSIntegrationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A171FDE4D67879B14ACBDF /* FTSIntegrationTests.swift */; };
19A17C4B951CB054EE48AB1C /* CipherTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A17399EA9E61235D5D77BF /* CipherTests.swift */; };
19A17C80076860CF7751A056 /* DateAndTimeFunctionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A1729B75C33F9A0B9A89C1 /* DateAndTimeFunctionTests.swift */; };
19A17C9407AC0EE104E5CC85 /* FTSIntegrationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A171FDE4D67879B14ACBDF /* FTSIntegrationTests.swift */; };
19A17CB808ACF606726E77A8 /* QueryIntegrationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A17BA6B4E282C1315A115C /* QueryIntegrationTests.swift */; };
19A17DC282E36C4F41AA440B /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A1710E73A46D5AC721CDA9 /* Errors.swift */; };
19A17DF8D4F13A20F5D2269E /* Result.swift in Sources */ = {isa = PBXBuildFile; fileRef = 19A17E723300E5ED3771DCB5 /* Result.swift */; };
Expand Down Expand Up @@ -236,6 +239,7 @@
03A65E631C6BB0F60062603F /* SQLiteTests tvOS.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "SQLiteTests tvOS.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
03A65E961C6BB3210062603F /* libsqlite3.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libsqlite3.tbd; path = Platforms/AppleTVOS.platform/Developer/SDKs/AppleTVOS.sdk/usr/lib/libsqlite3.tbd; sourceTree = DEVELOPER_DIR; };
19A1710E73A46D5AC721CDA9 /* Errors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Errors.swift; sourceTree = "<group>"; };
19A171FDE4D67879B14ACBDF /* FTSIntegrationTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FTSIntegrationTests.swift; sourceTree = "<group>"; };
19A1721B8984686B9963B45D /* FTS5Tests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FTS5Tests.swift; sourceTree = "<group>"; };
19A1729B75C33F9A0B9A89C1 /* DateAndTimeFunctionTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DateAndTimeFunctionTests.swift; sourceTree = "<group>"; };
19A1730E4390C775C25677D1 /* FTS5.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FTS5.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -449,6 +453,7 @@
19A1729B75C33F9A0B9A89C1 /* DateAndTimeFunctionTests.swift */,
D4DB368A20C09C9B00D5A58E /* SelectTests.swift */,
19A17BA6B4E282C1315A115C /* QueryIntegrationTests.swift */,
19A171FDE4D67879B14ACBDF /* FTSIntegrationTests.swift */,
);
name = SQLiteTests;
path = Tests/SQLiteTests;
Expand Down Expand Up @@ -887,6 +892,7 @@
19A1769C1F3A7542BECF50FF /* DateAndTimeFunctionTests.swift in Sources */,
D4DB368E20C09CFD00D5A58E /* SelectTests.swift in Sources */,
19A17CB808ACF606726E77A8 /* QueryIntegrationTests.swift in Sources */,
19A17C9407AC0EE104E5CC85 /* FTSIntegrationTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down Expand Up @@ -983,6 +989,7 @@
19A17C80076860CF7751A056 /* DateAndTimeFunctionTests.swift in Sources */,
D4DB368C20C09CFB00D5A58E /* SelectTests.swift in Sources */,
19A172A9B536D02D4A98AAAD /* QueryIntegrationTests.swift in Sources */,
19A17C013B00682B70D53DB8 /* FTSIntegrationTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down Expand Up @@ -1047,6 +1054,7 @@
19A172EB202970561E5C4245 /* DateAndTimeFunctionTests.swift in Sources */,
D4DB368D20C09CFC00D5A58E /* SelectTests.swift in Sources */,
19A176406BDE9D9C80CC9FA3 /* QueryIntegrationTests.swift in Sources */,
19A171BE056457F13BFBC4C3 /* FTSIntegrationTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
9 changes: 7 additions & 2 deletions Sources/SQLite/Extensions/FTS4.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ extension VirtualTable {
public struct Tokenizer {

public static let Simple = Tokenizer("simple")

public static let Porter = Tokenizer("porter")

public static func Unicode61(removeDiacritics: Bool? = nil, tokenchars: Set<Character> = [],
public static func Unicode61(removeDiacritics: Bool? = nil,
tokenchars: Set<Character> = [],
separators: Set<Character> = []) -> Tokenizer {
var arguments = [String]()

Expand All @@ -119,6 +119,11 @@ public struct Tokenizer {
return Tokenizer("unicode61", arguments)
}

/// Builds the tokenizer configuration for FTS5's experimental trigram tokenizer.
///
/// Reference: https://sqlite.org/fts5.html#the_experimental_trigram_tokenizer
///
/// - Parameter caseSensitive: Passed through as the `case_sensitive` tokenizer
///   argument (`"1"` when `true`, `"0"` when `false`). Defaults to `false`.
/// - Returns: A `Tokenizer` named `trigram` carrying the `case_sensitive` option.
public static func Trigram(caseSensitive: Bool = false) -> Tokenizer {
    let caseSensitiveFlag = caseSensitive ? "1" : "0"
    let options = ["case_sensitive", caseSensitiveFlag]
    return Tokenizer("trigram", options)
}

public static func Custom(_ name: String) -> Tokenizer {
Tokenizer(Tokenizer.moduleName.quote(), [name.quote()])
}
Expand Down
16 changes: 8 additions & 8 deletions Tests/SQLiteTests/CipherTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,22 @@ class CipherTests: XCTestCase {
let db1 = try! Connection()
let db2 = try! Connection()

override func setUp() {
override func setUpWithError() throws {
// db

try! db1.key("hello")
try db1.key("hello")

try! db1.run("CREATE TABLE foo (bar TEXT)")
try! db1.run("INSERT INTO foo (bar) VALUES ('world')")
try db1.run("CREATE TABLE foo (bar TEXT)")
try db1.run("INSERT INTO foo (bar) VALUES ('world')")

// db2
let key2 = keyData()
try! db2.key(Blob(bytes: key2.bytes, length: key2.length))
try db2.key(Blob(bytes: key2.bytes, length: key2.length))

try! db2.run("CREATE TABLE foo (bar TEXT)")
try! db2.run("INSERT INTO foo (bar) VALUES ('world')")
try db2.run("CREATE TABLE foo (bar TEXT)")
try db2.run("INSERT INTO foo (bar) VALUES ('world')")

super.setUp()
try super.setUpWithError()
}

func test_key() {
Expand Down
7 changes: 3 additions & 4 deletions Tests/SQLiteTests/ConnectionTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ import SQLite3

class ConnectionTests: SQLiteTestCase {

override func setUp() {
super.setUp()

createUsersTable()
override func setUpWithError() throws {
try super.setUpWithError()
try createUsersTable()
}

func test_init_withInMemory_returnsInMemoryConnection() {
Expand Down
12 changes: 6 additions & 6 deletions Tests/SQLiteTests/CustomAggregationTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ import SQLite3
#if !os(Linux)

class CustomAggregationTests: SQLiteTestCase {
override func setUp() {
super.setUp()
createUsersTable()
try! insertUser("Alice", age: 30, admin: true)
try! insertUser("Bob", age: 25, admin: true)
try! insertUser("Eve", age: 28, admin: false)
override func setUpWithError() throws {
try super.setUpWithError()
try createUsersTable()
try insertUser("Alice", age: 30, admin: true)
try insertUser("Bob", age: 25, admin: true)
try insertUser("Eve", age: 28, admin: false)
}

func testUnsafeCustomSum() {
Expand Down
10 changes: 10 additions & 0 deletions Tests/SQLiteTests/FTS5Tests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,16 @@ class FTS5Tests: XCTestCase {
sql(config.tokenizer(.Unicode61(removeDiacritics: true, tokenchars: ["."], separators: ["X"]))))
}

/// Checks the `CREATE VIRTUAL TABLE` statement produced for the trigram
/// tokenizer, first with its default arguments and then with
/// `caseSensitive: true`.
func test_tokenizer_trigram() {
    // Default: case_sensitive is emitted as 0.
    let expectedDefault =
        "CREATE VIRTUAL TABLE \"virtual_table\" USING fts5(tokenize=trigram case_sensitive 0)"
    XCTAssertEqual(expectedDefault, sql(config.tokenizer(.Trigram())))

    // Explicit opt-in: case_sensitive is emitted as 1.
    let expectedCaseSensitive =
        "CREATE VIRTUAL TABLE \"virtual_table\" USING fts5(tokenize=trigram case_sensitive 1)"
    XCTAssertEqual(expectedCaseSensitive, sql(config.tokenizer(.Trigram(caseSensitive: true))))
}

func test_column_size() {
XCTAssertEqual(
"CREATE VIRTUAL TABLE \"virtual_table\" USING fts5(columnsize=1)",
Expand Down
80 changes: 80 additions & 0 deletions Tests/SQLiteTests/FTSIntegrationTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import XCTest
#if SQLITE_SWIFT_STANDALONE
import sqlite3
#elseif SQLITE_SWIFT_SQLCIPHER
import SQLCipher
#elseif os(Linux)
import CSQLite
#else
import SQLite3
#endif
@testable import SQLite

/// Integration tests that create real FTS5 virtual tables over the `users`
/// fixture and query them with `match`.
///
/// A test is skipped (rather than failed) when creating the index throws a
/// `Result` whose description starts with "no such module:" or "parse error";
/// see `createOrSkip(_:)`.
class FTSIntegrationTests: SQLiteTestCase {
    let email = Expression<String>("email")
    let index = VirtualTable("index")

    /// Creates the FTS5 index on `email` using `tokenizer`, then copies the
    /// email of every row in `users` into it.
    ///
    /// Shared by `createIndex()` and `createTrigramIndex()` so the two index
    /// flavours cannot drift apart.
    ///
    /// - Parameter tokenizer: The tokenizer the FTS5 table is configured with.
    /// - Throws: `XCTSkip` when index creation fails in one of the ways
    ///   `createOrSkip(_:)` treats as skippable, otherwise whatever the
    ///   database calls throw.
    private func populateIndex(tokenizer: Tokenizer) throws {
        try createOrSkip { db in
            let config = FTS5Config()
                .column(email)
                .tokenizer(tokenizer)
            try db.run(index.create(.FTS5(config)))
        }

        for user in try db.prepare(users) {
            try db.run(index.insert(email <- user[email]))
        }
    }

    /// Creates and fills the index using the `Unicode61` tokenizer with its
    /// default arguments.
    private func createIndex() throws {
        try populateIndex(tokenizer: .Unicode61())
    }

    /// Creates and fills the index using the trigram tokenizer with
    /// `caseSensitive: false`.
    private func createTrigramIndex() throws {
        try populateIndex(tokenizer: .Trigram(caseSensitive: false))
    }

    /// Creates the `users` table and inserts the four baseline users.
    override func setUpWithError() throws {
        try super.setUpWithError()
        try createUsersTable()
        try insertUsers("John", "Paul", "George", "Ringo")
    }

    /// A whole-token query returns only the matching user's email.
    func testMatch() throws {
        try createIndex()
        let matches = Array(try db.prepare(index.match("Paul")))
        XCTAssertEqual(matches.map { $0[email] }, ["Paul@example.com"])
    }

    /// A prefix query (`Pa*`) returns every email whose token starts with the prefix.
    func testMatchPartial() throws {
        try insertUsers("Paula")
        try createIndex()
        let matches = Array(try db.prepare(index.match("Pa*")))
        XCTAssertEqual(matches.map { $0[email] }, ["Paul@example.com", "Paula@example.com"])
    }

    /// Querying the trigram-tokenized index for "Paul" returns exactly that user's email.
    func testTrigramIndex() throws {
        try createTrigramIndex()
        let matches = Array(try db.prepare(index.match("Paul")))
        // Assert the matched value, not just the row count, so a wrong single
        // hit cannot pass unnoticed (consistent with the other tests here).
        XCTAssertEqual(matches.map { $0[email] }, ["Paul@example.com"])
    }

    /// Runs `create` against the test connection, converting two specific
    /// failures into a test skip.
    ///
    /// - Parameter create: Closure that creates the index on the given connection.
    /// - Throws: `XCTSkip` when `create` throws a `Result` whose description
    ///   starts with "no such module:" or "parse error"; rethrows any other
    ///   `Result`. Errors of other types propagate unchanged.
    private func createOrSkip(_ create: (Connection) throws -> Void) throws {
        do {
            try create(db)
        } catch let error as Result {
            let description = error.description
            try XCTSkipIf(description.starts(with: "no such module:") ||
                          description.starts(with: "parse error"))
            throw error
        }
    }
}
11 changes: 5 additions & 6 deletions Tests/SQLiteTests/QueryIntegrationTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ class QueryIntegrationTests: SQLiteTestCase {
let email = Expression<String>("email")
let age = Expression<Int>("age")

override func setUp() {
super.setUp()

createUsersTable()
override func setUpWithError() throws {
try super.setUpWithError()
try createUsersTable()
}

// MARK: -
Expand Down Expand Up @@ -131,7 +130,7 @@ class QueryIntegrationTests: SQLiteTestCase {
}

func test_upsert() throws {
guard db.satisfiesMinimumVersion(minor: 24) else { return }
try XCTSkipUnless(db.satisfiesMinimumVersion(minor: 24))
let fetchAge = { () throws -> Int? in
try self.db.pluck(self.users.filter(self.email == "alice@example.com")).flatMap { $0[self.age] }
}
Expand Down Expand Up @@ -210,7 +209,7 @@ class QueryIntegrationTests: SQLiteTestCase {
}
}

private extension Connection {
extension Connection {
func satisfiesMinimumVersion(minor: Int, patch: Int = 0) -> Bool {
guard let version = try? scalar("SELECT sqlite_version()") as? String else { return false }
let components = version.split(separator: ".", maxSplits: 3).compactMap { Int($0) }
Expand Down
12 changes: 6 additions & 6 deletions Tests/SQLiteTests/SelectTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ import XCTest

class SelectTests: SQLiteTestCase {

override func setUp() {
super.setUp()
createUsersTable()
createUsersDataTable()
override func setUpWithError() throws {
try super.setUpWithError()
try createUsersTable()
try createUsersDataTable()
}

func createUsersDataTable() {
try! db.execute("""
func createUsersDataTable() throws {
try db.execute("""
CREATE TABLE users_name (
id INTEGER,
user_id INTEGER REFERENCES users(id),
Expand Down
6 changes: 3 additions & 3 deletions Tests/SQLiteTests/StatementTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import XCTest
import SQLite

class StatementTests: SQLiteTestCase {
override func setUp() {
super.setUp()
createUsersTable()
override func setUpWithError() throws {
try super.setUpWithError()
try createUsersTable()
}

func test_cursor_to_blob() {
Expand Down
10 changes: 5 additions & 5 deletions Tests/SQLiteTests/TestHelpers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ class SQLiteTestCase: XCTestCase {
var db: Connection!
let users = Table("users")

override func setUp() {
super.setUp()
db = try! Connection()
override func setUpWithError() throws {
try super.setUpWithError()
db = try Connection()
trace = [String: Int]()

db.trace { SQL in
Expand All @@ -17,8 +17,8 @@ class SQLiteTestCase: XCTestCase {
}
}

func createUsersTable() {
try! db.execute("""
func createUsersTable() throws {
try db.execute("""
CREATE TABLE users (
id INTEGER PRIMARY KEY,
email TEXT NOT NULL UNIQUE,
Expand Down