Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sourceTimeBoundaryRefreshInterval setting for Cluster #926

Merged
merged 6 commits into from
Nov 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/configuration-cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ This will put additional load on the data store but will ensure that dimension a

How often should source schema be reloaded in ms. Default value of 0 disables periodical source refresh.

**sourceTimeBoundaryRefreshInterval** (number), minimum: 1000, default: 60000

How often should source max time be refreshed in ms. Turnilo sends [time boundary query](https://druid.apache.org/docs/latest/querying/timeboundaryquery.html) to Druid cluster to get source max time.
Smaller values will ensure that turnilo is aware of freshly added data but also would put load on the data store with additional queries.


### Druid specific properties

Expand Down
3 changes: 3 additions & 0 deletions src/common/models/cluster/cluster.mocha.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ describe("Cluster", () => {
sourceListScan: "auto",
sourceReintrospectInterval: 0,
sourceReintrospectOnLoad: false,
sourceTimeBoundaryRefreshInterval: 60000,
timeout: undefined,
title: "",
type: "druid",
Expand Down Expand Up @@ -124,6 +125,7 @@ describe("Cluster", () => {
sourceListScan: "auto",
sourceReintrospectInterval: 1432,
sourceReintrospectOnLoad: true,
sourceTimeBoundaryRefreshInterval: 4242,
timeout: 581,
title: "foobar-title",
url: "http://url-bazz",
Expand All @@ -140,6 +142,7 @@ describe("Cluster", () => {
sourceListScan: "auto",
sourceReintrospectInterval: 1432,
sourceReintrospectOnLoad: true,
sourceTimeBoundaryRefreshInterval: 4242,
timeout: 581,
title: "foobar-title",
url: "http://url-bazz",
Expand Down
17 changes: 11 additions & 6 deletions src/common/models/cluster/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@ export interface Cluster {
title?: string;
version?: string;
timeout?: number;
healthCheckTimeout?: number;
sourceListScan?: SourceListScan;
sourceListRefreshOnLoad?: boolean;
sourceListRefreshInterval?: number;
sourceReintrospectOnLoad?: boolean;
sourceReintrospectInterval?: number;
healthCheckTimeout: number;
sourceListScan: SourceListScan;
sourceListRefreshOnLoad: boolean;
sourceListRefreshInterval: number;
sourceReintrospectOnLoad: boolean;
sourceReintrospectInterval: number;
sourceTimeBoundaryRefreshInterval: number;
guardDataCubes?: boolean;
introspectionStrategy?: string;
requestDecorator?: RequestDecorator;
Expand All @@ -57,6 +58,7 @@ export interface ClusterJS {
sourceListRefreshInterval?: number;
sourceReintrospectOnLoad?: boolean;
sourceReintrospectInterval?: number;
sourceTimeBoundaryRefreshInterval?: number;
guardDataCubes?: boolean;
introspectionStrategy?: string;
requestDecorator?: RequestDecoratorJS;
Expand Down Expand Up @@ -128,6 +130,7 @@ export const DEFAULT_SOURCE_LIST_REFRESH_INTERVAL = 0;
export const DEFAULT_SOURCE_LIST_REFRESH_ON_LOAD = false;
export const DEFAULT_SOURCE_REINTROSPECT_INTERVAL = 0;
export const DEFAULT_SOURCE_REINTROSPECT_ON_LOAD = false;
export const DEFAULT_SOURCE_TIME_BOUNDARY_REFRESH_INTERVAL = 60000;
export const DEFAULT_INTROSPECTION_STRATEGY = "segment-metadata-fallback";
const DEFAULT_GUARD_DATA_CUBES = false;

Expand Down Expand Up @@ -159,6 +162,7 @@ export function fromConfig(params: ClusterJS): Cluster {

const sourceReintrospectInterval = readInterval(params.sourceReintrospectInterval, DEFAULT_SOURCE_REINTROSPECT_INTERVAL);
const sourceListRefreshInterval = readInterval(params.sourceListRefreshInterval, DEFAULT_SOURCE_LIST_REFRESH_INTERVAL);
const sourceTimeBoundaryRefreshInterval = readInterval(params.sourceTimeBoundaryRefreshInterval, DEFAULT_SOURCE_TIME_BOUNDARY_REFRESH_INTERVAL);
const retry = RetryOptions.fromJS(params.retry);
const requestDecorator = readRequestDecorator(params);

Expand All @@ -176,6 +180,7 @@ export function fromConfig(params: ClusterJS): Cluster {
sourceListRefreshOnLoad,
sourceReintrospectInterval,
sourceReintrospectOnLoad,
sourceTimeBoundaryRefreshInterval,
version,
title,
guardDataCubes,
Expand Down
7 changes: 6 additions & 1 deletion src/common/models/time-tag/time-tag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,34 @@ import { Record } from "immutable";

export interface TimeTagValue {
name: string;
checkInterval: number;
time?: Date;
lastTimeChecked?: Date;
}

export interface TimeTagJS {
name: string;
checkInterval: number;
time?: string;
lastTimeChecked?: string;
}

const defaultTimeTag: TimeTagValue = {
name: "",
// NOTE: this value won't be used ever. Immutable.Record type does not understand that non-nullable fields should be not required in default value.
checkInterval: 60000,
time: null,
lastTimeChecked: null
};

export class TimeTag extends Record<TimeTagValue>(defaultTimeTag) {

static fromJS({ name, time: timeJS, lastTimeChecked: lastTimeCheckedJS }: TimeTagJS): TimeTag {
static fromJS({ name, checkInterval, time: timeJS, lastTimeChecked: lastTimeCheckedJS }: TimeTagJS): TimeTag {
const time = timeJS ? new Date(timeJS) : undefined;
const lastTimeChecked = lastTimeCheckedJS ? new Date(lastTimeCheckedJS) : time;
return new TimeTag({
name,
checkInterval,
time,
lastTimeChecked
});
Expand Down
14 changes: 12 additions & 2 deletions src/common/models/timekeeper/timekeeper.mocha.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,23 @@ describe("Timekeeper", () => {
},
{
timeTags: {
lol: { name: "lol", time: new Date("2016-01-01T01:02:03Z"), lastTimeChecked: new Date("2016-01-01T01:02:03Z") }
lol: {
name: "lol",
time: new Date("2016-01-01T01:02:03Z"),
lastTimeChecked: new Date("2016-01-01T01:02:03Z"),
checkInterval: 42000
}
},
nowOverride: null
},
{
timeTags: {
lol: { name: "lol", time: new Date("2016-01-01T01:02:03Z"), lastTimeChecked: new Date("2016-01-01T01:02:03Z") }
lol: {
name: "lol",
time: new Date("2016-01-01T01:02:03Z"),
lastTimeChecked: new Date("2016-01-01T01:02:03Z"),
checkInterval: 42000
}
},
nowOverride: new Date("2016-01-01T01:02:03Z")
}
Expand Down
4 changes: 2 additions & 2 deletions src/common/models/timekeeper/timekeeper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ export class Timekeeper implements Instance<TimekeeperValue, TimekeeperJS> {
return this.changeTimeTags(timeTags);
}

addTimeTagFor(name: string): Timekeeper {
const timeTags = this.timeTags.set(name, new TimeTag({ name }));
addTimeTagFor(name: string, checkInterval: number): Timekeeper {
const timeTags = this.timeTags.set(name, new TimeTag({ name, checkInterval }));
return this.changeTimeTags(timeTags);
}

Expand Down
13 changes: 6 additions & 7 deletions src/common/utils/time-monitor/time-monitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@ import { maxTimeQueryForCube } from "../query/max-time-query";

export class TimeMonitor {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's interesting, we have one TimeMonitor for whole turnilo instance. That is one of few places where we use source name as a unique key across possible multiple clusters. Such situation would break turnilo instance. Something to consider in the future.

public timekeeper: Timekeeper;
private regularCheckInterval: number;
private checks: Map<string, Nullary<Promise<Date>>>;
private logger: Logger;
private doingChecks = false;

constructor(logger: Logger) {
this.logger = logger.addPrefix("TimeMonitor");
this.checks = new Map();
this.regularCheckInterval = 60000;
this.timekeeper = Timekeeper.EMPTY;
setInterval(this.doChecks, 1000);
}
Expand All @@ -44,10 +42,11 @@ export class TimeMonitor {
return this;
}

addCheck(cube: QueryableDataCube): this {
// NOTE: We should pass whole Cluster here, but we still don't have Cluster class for "native" cluster type.
addCheck(cube: QueryableDataCube, sourceTimeBoundaryRefreshInterval: number): this {
const { name } = cube;
this.checks.set(name, () => maxTimeQueryForCube(cube));
this.timekeeper = this.timekeeper.addTimeTagFor(name);
this.timekeeper = this.timekeeper.addTimeTagFor(name, sourceTimeBoundaryRefreshInterval);
return this;
}

Expand All @@ -65,10 +64,10 @@ export class TimeMonitor {
);
}

private isStale = (timeTag: TimeTag): boolean => {
const { timekeeper, regularCheckInterval } = this;
private isStale = ({ time, lastTimeChecked, checkInterval }: TimeTag): boolean => {
const { timekeeper } = this;
const now = timekeeper.now().valueOf();
return !timeTag.time || now - timeTag.lastTimeChecked.valueOf() > regularCheckInterval;
return !time || now - lastTimeChecked.valueOf() > checkInterval;
}

private doChecks = (): void => {
Expand Down
7 changes: 4 additions & 3 deletions src/server/utils/settings-manager/settings-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import { Dataset, External } from "plywood";
import { Logger } from "../../../common/logger/logger";
import { AppSettings } from "../../../common/models/app-settings/app-settings";
import { Cluster } from "../../../common/models/cluster/cluster";
import { Cluster, DEFAULT_SOURCE_TIME_BOUNDARY_REFRESH_INTERVAL } from "../../../common/models/cluster/cluster";
import { DataCube, fromClusterAndExternal } from "../../../common/models/data-cube/data-cube";
import { attachDatasetExecutor, attachExternalExecutor } from "../../../common/models/data-cube/queryable-data-cube";
import {
Expand Down Expand Up @@ -205,7 +205,8 @@ export class SettingsManager {
const queryableDataCube = attachDatasetExecutor(dataCube, changedDataset);

if (queryableDataCube.refreshRule.isQuery()) {
this.timeMonitor.addCheck(queryableDataCube);
// TODO: Maybe we have better default for native clusters?
this.timeMonitor.addCheck(queryableDataCube, DEFAULT_SOURCE_TIME_BOUNDARY_REFRESH_INTERVAL);
}

this.sources = addOrUpdateDataCube(sources, queryableDataCube);
Expand All @@ -224,7 +225,7 @@ export class SettingsManager {
const queryableDataCube = attachExternalExecutor(dataCube, changedExternal);

if (queryableDataCube.refreshRule.isQuery()) {
this.timeMonitor.addCheck(queryableDataCube);
this.timeMonitor.addCheck(queryableDataCube, cluster.sourceTimeBoundaryRefreshInterval);
}

this.sources = addOrUpdateDataCube(sources, queryableDataCube);
Expand Down