Skip to content

Commit 3469e16

Browse files
authored
[ML] Add option for per-partition categorization to categorization job wizard (#75061)
1 parent b944dd3 commit 3469e16

File tree

33 files changed

+1230
-34
lines changed

33 files changed

+1230
-34
lines changed

x-pack/plugins/ml/common/constants/anomalies.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ export enum ANOMALY_THRESHOLD {
2222
}
2323

2424
/** Detector field kinds that can split an analysis. */
export const PARTITION_FIELDS = ['partition_field', 'over_field', 'by_field'] as const;

/** Name of the field holding the job identifier. */
export const JOB_ID = 'job_id';

/** Name of the field holding the value of the partition field. */
export const PARTITION_FIELD_VALUE = 'partition_field_value';

x-pack/plugins/ml/common/constants/messages.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,34 @@ export const getMessages = once(() => {
4343
const createJobsDocsUrl = `https://www.elastic.co/guide/en/machine-learning/{{version}}/create-jobs.html`;
4444

4545
return {
46+
categorizer_detector_missing_per_partition_field: {
47+
status: VALIDATION_STATUS.ERROR,
48+
text: i18n.translate(
49+
'xpack.ml.models.jobValidation.messages.categorizerMissingPerPartitionFieldMessage',
50+
{
51+
defaultMessage:
52+
'Partition field must be set for detectors that reference "mlcategory" when per-partition categorization is enabled.',
53+
}
54+
),
55+
url:
56+
'https://www.elastic.co/guide/en/machine-learning/{{version}}/ml-configuring-categories.html',
57+
},
58+
categorizer_varying_per_partition_fields: {
59+
status: VALIDATION_STATUS.ERROR,
60+
text: i18n.translate(
61+
'xpack.ml.models.jobValidation.messages.categorizerVaryingPerPartitionFieldNamesMessage',
62+
{
63+
defaultMessage:
64+
'Detectors with keyword "mlcategory" cannot have different partition_field_name when per-partition categorization is enabled. Found [{fields}].',
65+
66+
values: {
67+
fields: '"{{fields}}"',
68+
},
69+
}
70+
),
71+
url:
72+
'https://www.elastic.co/guide/en/machine-learning/{{version}}/ml-configuring-categories.html',
73+
},
4674
field_not_aggregatable: {
4775
status: VALIDATION_STATUS.ERROR,
4876
text: i18n.translate('xpack.ml.models.jobValidation.messages.fieldNotAggregatableMessage', {

x-pack/plugins/ml/common/types/anomalies.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,20 @@ export interface AnomaliesTableRecord {
5757
}
5858

5959
/** Union of the string literal values listed in PARTITION_FIELDS. */
export type PartitionFieldsType = typeof PARTITION_FIELDS[number];
60+
61+
/**
 * Shape of a result document whose `result_type` is `'categorizer_stats'`.
 *
 * NOTE(review): the meaning of the individual counters is inferred from their
 * names only; confirm against the Elasticsearch ML results documentation.
 */
export interface AnomalyCategorizerStatsDoc {
  // Open index signature: any additional keys are accepted untyped.
  [key: string]: any;
  job_id: string;
  // Discriminator: always the literal 'categorizer_stats'.
  result_type: 'categorizer_stats';
  // Both partition properties are optional.
  partition_field_name?: string;
  partition_field_value?: string;
  categorized_doc_count: number;
  total_category_count: number;
  frequent_category_count: number;
  rare_category_count: number;
  dead_category_count: number;
  failed_category_count: number;
  categorization_status: 'ok' | 'warn';
  // Numeric times; presumably epoch milliseconds - TODO confirm.
  log_time: number;
  timestamp: number;
}

x-pack/plugins/ml/common/types/anomaly_detection_jobs/job.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,6 @@ export interface CustomRule {
9393
}
9494

9595
/**
 * The `per_partition_categorization` section of a job's analysis config.
 * Both flags are optional, so an empty object is a valid value.
 */
export interface PerPartitionCategorization {
  // Whether per-partition categorization is switched on.
  enabled?: boolean;
  // NOTE(review): presumably stops categorization for a partition once its
  // status becomes 'warn' - confirm against the ML job API documentation.
  stop_on_warn?: boolean;
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
/**
 * Result wrapper whose `jobs` property is either a list of strings or a map
 * from a string key to a list of strings.
 *
 * NOTE(review): presumably the list holds job ids and the map is keyed by job
 * id with the stopped partition values as entries - confirm against the caller.
 */
export interface GetStoppedPartitionResult {
  jobs: string[] | Record<string, string[]>;
}

x-pack/plugins/ml/common/util/job_utils.ts

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import { EntityField } from './anomaly_utils';
2323
import { MlServerLimits } from '../types/ml_server_info';
2424
import { JobValidationMessage, JobValidationMessageId } from '../constants/messages';
2525
import { ES_AGGREGATION, ML_JOB_AGGREGATION } from '../constants/aggregation_types';
26+
import { MLCATEGORY } from '../constants/field_types';
2627

2728
export interface ValidationResults {
2829
valid: boolean;
@@ -86,9 +87,9 @@ export function isSourceDataChartableForDetector(job: CombinedJob, detectorIndex
8687
// whereas the 'function_description' field holds an ML-built display hint for function e.g. 'count'.
8788
isSourceDataChartable =
8889
mlFunctionToESAggregation(functionName) !== null &&
89-
dtr.by_field_name !== 'mlcategory' &&
90-
dtr.partition_field_name !== 'mlcategory' &&
91-
dtr.over_field_name !== 'mlcategory';
90+
dtr.by_field_name !== MLCATEGORY &&
91+
dtr.partition_field_name !== MLCATEGORY &&
92+
dtr.over_field_name !== MLCATEGORY;
9293

9394
// If the datafeed uses script fields, we can only plot the time series if
9495
// model plot is enabled. Without model plot it will be very difficult or impossible
@@ -380,27 +381,72 @@ export function basicJobValidation(
380381
valid = false;
381382
}
382383
}
383-
384+
let categorizerDetectorMissingPartitionField = false;
384385
if (job.analysis_config.detectors.length === 0) {
385386
messages.push({ id: 'detectors_empty' });
386387
valid = false;
387388
} else {
388389
let v = true;
390+
389391
each(job.analysis_config.detectors, (d) => {
390392
if (isEmpty(d.function)) {
391393
v = false;
392394
}
395+
// if detector has an ml category, check if the partition_field is missing
396+
const needToHavePartitionFieldName =
397+
job.analysis_config.per_partition_categorization?.enabled === true &&
398+
(d.by_field_name === MLCATEGORY || d.over_field_name === MLCATEGORY);
399+
400+
if (needToHavePartitionFieldName && d.partition_field_name === undefined) {
401+
categorizerDetectorMissingPartitionField = true;
402+
}
393403
});
394404
if (v) {
395405
messages.push({ id: 'detectors_function_not_empty' });
396406
} else {
397407
messages.push({ id: 'detectors_function_empty' });
398408
valid = false;
399409
}
410+
if (categorizerDetectorMissingPartitionField) {
411+
messages.push({ id: 'categorizer_detector_missing_per_partition_field' });
412+
valid = false;
413+
}
400414
}
401415

402-
// check for duplicate detectors
403416
if (job.analysis_config.detectors.length >= 2) {
417+
// check if the detectors with mlcategory might have different per_partition_field values
418+
// if per_partition_categorization is enabled
419+
if (job.analysis_config.per_partition_categorization !== undefined) {
420+
if (
421+
job.analysis_config.per_partition_categorization.enabled ||
422+
(job.analysis_config.per_partition_categorization.stop_on_warn &&
423+
Array.isArray(job.analysis_config.detectors) &&
424+
job.analysis_config.detectors.length >= 2)
425+
) {
426+
const categorizationDetectors = job.analysis_config.detectors.filter(
427+
(d) =>
428+
d.by_field_name === MLCATEGORY ||
429+
d.over_field_name === MLCATEGORY ||
430+
d.partition_field_name === MLCATEGORY
431+
);
432+
const uniqPartitions = [
433+
...new Set(
434+
categorizationDetectors
435+
.map((d) => d.partition_field_name)
436+
.filter((name) => name !== undefined)
437+
),
438+
];
439+
if (uniqPartitions.length > 1) {
440+
valid = false;
441+
messages.push({
442+
id: 'categorizer_varying_per_partition_fields',
443+
fields: uniqPartitions.join(', '),
444+
});
445+
}
446+
}
447+
}
448+
449+
// check for duplicate detectors
404450
// create an array of objects with a subset of the attributes
405451
// where we want to make sure they are not the same across detectors
406452
const compareSubSet = job.analysis_config.detectors.map((d) =>

x-pack/plugins/ml/public/application/explorer/explorer.js

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ export class Explorer extends React.Component {
205205
updateLanguage = (language) => this.setState({ language });
206206

207207
render() {
208-
const { showCharts, severity } = this.props;
208+
const { showCharts, severity, stoppedPartitions } = this.props;
209209

210210
const {
211211
annotations,
@@ -298,6 +298,23 @@ export class Explorer extends React.Component {
298298

299299
<div className={mainColumnClasses}>
300300
<EuiSpacer size="m" />
301+
302+
{stoppedPartitions && (
303+
<EuiCallOut
304+
size={'s'}
305+
title={
306+
<FormattedMessage
307+
id="xpack.ml.explorer.stoppedPartitionsExistCallout"
308+
defaultMessage="There may be fewer results than there could have been because stop_on_warn is turned on. Both categorization and subsequent anomaly detection have stopped for some partitions in {jobsWithStoppedPartitions, plural, one {job} other {jobs}} [{stoppedPartitions}] where the categorization status has changed to warn."
309+
values={{
310+
jobsWithStoppedPartitions: stoppedPartitions.length,
311+
stoppedPartitions: stoppedPartitions.join(', '),
312+
}}
313+
/>
314+
}
315+
/>
316+
)}
317+
301318
<AnomalyTimeline
302319
explorerState={this.props.explorerState}
303320
setSelectedCells={this.props.setSelectedCells}

x-pack/plugins/ml/public/application/jobs/new_job/common/job_creator/categorization_job_creator.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export class CategorizationJobCreator extends JobCreator {
4141
ML_JOB_AGGREGATION.COUNT;
4242
private _categorizationAnalyzer: CategorizationAnalyzer = {};
4343
private _defaultCategorizationAnalyzer: CategorizationAnalyzer;
44+
private _partitionFieldName: string | null = null;
4445

4546
constructor(
4647
indexPattern: IndexPattern,
@@ -75,6 +76,11 @@ export class CategorizationJobCreator extends JobCreator {
7576
/**
 * Creates a detector from the given aggregation and field, sets its by-field
 * to the mlcategory field and registers it through `_addDetector`.
 *
 * @param agg aggregation passed to `createBasicDetector` and `_addDetector`
 * @param field field passed to `createBasicDetector`
 */
private _createDetector(agg: Aggregation, field: Field) {
  const dtr: Detector = createBasicDetector(agg, field);
  dtr.by_field_name = mlCategory.id;

  // The API requires the detector to carry a partition field whenever
  // per_partition_categorization is enabled, so copy the chosen one across.
  if (this.perPartitionCategorization && this.categorizationPerPartitionField !== null) {
    dtr.partition_field_name = this.categorizationPerPartitionField;
  }
  this._addDetector(dtr, agg, mlCategory);
}
8086

@@ -173,4 +179,29 @@ export class CategorizationJobCreator extends JobCreator {
173179
this.bucketSpan = bs;
174180
}
175181
}
182+
183+
/** Currently selected per-partition field name, or `null` when none is set. */
public get categorizationPerPartitionField(): string | null {
  return this._partitionFieldName;
}
186+
187+
/**
 * Sets or clears the partition field used for per-partition categorization.
 *
 * - `null` removes `partition_field_name` from every detector, removes the
 *   previously stored field from the influencers and clears the stored name.
 * - A name different from the stored one replaces it as influencer and is
 *   written to every detector.
 * - The name already stored is a no-op.
 *
 * @param fieldName new partition field name, or `null` to clear it
 */
public set categorizationPerPartitionField(fieldName: string | null) {
  if (fieldName === null) {
    this._detectors.forEach((detector) => {
      delete detector.partition_field_name;
    });
    if (this._partitionFieldName !== null) {
      this.removeInfluencer(this._partitionFieldName);
    }
    this._partitionFieldName = null;
  } else if (this._partitionFieldName !== fieldName) {
    // Swap the influencer: drop the previous field before adding the new one.
    if (this._partitionFieldName !== null) {
      this.removeInfluencer(this._partitionFieldName);
    }
    this.addInfluencer(fieldName);
    this._partitionFieldName = fieldName;
    this._detectors.forEach((detector) => {
      detector.partition_field_name = fieldName;
    });
  }
}
176207
}

x-pack/plugins/ml/public/application/jobs/new_job/common/job_creator/job_creator.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,36 @@ export class JobCreator {
622622
return JSON.stringify(this._datafeed_config, null, 2);
623623
}
624624

625+
/**
 * Ensures `analysis_config.per_partition_categorization` exists and that both
 * of its flags have a value, defaulting each missing flag to `false`.
 * Flags that are already set are left untouched.
 */
private _initPerPartitionCategorization() {
  const analysisConfig = this._job_config.analysis_config;

  // Work on a narrowed local so no non-null assertions are needed below.
  let settings = analysisConfig.per_partition_categorization;
  if (settings === undefined) {
    settings = {};
    analysisConfig.per_partition_categorization = settings;
  }
  if (settings.enabled === undefined) {
    settings.enabled = false;
  }
  if (settings.stop_on_warn === undefined) {
    settings.stop_on_warn = false;
  }
}
636+
637+
/**
 * `true` only when `per_partition_categorization.enabled` is strictly `true`;
 * a missing section or missing flag yields `false`.
 */
public get perPartitionCategorization(): boolean {
  return this._job_config.analysis_config.per_partition_categorization?.enabled === true;
}
640+
641+
/**
 * Stores the `enabled` flag of per-partition categorization, creating the
 * settings section first if it does not exist yet.
 */
public set perPartitionCategorization(enabled: boolean) {
  this._initPerPartitionCategorization();
  // The init call above guarantees the section is defined at this point.
  this._job_config.analysis_config.per_partition_categorization!.enabled = enabled;
}
645+
646+
/**
 * `true` only when `per_partition_categorization.stop_on_warn` is strictly
 * `true`; a missing section or missing flag yields `false`.
 */
public get perPartitionStopOnWarn(): boolean {
  return this._job_config.analysis_config.per_partition_categorization?.stop_on_warn === true;
}
649+
650+
/**
 * Stores the `stop_on_warn` flag of per-partition categorization, creating
 * the settings section first if it does not exist yet.
 */
public set perPartitionStopOnWarn(enabled: boolean) {
  this._initPerPartitionCategorization();
  // The init call above guarantees the section is defined at this point.
  this._job_config.analysis_config.per_partition_categorization!.stop_on_warn = enabled;
}
654+
625655
protected _overrideConfigs(job: Job, datafeed: Datafeed) {
626656
this._job_config = job;
627657
this._datafeed_config = datafeed;

x-pack/plugins/ml/public/application/jobs/new_job/common/job_validator/job_validator.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ export interface BasicValidations {
5151
queryDelay: Validation;
5252
frequency: Validation;
5353
scrollSize: Validation;
54+
categorizerMissingPerPartition: Validation;
55+
categorizerVaryingPerPartitionField: Validation;
5456
}
5557

5658
export interface AdvancedValidations {
@@ -76,6 +78,8 @@ export class JobValidator {
7678
queryDelay: { valid: true },
7779
frequency: { valid: true },
7880
scrollSize: { valid: true },
81+
categorizerMissingPerPartition: { valid: true },
82+
categorizerVaryingPerPartitionField: { valid: true },
7983
};
8084
private _advancedValidations: AdvancedValidations = {
8185
categorizationFieldValid: { valid: true },
@@ -273,6 +277,14 @@ export class JobValidator {
273277
this._advancedValidations.categorizationFieldValid.valid = valid;
274278
}
275279

280+
/** Returns the `categorizerMissingPerPartition` entry of the basic validations. */
public get categorizerMissingPerPartition() {
  return this._basicValidations.categorizerMissingPerPartition;
}
283+
284+
/** Returns the `categorizerVaryingPerPartitionField` entry of the basic validations. */
public get categorizerVaryingPerPartitionField() {
  return this._basicValidations.categorizerVaryingPerPartitionField;
}
287+
276288
/**
277289
* Indicates if the Pick Fields step has a valid input
278290
*/
@@ -283,6 +295,8 @@ export class JobValidator {
283295
(this._jobCreator.type === JOB_TYPE.ADVANCED && this.modelMemoryLimit.valid)) &&
284296
this.bucketSpan.valid &&
285297
this.duplicateDetectors.valid &&
298+
this.categorizerMissingPerPartition.valid &&
299+
this.categorizerVaryingPerPartitionField.valid &&
286300
!this.validating &&
287301
(this._jobCreator.type !== JOB_TYPE.CATEGORIZATION ||
288302
(this._jobCreator.type === JOB_TYPE.CATEGORIZATION && this.categorizationField))

0 commit comments

Comments
 (0)