@@ -23,6 +23,7 @@ import { EntityField } from './anomaly_utils';
2323import { MlServerLimits } from '../types/ml_server_info' ;
2424import { JobValidationMessage , JobValidationMessageId } from '../constants/messages' ;
2525import { ES_AGGREGATION , ML_JOB_AGGREGATION } from '../constants/aggregation_types' ;
26+ import { MLCATEGORY } from '../constants/field_types' ;
2627
2728export interface ValidationResults {
2829 valid : boolean ;
@@ -86,9 +87,9 @@ export function isSourceDataChartableForDetector(job: CombinedJob, detectorIndex
8687 // whereas the 'function_description' field holds an ML-built display hint for function e.g. 'count'.
8788 isSourceDataChartable =
8889 mlFunctionToESAggregation ( functionName ) !== null &&
89- dtr . by_field_name !== 'mlcategory' &&
90- dtr . partition_field_name !== 'mlcategory' &&
91- dtr . over_field_name !== 'mlcategory' ;
90+ dtr . by_field_name !== MLCATEGORY &&
91+ dtr . partition_field_name !== MLCATEGORY &&
92+ dtr . over_field_name !== MLCATEGORY ;
9293
9394 // If the datafeed uses script fields, we can only plot the time series if
9495 // model plot is enabled. Without model plot it will be very difficult or impossible
@@ -380,27 +381,72 @@ export function basicJobValidation(
380381 valid = false ;
381382 }
382383 }
383-
384+ let categorizerDetectorMissingPartitionField = false ;
384385 if ( job . analysis_config . detectors . length === 0 ) {
385386 messages . push ( { id : 'detectors_empty' } ) ;
386387 valid = false ;
387388 } else {
388389 let v = true ;
390+
389391 each ( job . analysis_config . detectors , ( d ) => {
390392 if ( isEmpty ( d . function ) ) {
391393 v = false ;
392394 }
395+ // if detector has an ml category, check if the partition_field is missing
396+ const needToHavePartitionFieldName =
397+ job . analysis_config . per_partition_categorization ?. enabled === true &&
398+ ( d . by_field_name === MLCATEGORY || d . over_field_name === MLCATEGORY ) ;
399+
400+ if ( needToHavePartitionFieldName && d . partition_field_name === undefined ) {
401+ categorizerDetectorMissingPartitionField = true ;
402+ }
393403 } ) ;
394404 if ( v ) {
395405 messages . push ( { id : 'detectors_function_not_empty' } ) ;
396406 } else {
397407 messages . push ( { id : 'detectors_function_empty' } ) ;
398408 valid = false ;
399409 }
410+ if ( categorizerDetectorMissingPartitionField ) {
411+ messages . push ( { id : 'categorizer_detector_missing_per_partition_field' } ) ;
412+ valid = false ;
413+ }
400414 }
401415
402- // check for duplicate detectors
403416 if ( job . analysis_config . detectors . length >= 2 ) {
417+ // check if the detectors with mlcategory might have different per_partition_field values
418+ // if per_partition_categorization is enabled
419+ if ( job . analysis_config . per_partition_categorization !== undefined ) {
420+ if (
421+ job . analysis_config . per_partition_categorization . enabled ||
422+ ( job . analysis_config . per_partition_categorization . stop_on_warn &&
423+ Array . isArray ( job . analysis_config . detectors ) &&
424+ job . analysis_config . detectors . length >= 2 )
425+ ) {
426+ const categorizationDetectors = job . analysis_config . detectors . filter (
427+ ( d ) =>
428+ d . by_field_name === MLCATEGORY ||
429+ d . over_field_name === MLCATEGORY ||
430+ d . partition_field_name === MLCATEGORY
431+ ) ;
432+ const uniqPartitions = [
433+ ...new Set (
434+ categorizationDetectors
435+ . map ( ( d ) => d . partition_field_name )
436+ . filter ( ( name ) => name !== undefined )
437+ ) ,
438+ ] ;
439+ if ( uniqPartitions . length > 1 ) {
440+ valid = false ;
441+ messages . push ( {
442+ id : 'categorizer_varying_per_partition_fields' ,
443+ fields : uniqPartitions . join ( ', ' ) ,
444+ } ) ;
445+ }
446+ }
447+ }
448+
449+ // check for duplicate detectors
404450 // create an array of objects with a subset of the attributes
405451 // where we want to make sure they are not the same across detectors
406452 const compareSubSet = job . analysis_config . detectors . map ( ( d ) =>
0 commit comments