Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
# Once Python 3.10 is the minimum version, this can be removed.
"eval-type-backport>=0.2.2",
"joblib>=1.2.0",
-"tabpfn-common-utils[telemetry-interactive]>=0.2.8",
+"tabpfn-common-utils[telemetry-interactive]>=0.2.11",
]
requires-python = ">=3.9"
authors = [
Expand Down
5 changes: 4 additions & 1 deletion src/tabpfn/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,72 @@

# Copyright (c) Prior Labs GmbH 2025.

from __future__ import annotations

import copy
import logging
import warnings
from collections.abc import Callable, Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Any, Literal
from typing_extensions import Self, deprecated

import numpy as np
import torch
from sklearn import config_context
from sklearn.base import BaseEstimator, ClassifierMixin, check_is_fitted
from sklearn.preprocessing import LabelEncoder
-from tabpfn_common_utils.telemetry import track_model_call
+from tabpfn_common_utils.telemetry import track_model_call, set_init_params

from tabpfn.base import (
ClassifierModelSpecs,
check_cpu_warning,
create_inference_engine,
determine_precision,
get_preprocessed_datasets_helper,
initialize_model_variables_helper,
initialize_telemetry,
)
from tabpfn.constants import (
PROBABILITY_EPSILON_ROUND_ZERO,
SKLEARN_16_DECIMAL_PRECISION,
ModelVersion,
XType,
YType,
)
from tabpfn.inference import InferenceEngine, InferenceEngineBatchedNoPreprocessing
from tabpfn.inference_tuning import (
ClassifierEvalMetrics,
ClassifierTuningConfig,
find_optimal_classification_thresholds,
find_optimal_temperature,
get_tuning_splits,
resolve_tuning_config,
)
from tabpfn.model_loading import (
ModelSource,
load_fitted_tabpfn_model,
prepend_cache_path,
save_fitted_tabpfn_model,
)
from tabpfn.preprocessing import (
ClassifierEnsembleConfig,
DatasetCollectionWithPreprocessing,
EnsembleConfig,
PreprocessorConfig,
)
from tabpfn.preprocessors.preprocessing_helpers import get_ordinal_encoder
from tabpfn.utils import (
DevicesSpecification,
balance_probas_by_class_counts,
fix_dtypes,
get_embeddings,
infer_categorical_features,
infer_random_state,
process_text_na_dataframe,
validate_X_predict,
validate_Xy_fit,
)

Check failure on line 84 in src/tabpfn/classifier.py

View workflow job for this annotation

GitHub Actions / Ruff Linting & Formatting

Ruff (I001)

src/tabpfn/classifier.py:19:1: I001 Import block is un-sorted or un-formatted

if TYPE_CHECKING:
import numpy.typing as npt
Expand Down Expand Up @@ -479,6 +479,9 @@
self.tuning_config = tuning_config
initialize_telemetry()

+# Only anonymously record `fit_mode` usage
+set_init_params({"fit_mode": self.fit_mode})
Comment on lines 480 to +483
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This telemetry initialization logic, including the call to set_init_params, is also present in TabPFNRegressor.__init__. To improve maintainability and reduce code duplication, consider creating a new helper function in src/tabpfn/base.py that encapsulates this logic.

For example, you could create a function in base.py:

from tabpfn_common_utils.telemetry import set_init_params

def initialize_telemetry_with_params(**params: Any) -> None:
    """Set up telemetry, then register any extra keyword parameters.

    ``initialize_telemetry()`` is always called. When at least one keyword
    argument is supplied, the collected keywords are handed to
    ``set_init_params`` as a single dict; with no keywords that call is
    skipped.
    """
    initialize_telemetry()
    if not params:
        # Nothing extra to register.
        return
    set_init_params(params)

Then you could replace these lines in both TabPFNClassifier and TabPFNRegressor with:

        initialize_telemetry_with_params(fit_mode=self.fit_mode)

This would centralize the telemetry setup and make it easier to add more parameters in the future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we do the same thing as for model_path and validate that it's a known fit mode, to avoid accidentally collecting PII?
We could define FitMode = Literal["low_memory", "fit_preprocessors", "fit_with_cache", "batched"] in inference.py, import it here and in the regressor interface, and then use typing.get_args() to check that the provided value is valid.


@classmethod
def create_default_for_version(cls, version: ModelVersion, **overrides) -> Self:
"""Construct a classifier that uses the given version of the model.
Expand Down
5 changes: 4 additions & 1 deletion src/tabpfn/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,65 +15,65 @@

# Copyright (c) Prior Labs GmbH 2025.

from __future__ import annotations

import logging
import typing
import warnings
from collections.abc import Callable, Sequence
from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, Any, Literal, Union
from typing_extensions import Self, TypedDict, deprecated, overload

import numpy as np
import torch
from sklearn import config_context
from sklearn.base import (
BaseEstimator,
RegressorMixin,
TransformerMixin,
check_is_fitted,
)
-from tabpfn_common_utils.telemetry import track_model_call
+from tabpfn_common_utils.telemetry import track_model_call, set_init_params

from tabpfn.architectures.base.bar_distribution import FullSupportBarDistribution
from tabpfn.base import (
RegressorModelSpecs,
check_cpu_warning,
create_inference_engine,
determine_precision,
get_preprocessed_datasets_helper,
initialize_model_variables_helper,
initialize_telemetry,
)
from tabpfn.constants import REGRESSION_CONSTANT_TARGET_BORDER_EPSILON, ModelVersion
from tabpfn.inference import InferenceEngine, InferenceEngineBatchedNoPreprocessing
from tabpfn.model_loading import (
ModelSource,
load_fitted_tabpfn_model,
prepend_cache_path,
save_fitted_tabpfn_model,
)
from tabpfn.preprocessing import (
DatasetCollectionWithPreprocessing,
EnsembleConfig,
RegressorEnsembleConfig,
)
from tabpfn.preprocessors import get_all_reshape_feature_distribution_preprocessors
from tabpfn.preprocessors.preprocessing_helpers import get_ordinal_encoder
from tabpfn.utils import (
DevicesSpecification,
fix_dtypes,
get_embeddings,
infer_categorical_features,
infer_random_state,
process_text_na_dataframe,
transform_borders_one,
translate_probs_across_borders,
validate_X_predict,
validate_Xy_fit,
)

Check failure on line 76 in src/tabpfn/regressor.py

View workflow job for this annotation

GitHub Actions / Ruff Linting & Formatting

Ruff (I001)

src/tabpfn/regressor.py:18:1: I001 Import block is un-sorted or un-formatted

if TYPE_CHECKING:
import numpy.typing as npt
Expand Down Expand Up @@ -465,6 +465,9 @@
self.n_preprocessing_jobs = n_preprocessing_jobs
initialize_telemetry()

+# Only anonymously record `fit_mode` usage
+set_init_params({"fit_mode": self.fit_mode})

@classmethod
def create_default_for_version(cls, version: ModelVersion, **overrides) -> Self:
"""Construct a regressor that uses the given version of the model.
Expand Down
Loading