From 38c678620d73b27f4471c9d8cf7b8053969102a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Fri, 29 Nov 2024 09:20:01 -0600 Subject: [PATCH] move dataset test to basic --- tests/python_package_test/test_basic.py | 19 +++++++++++++++++++ tests/python_package_test/test_engine.py | 20 -------------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index d6ea4cdfdb25..b6b213751bd0 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -964,3 +964,22 @@ def test_no_copy_in_dataset_from_numpy_2d(rng, order, dtype): else: # makes a copy assert not np.shares_memory(X, X1d) + + +def test_equal_datasets_from_row_major_and_col_major_data(tmp_path): + # row-major dataset + X_row, y = make_blobs(n_samples=1_000, n_features=1, centers=2) + assert X_row.flags["C_CONTIGUOUS"] + ds_row = lgb.Dataset(X_row, y) + ds_row_path = tmp_path / "ds_row.txt" + ds_row._dump_text(ds_row_path) + + # col-major dataset + X_col = np.asfortranarray(X_row) + assert X_col.flags["F_CONTIGUOUS"] + ds_col = lgb.Dataset(X_col, y) + ds_col_path = tmp_path / "ds_col.txt" + ds_col._dump_text(ds_col_path) + + # check datasets are equal + assert filecmp.cmp(ds_row_path, ds_col_path) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index fe7024dae63d..9ae471e7f4b9 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -1,6 +1,5 @@ # coding: utf-8 import copy -import filecmp import itertools import json import math @@ -4612,22 +4611,3 @@ def test_bagging_by_query_in_lambdarank(): ndcg_score_no_bagging_by_query = gbm_no_bagging_by_query.best_score["valid_0"]["ndcg@5"] assert ndcg_score_bagging_by_query >= ndcg_score - 0.1 assert ndcg_score_no_bagging_by_query >= ndcg_score - 0.1 - - -def test_equal_datasets_from_row_major_and_col_major_data(tmp_path): - # row-major dataset - X_row, y = make_synthetic_regression() - assert X_row.flags["C_CONTIGUOUS"] - ds_row = lgb.Dataset(X_row, y) - ds_row_path = tmp_path / "ds_row.txt" - ds_row._dump_text(ds_row_path) - - # col-major dataset - X_col = np.asfortranarray(X_row) - assert X_col.flags["F_CONTIGUOUS"] - ds_col = lgb.Dataset(X_col, y) - ds_col_path = tmp_path / "ds_col.txt" - ds_col._dump_text(ds_col_path) - - # check datasets are equal - assert filecmp.cmp(ds_row_path, ds_col_path)