Commit b0984b9

Merge remote-tracking branch 'upstream/main' into fix-constructor-from-mgr
Parents: b1fd049 + 074ab2f

40 files changed: +1891 -1774 lines

.circleci/config.yml (5 additions, 0 deletions)

@@ -49,7 +49,12 @@ jobs:
           no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
           command: |
             pip3 install cibuildwheel==2.15.0
+            # When this is a nightly wheel build, allow picking up NumPy 2.0 dev wheels:
+            if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then
+              export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
+            fi
             cibuildwheel --prerelease-pythons --output-dir wheelhouse
+
           environment:
             CIBW_BUILD: << parameters.cibw-build >>

.github/workflows/wheels.yml (14 additions, 1 deletion)

@@ -137,14 +137,27 @@ jobs:
         shell: bash -el {0}
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

-      - name: Build wheels
+      - name: Build normal wheels
+        if: ${{ (env.IS_SCHEDULE_DISPATCH != 'true' || env.IS_PUSH == 'true') }}
         uses: pypa/cibuildwheel@v2.16.2
         with:
           package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
           CIBW_PRERELEASE_PYTHONS: True
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

+      - name: Build nightly wheels (with NumPy pre-release)
+        if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
+        uses: pypa/cibuildwheel@v2.16.2
+        with:
+          package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
+        env:
+          # The nightly wheels should be built with the NumPy 2.0 pre-releases,
+          # which requires the additional URL.
+          CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
+          CIBW_PRERELEASE_PYTHONS: True
+          CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

       - name: Set up Python
         uses: mamba-org/setup-micromamba@v1
         with:

doc/source/whatsnew/v2.1.2.rst (1 addition, 0 deletions)

@@ -16,6 +16,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.join` where result has missing values and dtype is arrow backed string (:issue:`55348`)
 - Fixed regression in :meth:`DataFrame.resample` which was extrapolating back to ``origin`` when ``origin`` was outside its bounds (:issue:`55064`)
 - Fixed regression in :meth:`DataFrame.sort_index` which was not sorting correctly when the index was a sliced :class:`MultiIndex` (:issue:`55379`)
+- Fixed regression in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` where if the option ``compute.use_numba`` was set to True, groupby methods not supported by the numba engine would raise a ``TypeError`` (:issue:`55520`)
 - Fixed performance regression with wide DataFrames, typically involving methods where all columns were accessed individually (:issue:`55256`, :issue:`55245`)
 - Fixed regression in :func:`merge_asof` raising ``TypeError`` for ``by`` with datetime and timedelta dtypes (:issue:`55453`)

doc/source/whatsnew/v2.2.0.rst (3 additions, 1 deletion)

@@ -296,7 +296,7 @@ Other Deprecations
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
-- Performance improvement in :func:`merge_asof` when ``by`` contains more than one key (:issue:`55580`)
+- Performance improvement in :func:`merge_asof` when ``by`` is not ``None`` (:issue:`55580`, :issue:`55678`)
 - Performance improvement in :func:`read_stata` for files with many variables (:issue:`55515`)
 - Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
 - Performance improvement in :meth:`DataFrame.groupby` when aggregating pyarrow timestamp and duration dtypes (:issue:`55031`)

@@ -324,6 +324,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
 - Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
 - Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)
+- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
 - Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
 - Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
 -

@@ -411,6 +412,7 @@ Groupby/resample/rolling
 Reshaping
 ^^^^^^^^^
 - Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
+- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
 - Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
 - Bug in :meth:`pandas.DataFrame.melt` where it would not preserve the datetime (:issue:`55254`)
 -

pandas/_libs/join.pyi (6 additions, 6 deletions)

@@ -53,26 +53,26 @@ def outer_join_indexer(
 def asof_join_backward_on_X_by_Y(
     left_values: np.ndarray,  # ndarray[numeric_t]
     right_values: np.ndarray,  # ndarray[numeric_t]
-    left_by_values: np.ndarray,  # ndarray[by_t]
-    right_by_values: np.ndarray,  # ndarray[by_t]
+    left_by_values: np.ndarray,  # const int64_t[:]
+    right_by_values: np.ndarray,  # const int64_t[:]
     allow_exact_matches: bool = ...,
     tolerance: np.number | float | None = ...,
     use_hashtable: bool = ...,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
 def asof_join_forward_on_X_by_Y(
     left_values: np.ndarray,  # ndarray[numeric_t]
     right_values: np.ndarray,  # ndarray[numeric_t]
-    left_by_values: np.ndarray,  # ndarray[by_t]
-    right_by_values: np.ndarray,  # ndarray[by_t]
+    left_by_values: np.ndarray,  # const int64_t[:]
+    right_by_values: np.ndarray,  # const int64_t[:]
     allow_exact_matches: bool = ...,
     tolerance: np.number | float | None = ...,
     use_hashtable: bool = ...,
 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
 def asof_join_nearest_on_X_by_Y(
     left_values: np.ndarray,  # ndarray[numeric_t]
     right_values: np.ndarray,  # ndarray[numeric_t]
-    left_by_values: np.ndarray,  # ndarray[by_t]
-    right_by_values: np.ndarray,  # ndarray[by_t]
+    left_by_values: np.ndarray,  # const int64_t[:]
+    right_by_values: np.ndarray,  # const int64_t[:]
     allow_exact_matches: bool = ...,
     tolerance: np.number | float | None = ...,
     use_hashtable: bool = ...,

pandas/_libs/join.pyx (11 additions, 34 deletions)

@@ -7,7 +7,6 @@ from numpy cimport (
     int64_t,
     intp_t,
     ndarray,
-    uint64_t,
 )

 cnp.import_array()

@@ -679,23 +678,13 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
 # asof_join_by
 # ----------------------------------------------------------------------

-from pandas._libs.hashtable cimport (
-    HashTable,
-    Int64HashTable,
-    PyObjectHashTable,
-    UInt64HashTable,
-)
-
-ctypedef fused by_t:
-    object
-    int64_t
-    uint64_t
+from pandas._libs.hashtable cimport Int64HashTable


 def asof_join_backward_on_X_by_Y(ndarray[numeric_t] left_values,
                                  ndarray[numeric_t] right_values,
-                                 ndarray[by_t] left_by_values,
-                                 ndarray[by_t] right_by_values,
+                                 const int64_t[:] left_by_values,
+                                 const int64_t[:] right_by_values,
                                  bint allow_exact_matches=True,
                                  tolerance=None,
                                  bint use_hashtable=True):

@@ -706,8 +695,7 @@ def asof_join_backward_on_X_by_Y(ndarray[numeric_t] left_values,
         bint has_tolerance = False
         numeric_t tolerance_ = 0
         numeric_t diff = 0
-        HashTable hash_table
-        by_t by_value
+        Int64HashTable hash_table

     # if we are using tolerance, set our objects
     if tolerance is not None:

@@ -721,12 +709,7 @@ def asof_join_backward_on_X_by_Y(ndarray[numeric_t] left_values,
     right_indexer = np.empty(left_size, dtype=np.intp)

     if use_hashtable:
-        if by_t is object:
-            hash_table = PyObjectHashTable(right_size)
-        elif by_t is int64_t:
-            hash_table = Int64HashTable(right_size)
-        elif by_t is uint64_t:
-            hash_table = UInt64HashTable(right_size)
+        hash_table = Int64HashTable(right_size)

     right_pos = 0
     for left_pos in range(left_size):

@@ -771,8 +754,8 @@ def asof_join_backward_on_X_by_Y(ndarray[numeric_t] left_values,

 def asof_join_forward_on_X_by_Y(ndarray[numeric_t] left_values,
                                 ndarray[numeric_t] right_values,
-                                ndarray[by_t] left_by_values,
-                                ndarray[by_t] right_by_values,
+                                const int64_t[:] left_by_values,
+                                const int64_t[:] right_by_values,
                                 bint allow_exact_matches=1,
                                 tolerance=None,
                                 bint use_hashtable=True):

@@ -783,8 +766,7 @@ def asof_join_forward_on_X_by_Y(ndarray[numeric_t] left_values,
         bint has_tolerance = False
         numeric_t tolerance_ = 0
         numeric_t diff = 0
-        HashTable hash_table
-        by_t by_value
+        Int64HashTable hash_table

     # if we are using tolerance, set our objects
     if tolerance is not None:

@@ -798,12 +780,7 @@ def asof_join_forward_on_X_by_Y(ndarray[numeric_t] left_values,
     right_indexer = np.empty(left_size, dtype=np.intp)

     if use_hashtable:
-        if by_t is object:
-            hash_table = PyObjectHashTable(right_size)
-        elif by_t is int64_t:
-            hash_table = Int64HashTable(right_size)
-        elif by_t is uint64_t:
-            hash_table = UInt64HashTable(right_size)
+        hash_table = Int64HashTable(right_size)

     right_pos = right_size - 1
     for left_pos in range(left_size - 1, -1, -1):

@@ -849,8 +826,8 @@ def asof_join_forward_on_X_by_Y(ndarray[numeric_t] left_values,

 def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values,
                                 ndarray[numeric_t] right_values,
-                                ndarray[by_t] left_by_values,
-                                ndarray[by_t] right_by_values,
+                                const int64_t[:] left_by_values,
+                                const int64_t[:] right_by_values,
                                 bint allow_exact_matches=True,
                                 tolerance=None,
                                 bint use_hashtable=True):

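Note on the join.pyx change above: the by_t fused type (object / int64 / uint64) disappears because callers now hand the Cython routines pre-factorized int64 codes, so a single Int64HashTable covers every "by" dtype. A minimal sketch of that idea using only public API; pd.factorize stands in for the internal _factorize_keys call, so names and shapes are illustrative rather than the exact internals:

import numpy as np
import pandas as pd

# "by" keys of an arbitrary dtype (strings here) from the left and right frames
left_by = np.asarray(["a", "b", "a"], dtype=object)
right_by = np.asarray(["b", "a", "c"], dtype=object)

# Factorize both sides together so equal keys receive equal integer codes,
# then split the codes back into the left and right halves.
codes, _uniques = pd.factorize(np.concatenate([left_by, right_by]))
left_codes = codes[: len(left_by)].astype(np.int64)
right_codes = codes[len(left_by):].astype(np.int64)

print(left_codes)   # [0 1 0]
print(right_codes)  # [1 0 2]
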
pandas/_libs/tslibs/offsets.pyx (0 additions, 1 deletion)

@@ -1850,7 +1850,6 @@ cdef class BusinessDay(BusinessMixin):
         res = self._shift_bdays(i8other, reso=reso)
         if self.offset:
             res = res.view(dtarr.dtype) + Timedelta(self.offset)
-            res = res.view("i8")
         return res

     def is_on_offset(self, dt: datetime) -> bool:

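The removed res.view("i8") line corresponds to the GH 55608 entry in the v2.2.0 whatsnew above: adding a BusinessDay that carries an offset attribute to non-nanosecond data gave incorrect results. A hedged usage sketch of the intended behavior, with the dates and the second resolution chosen arbitrarily:

import pandas as pd

# Second-resolution datetimes plus a BusinessDay that also carries a time offset
ser = pd.Series(pd.to_datetime(["2023-10-20", "2023-10-23"])).astype("datetime64[s]")
bday = pd.offsets.BusinessDay(offset=pd.Timedelta(hours=2))

# Each value moves to the next business day and then shifts by two hours;
# before the fix this non-nanosecond path could return incorrect values.
print(ser + bday)
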
pandas/core/groupby/generic.py (5 additions, 2 deletions)

@@ -236,10 +236,13 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
             kwargs = {}

         if isinstance(func, str):
-            if maybe_use_numba(engine):
+            if maybe_use_numba(engine) and engine is not None:
                 # Not all agg functions support numba, only propagate numba kwargs
-                # if user asks for numba
+                # if user asks for numba, and engine is not None
+                # (if engine is None, the called function will handle the case where
+                # numba is requested via the global option)
                 kwargs["engine"] = engine
+            if engine_kwargs is not None:
                 kwargs["engine_kwargs"] = engine_kwargs
             return getattr(self, func)(*args, **kwargs)

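The guard added above matters because maybe_use_numba(engine) also returns True when engine is None and the global compute.use_numba option is set. The old code therefore forwarded engine=None and engine_kwargs=None into string aggregations such as "first" that take no engine argument, and a TypeError surfaced (GH 55520). A hedged sketch of the now-working pattern; it assumes numba is installed, since enabling the option imports it:

import pandas as pd

df = pd.DataFrame({"a": [3, 2, 3, 2], "b": range(4)})
gb = df.groupby("a")

# "first" has no numba implementation; with this fix the engine keyword is no
# longer forced through when numba is only implied by the global option.
with pd.option_context("compute.use_numba", True):
    result = gb.agg({"b": "first"})

print(result)
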
pandas/core/reshape/merge.py (24 additions, 41 deletions)

@@ -2153,54 +2153,37 @@ def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]
         if self.left_by is not None:
             # remove 'on' parameter from values if one existed
             if self.left_index and self.right_index:
-                left_by_values = self.left_join_keys
-                right_by_values = self.right_join_keys
+                left_join_keys = self.left_join_keys
+                right_join_keys = self.right_join_keys
             else:
-                left_by_values = self.left_join_keys[0:-1]
-                right_by_values = self.right_join_keys[0:-1]
-
-            # get tuple representation of values if more than one
-            if len(left_by_values) == 1:
-                lbv = left_by_values[0]
-                rbv = right_by_values[0]
-
-                # TODO: conversions for EAs that can be no-copy.
-                lbv = np.asarray(lbv)
-                rbv = np.asarray(rbv)
-                if needs_i8_conversion(lbv.dtype):
-                    lbv = lbv.view("i8")
-                if needs_i8_conversion(rbv.dtype):
-                    rbv = rbv.view("i8")
+                left_join_keys = self.left_join_keys[0:-1]
+                right_join_keys = self.right_join_keys[0:-1]
+
+            mapped = [
+                _factorize_keys(
+                    left_join_keys[n],
+                    right_join_keys[n],
+                    sort=False,
+                    how="left",
+                )
+                for n in range(len(left_join_keys))
+            ]
+
+            if len(left_join_keys) == 1:
+                left_by_values = mapped[0][0]
+                right_by_values = mapped[0][1]
             else:
-                # We get here with non-ndarrays in test_merge_by_col_tz_aware
-                # and test_merge_groupby_multiple_column_with_categorical_column
-                mapped = [
-                    _factorize_keys(
-                        left_by_values[n],
-                        right_by_values[n],
-                        sort=False,
-                        how="left",
-                    )
-                    for n in range(len(left_by_values))
-                ]
                 arrs = [np.concatenate(m[:2]) for m in mapped]
                 shape = tuple(m[2] for m in mapped)
                 group_index = get_group_index(
                     arrs, shape=shape, sort=False, xnull=False
                 )
-                left_len = len(left_by_values[0])
-                lbv = group_index[:left_len]
-                rbv = group_index[left_len:]
-                # error: Incompatible types in assignment (expression has type
-                # "Union[ndarray[Any, dtype[Any]], ndarray[Any, dtype[object_]]]",
-                # variable has type "List[Union[Union[ExtensionArray,
-                # ndarray[Any, Any]], Index, Series]]")
-                right_by_values = rbv  # type: ignore[assignment]
-                # error: Incompatible types in assignment (expression has type
-                # "Union[ndarray[Any, dtype[Any]], ndarray[Any, dtype[object_]]]",
-                # variable has type "List[Union[Union[ExtensionArray,
-                # ndarray[Any, Any]], Index, Series]]")
-                left_by_values = lbv  # type: ignore[assignment]
+                left_len = len(left_join_keys[0])
+                left_by_values = group_index[:left_len]
+                right_by_values = group_index[left_len:]
+
+            left_by_values = ensure_int64(left_by_values)
+            right_by_values = ensure_int64(right_by_values)

             # choose appropriate function by type
             func = _asof_by_function(self.direction)

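With the "by" keys now routed through _factorize_keys and ensure_int64, merge_asof no longer restricts the "by" dtype to object, int64, or uint64 (GH 22794, per the whatsnew entry above). A hedged usage sketch with an int8 "by" column, which the whatsnew entry says previously raised TypeError; the column names and values are made up for illustration:

import pandas as pd

left = pd.DataFrame(
    {
        "time": pd.to_datetime(["2023-01-01 09:00", "2023-01-01 09:05"]),
        "ticker": pd.array([1, 2], dtype="int8"),
        "bid": [99.0, 101.0],
    }
)
right = pd.DataFrame(
    {
        "time": pd.to_datetime(["2023-01-01 08:59", "2023-01-01 09:04"]),
        "ticker": pd.array([1, 2], dtype="int8"),
        "ask": [100.0, 102.0],
    }
)

# Nearest earlier quote per ticker; both frames are already sorted on "time".
out = pd.merge_asof(left, right, on="time", by="ticker")
print(out)
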
pandas/tests/groupby/test_numba.py (12 additions, 0 deletions)

@@ -3,6 +3,7 @@
 from pandas import (
     DataFrame,
     Series,
+    option_context,
 )
 import pandas._testing as tm

@@ -66,3 +67,14 @@ def test_axis_1_unsupported(self, numba_supported_reductions):
         gb = df.groupby("a", axis=1)
         with pytest.raises(NotImplementedError, match="axis=1"):
             getattr(gb, func)(engine="numba", **kwargs)
+
+    def test_no_engine_doesnt_raise(self):
+        # GH55520
+        df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)})
+        gb = df.groupby("a")
+        # Make sure a function without an engine argument doesn't raise
+        # when the global use_numba option is set
+        with option_context("compute.use_numba", True):
+            res = gb.agg({"b": "first"})
+        expected = gb.agg({"b": "first"})
+        tm.assert_frame_equal(res, expected)
