diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index f0e1d194cd88f..2394b9af2015e 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -101,10 +101,14 @@ class StringDtype(StorageExtensionDtype):
     # base class "StorageExtensionDtype") with class variable
     name: ClassVar[str] = "string"  # type: ignore[misc]
 
-    #: StringDtype().na_value uses pandas.NA
+    #: StringDtype().na_value uses pandas.NA except the implementation that
+    # follows NumPy semantics, which uses nan.
     @property
-    def na_value(self) -> libmissing.NAType:
-        return libmissing.NA
+    def na_value(self) -> libmissing.NAType | float:  # type: ignore[override]
+        if self.storage == "pyarrow_numpy":
+            return np.nan
+        else:
+            return libmissing.NA
 
     _metadata = ("storage",)
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index b8f872529bc1a..24d8e43708b91 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -17,6 +17,13 @@
 )
 
 
+def na_val(dtype):
+    if dtype.storage == "pyarrow_numpy":
+        return np.nan
+    else:
+        return pd.NA
+
+
 @pytest.fixture
 def dtype(string_storage):
     """Fixture giving StringDtype from parametrized 'string_storage'"""
@@ -31,26 +38,34 @@ def cls(dtype):
 
 def test_repr(dtype):
     df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
-    expected = "      A\n0     a\n1  <NA>\n2     b"
+    if dtype.storage == "pyarrow_numpy":
+        expected = "     A\n0    a\n1  NaN\n2    b"
+    else:
+        expected = "      A\n0     a\n1  <NA>\n2     b"
     assert repr(df) == expected
 
-    expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
+    if dtype.storage == "pyarrow_numpy":
+        expected = "0      a\n1    NaN\n2      b\nName: A, dtype: string"
+    else:
+        expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
     assert repr(df.A) == expected
 
     if dtype.storage == "pyarrow":
         arr_name = "ArrowStringArray"
+        expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
     elif dtype.storage == "pyarrow_numpy":
         arr_name = "ArrowStringArrayNumpySemantics"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: string"
     else:
         arr_name = "StringArray"
-    expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
+        expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
     assert repr(df.A.array) == expected
 
 
 def test_none_to_nan(cls):
     a = cls._from_sequence(["a", None, "b"])
     assert a[1] is not None
-    assert a[1] is pd.NA
+    assert a[1] is na_val(a.dtype)
 
 
 def test_setitem_validates(cls):
@@ -213,13 +228,9 @@ def test_comparison_methods_scalar(comparison_op, dtype):
     other = "a"
     result = getattr(a, op_name)(other)
     if dtype.storage == "pyarrow_numpy":
-        expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
-        expected = (
-            pd.array(expected, dtype="boolean")
-            .to_numpy(na_value=False)
-            .astype(np.bool_)
-        )
-        tm.assert_numpy_array_equal(result, expected)
+        expected = np.array([getattr(item, op_name)(other) for item in a])
+        expected[1] = False
+        tm.assert_numpy_array_equal(result, expected.astype(np.bool_))
     else:
         expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
         expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object)
@@ -415,7 +426,7 @@ def test_min_max(method, skipna, dtype, request):
         expected = "a" if method == "min" else "c"
         assert result == expected
     else:
-        assert result is pd.NA
+        assert result is na_val(arr.dtype)
 
 
 @pytest.mark.parametrize("method", ["min", "max"])
@@ -483,7 +494,7 @@ def test_arrow_roundtrip(dtype, string_storage2):
     expected = df.astype(f"string[{string_storage2}]")
     tm.assert_frame_equal(result, expected)
     # ensure the missing value is represented by NA and not np.nan or None
-    assert result.loc[2, "a"] is pd.NA
+    assert result.loc[2, "a"] is na_val(result["a"].dtype)
 
 
 def test_arrow_load_from_zero_chunks(dtype, string_storage2):
@@ -581,7 +592,7 @@ def test_astype_from_float_dtype(float_dtype, dtype):
 def test_to_numpy_returns_pdna_default(dtype):
     arr = pd.array(["a", pd.NA, "b"], dtype=dtype)
     result = np.array(arr)
-    expected = np.array(["a", pd.NA, "b"], dtype=object)
+    expected = np.array(["a", na_val(dtype), "b"], dtype=object)
     tm.assert_numpy_array_equal(result, expected)
 
 
@@ -621,7 +632,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
     mask = np.array([False, True, False])
 
     ser[mask] = None
-    assert ser.array[1] is pd.NA
+    assert ser.array[1] is na_val(ser.dtype)
 
     # for other non-string we should also raise an error
     ser = pd.Series(["a", "b", "c"], dtype=dtype)
diff --git a/pandas/tests/strings/__init__.py b/pandas/tests/strings/__init__.py
index bf119f2721ed4..01b49b5e5b633 100644
--- a/pandas/tests/strings/__init__.py
+++ b/pandas/tests/strings/__init__.py
@@ -1,4 +1,4 @@
-# Needed for new arrow string dtype
+import numpy as np
 
 import pandas as pd
 
@@ -7,6 +7,9 @@
 
 def _convert_na_value(ser, expected):
     if ser.dtype != object:
-        # GH#18463
-        expected = expected.fillna(pd.NA)
+        if ser.dtype.storage == "pyarrow_numpy":
+            expected = expected.fillna(np.nan)
+        else:
+            # GH#18463
+            expected = expected.fillna(pd.NA)
     return expected
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
index 7fabe238d2b86..0a7d409773dd6 100644
--- a/pandas/tests/strings/test_split_partition.py
+++ b/pandas/tests/strings/test_split_partition.py
@@ -12,7 +12,10 @@
     Series,
     _testing as tm,
 )
-from pandas.tests.strings import _convert_na_value
+from pandas.tests.strings import (
+    _convert_na_value,
+    object_pyarrow_numpy,
+)
 
 
 @pytest.mark.parametrize("method", ["split", "rsplit"])
@@ -113,8 +116,8 @@ def test_split_object_mixed(expand, method):
 def test_split_n(any_string_dtype, method, n):
     s = Series(["a b", pd.NA, "b c"], dtype=any_string_dtype)
     expected = Series([["a", "b"], pd.NA, ["b", "c"]])
-
     result = getattr(s.str, method)(" ", n=n)
+    expected = _convert_na_value(s, expected)
     tm.assert_series_equal(result, expected)
 
 
@@ -381,7 +384,7 @@ def test_split_nan_expand(any_string_dtype):
     # check that these are actually np.nan/pd.NA and not None
     # TODO see GH 18463
     # tm.assert_frame_equal does not differentiate
-    if any_string_dtype == "object":
+    if any_string_dtype in object_pyarrow_numpy:
         assert all(np.isnan(x) for x in result.iloc[1])
     else:
         assert all(x is pd.NA for x in result.iloc[1])