Skip to content

Commit 2006292

Browse files
authored
FIX-#7292: Prepare Modin code to NumPy 2.0 (#7293)
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 31771d7 commit 2006292

File tree

10 files changed

+26
-26
lines changed

10 files changed

+26
-26
lines changed

modin/core/dataframe/pandas/dataframe/utils.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -868,8 +868,8 @@ def add_missing_categories_to_groupby(
868868
### At this stage we want to get a fill_value for missing categorical values
869869
if is_udf_agg and isinstance(total_index, pandas.MultiIndex):
870870
# if grouping on multiple columns and aggregating with an UDF, then the
871-
# fill value is always `np.NaN`
872-
missing_values = pandas.DataFrame({0: [np.NaN]})
871+
# fill value is always `np.nan`
872+
missing_values = pandas.DataFrame({0: [np.nan]})
873873
else:
874874
# In case of a udf aggregation we're forced to run the operator against each
875875
# missing category, as in theory it can return different results for each
@@ -903,8 +903,8 @@ def add_missing_categories_to_groupby(
903903
).columns
904904
else:
905905
# HACK: If the aggregation has failed, the result would be empty. Assuming the
906-
# fill value to be `np.NaN` here (this may not always be correct!!!)
907-
fill_value = np.NaN if len(missing_values) == 0 else missing_values.iloc[0, 0]
906+
# fill value to be `np.nan` here (this may not always be correct!!!)
907+
fill_value = np.nan if len(missing_values) == 0 else missing_values.iloc[0, 0]
908908
missing_values = pandas.DataFrame(
909909
fill_value, index=missing_index, columns=combined_cols
910910
)

modin/core/storage_formats/pandas/query_compiler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1105,7 +1105,7 @@ def map_func(df, resample_kwargs=resample_kwargs): # pragma: no cover
11051105
resample_kwargs = resample_kwargs.copy()
11061106
resample_kwargs["level"] = None
11071107
filler = pandas.DataFrame(
1108-
np.NaN, index=pandas.Index(timestamps), columns=df.columns
1108+
np.nan, index=pandas.Index(timestamps), columns=df.columns
11091109
)
11101110
df = pandas.concat([df, filler], copy=False)
11111111
if df_op is not None:

modin/numpy/arr.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1622,7 +1622,7 @@ def floor_divide(
16221622
# the output.
16231623
result = (
16241624
result.replace(numpy.inf, 0)
1625-
.replace(numpy.NINF, 0)
1625+
.replace(-numpy.inf, 0)
16261626
.where(self._query_compiler.ne(0), 0)
16271627
)
16281628
return fix_dtypes_and_determine_return(
@@ -1644,7 +1644,7 @@ def floor_divide(
16441644
# the output.
16451645
result = (
16461646
result.replace(numpy.inf, 0)
1647-
.replace(numpy.NINF, 0)
1647+
.replace(-numpy.inf, 0)
16481648
.where(callee.ne(0), 0)
16491649
)
16501650
return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)
@@ -1902,7 +1902,7 @@ def remainder(
19021902
if x2 == 0 and numpy.issubdtype(out_dtype, numpy.integer):
19031903
# NumPy's remainder by 0 works differently from pandas', so we need to fix
19041904
# the output.
1905-
result = result.replace(numpy.NaN, 0)
1905+
result = result.replace(numpy.nan, 0)
19061906
return fix_dtypes_and_determine_return(
19071907
result, self._ndim, dtype, out, where
19081908
)
@@ -1920,7 +1920,7 @@ def remainder(
19201920
if callee.eq(0).any() and numpy.issubdtype(out_dtype, numpy.integer):
19211921
# NumPy's floor_divide by 0 works differently from pandas', so we need to fix
19221922
# the output.
1923-
result = result.replace(numpy.NaN, 0)
1923+
result = result.replace(numpy.nan, 0)
19241924
return fix_dtypes_and_determine_return(result, new_ndim, dtype, out, where)
19251925

19261926
__mod__ = remainder

modin/pandas/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,7 @@ def _setitem_with_new_columns(self, row_loc, col_loc, item):
844844
for i in range(len(common_label_loc)):
845845
if not common_label_loc[i]:
846846
columns = columns.insert(len(columns), col_loc[i])
847-
self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.NaN)
847+
self.qc = self.qc.reindex(labels=columns, axis=1, fill_value=np.nan)
848848
self.df._update_inplace(new_query_compiler=self.qc)
849849
self._set_item_existing_loc(row_loc, np.array(col_loc), item)
850850

modin/tests/numpy/test_array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def test_array_where():
275275

276276
@pytest.mark.parametrize("method", ["argmax", "argmin"])
277277
def test_argmax_argmin(method):
278-
numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.NaN]])
278+
numpy_arr = numpy.array([[1, 2, 3], [4, 5, np.nan]])
279279
modin_arr = np.array(numpy_arr)
280280
assert_scalar_or_array_equal(
281281
getattr(np, method)(modin_arr, axis=1),

modin/tests/pandas/dataframe/test_binary.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def test_multi_level_comparison(data, op):
254254
pytest.param({}, {}, True, id="two_empty_dataframes"),
255255
pytest.param([[1]], [[0]], False, id="single_unequal_values"),
256256
pytest.param([[None]], [[None]], True, id="single_none_values"),
257-
pytest.param([[np.NaN]], [[np.NaN]], True, id="single_nan_values"),
257+
pytest.param([[np.nan]], [[np.nan]], True, id="single_nan_values"),
258258
pytest.param({1: [10]}, {1.0: [10]}, True, id="different_column_types"),
259259
pytest.param({1: [10]}, {2: [10]}, False, id="different_columns"),
260260
pytest.param(

modin/tests/pandas/dataframe/test_join_sort.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,7 @@ def test_sort_values_with_only_one_non_na_row_in_partition(ascending, na_positio
879879
np.random.rand(1000, 100), columns=[f"col {i}" for i in range(100)]
880880
)
881881
# Need to ensure that one of the partitions has all NA values except for one row
882-
pandas_df.iloc[340:] = np.NaN
882+
pandas_df.iloc[340:] = np.nan
883883
pandas_df.iloc[-1] = -4.0
884884
modin_df = pd.DataFrame(pandas_df)
885885
eval_general(

modin/tests/pandas/test_groupby.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -479,8 +479,8 @@ def test_simple_row_groupby(by, as_index, col1_category):
479479
pandas_df = pandas.DataFrame(
480480
{
481481
"col1": [0, 1, 2, 3],
482-
"col2": [4, 5, np.NaN, 7],
483-
"col3": [np.NaN, np.NaN, 12, 10],
482+
"col2": [4, 5, np.nan, 7],
483+
"col3": [np.nan, np.nan, 12, 10],
484484
"col4": [17, 13, 16, 15],
485485
"col5": [-4, -5, -6, -7],
486486
}
@@ -1850,8 +1850,8 @@ def test_shift_freq(groupby_axis, shift_axis, groupby_sort):
18501850
pandas_df = pandas.DataFrame(
18511851
{
18521852
"col1": [1, 0, 2, 3],
1853-
"col2": [4, 5, np.NaN, 7],
1854-
"col3": [np.NaN, np.NaN, 12, 10],
1853+
"col2": [4, 5, np.nan, 7],
1854+
"col3": [np.nan, np.nan, 12, 10],
18551855
"col4": [17, 13, 16, 15],
18561856
}
18571857
)

modin/tests/pandas/test_series.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -4008,11 +4008,11 @@ def test_str___getitem__(data, key):
40084008
# Test str operations
40094009
@pytest.mark.parametrize(
40104010
"others",
4011-
[["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.NaN], None],
4011+
[["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan], None],
40124012
ids=["list", "None"],
40134013
)
40144014
def test_str_cat(others):
4015-
data = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.NaN]
4015+
data = ["abC|DeF,Hik", "gSaf,qWer|Gre", "asd3,4sad|", np.nan]
40164016
eval_general(*create_test_series(data), lambda s: s.str.cat(others=others))
40174017

40184018

@@ -4644,7 +4644,7 @@ def str_encode_decode_test_data() -> list[str]:
46444644
"234,3245.67",
46454645
"gSaf,qWer|Gre",
46464646
"asd3,4sad|",
4647-
np.NaN,
4647+
np.nan,
46484648
None,
46494649
# add a string that we can't encode in ascii, and whose utf-8 encoding
46504650
# we cannot decode in ascii

modin/tests/pandas/utils.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
(
8989
x
9090
if (j % 4 == 0 and i > NCOLS // 2) or (j != i and i <= NCOLS // 2)
91-
else np.NaN
91+
else np.nan
9292
)
9393
for j, x in enumerate(
9494
random_state.uniform(RAND_LOW, RAND_HIGH, size=(NROWS))
@@ -161,7 +161,7 @@
161161
for col in test_data["float_nan_data"]:
162162
for row in range(NROWS // 2):
163163
if row % 16 == 0:
164-
test_data["float_nan_data"][col][row] = np.NaN
164+
test_data["float_nan_data"][col][row] = np.nan
165165

166166
test_data_values = list(test_data.values())
167167
test_data_keys = list(test_data.keys())
@@ -226,8 +226,8 @@
226226

227227
test_data_diff_dtype = {
228228
"int_col": [-5, 2, 7, 16],
229-
"float_col": [np.NaN, -9.4, 10.1, np.NaN],
230-
"str_col": ["a", np.NaN, "c", "d"],
229+
"float_col": [np.nan, -9.4, 10.1, np.nan],
230+
"str_col": ["a", np.nan, "c", "d"],
231231
"bool_col": [False, True, True, False],
232232
}
233233

@@ -272,7 +272,7 @@
272272
"234,3245.67",
273273
"gSaf,qWer|Gre",
274274
"asd3,4sad|",
275-
np.NaN,
275+
np.nan,
276276
]
277277
}
278278

@@ -290,7 +290,7 @@
290290
string_sep_values = list(string_seperators.values())
291291
string_sep_keys = list(string_seperators.keys())
292292

293-
string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.NaN}
293+
string_na_rep = {"None na_rep": None, "- na_rep": "-", "nan na_rep": np.nan}
294294

295295
string_na_rep_values = list(string_na_rep.values())
296296
string_na_rep_keys = list(string_na_rep.keys())

0 commit comments

Comments (0)