[CI] Fix flaky Ray Datasets integration test. (#952)

clarkzinzow · web-flow · commit 942ad48dee9a · 2023-05-23T18:28:04.000-07:00
Ray Datasets partition ordering is now nondeterministic, so any test whose equality assertion relies on a particular cross-partition row ordering needs to sort or freeze the data before comparison. This PR applies the fix from #917 (freezing) to the tensor test.
diff --git a/tests/ray/test_datasets.py b/tests/ray/test_datasets.py
@@ -187,13 +187,14 @@ def test_from_ray_dataset_tensor(n_partitions: int):
     ds = ds.map(lambda i: {"int": i, "np": np.ones((3, 3))}).repartition(n_partitions)
 
     df = daft.from_ray_dataset(ds)
-    np.testing.assert_equal(
-        df.to_pydict(),
-        {
-            "int": list(range(8)),
-            "np": [np.ones((3, 3)) for i in range(8)],
-        },
-    )
+    out = df.to_pydict()
+    out["np"] = [arr.tolist() for arr in out["np"]]
+    expected = {
+        "int": list(range(8)),
+        "np": [np.ones((3, 3)) for i in range(8)],
+    }
+    expected["np"] = [arr.tolist() for arr in expected["np"]]
+    assert freeze(out) == freeze(expected)
 
 
 @pytest.mark.skipif(get_context().runner_config.name != "ray", reason="Needs to run on Ray runner")