From 939238695fdb0d8e5f226e99e9b5f45353b9a570 Mon Sep 17 00:00:00 2001
From: Irina Truong <i.chernyavska@gmail.com>
Date: Fri, 17 Mar 2023 17:15:11 -0700
Subject: [PATCH] Experiment: narrow the CI matrix and parametrize test_unique over pyarrow strings.

---
 .github/workflows/tests.yml    | 53 +++++++++----------
 tests/benchmarks/test_arrow.py | 94 ++++++++++++++++++----------------
 2 files changed, 78 insertions(+), 69 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7f4704a18e..2657848602 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -31,44 +31,45 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         python-version: ["3.9"]
-        pytest_args: [tests]
-        runtime-version: [upstream, latest, "0.2.1"]
+        pytest_args: [tests/benchmarks/test_arrow.py]
+        runtime-version: [upstream]
+        # Disabled for this experiment; original value: runtime-version: [upstream, latest, "0.2.1"]
         include:
           # Run stability tests on Python 3.8
           - pytest_args: tests/stability
             python-version: "3.8"
             runtime-version: upstream
             os: ubuntu-latest
-          - pytest_args: tests/stability
-            python-version: "3.8"
-            runtime-version: latest
-            os: ubuntu-latest
-          - pytest_args: tests/stability
-            python-version: "3.8"
-            runtime-version: "0.2.1"
-            os: ubuntu-latest
+#          - pytest_args: tests/stability
+#            python-version: "3.8"
+#            runtime-version: latest
+#            os: ubuntu-latest
+#          - pytest_args: tests/stability
+#            python-version: "3.8"
+#            runtime-version: "0.2.1"
+#            os: ubuntu-latest
           # Run stability tests on Python 3.10
           - pytest_args: tests/stability
             python-version: "3.10"
             runtime-version: upstream
             os: ubuntu-latest
-          - pytest_args: tests/stability
-            python-version: "3.10"
-            runtime-version: latest
-            os: ubuntu-latest
-          - pytest_args: tests/stability
-            python-version: "3.10"
-            runtime-version: "0.2.1"
-            os: ubuntu-latest
+#          - pytest_args: tests/stability
+#            python-version: "3.10"
+#            runtime-version: latest
+#            os: ubuntu-latest
+#          - pytest_args: tests/stability
+#            python-version: "3.10"
+#            runtime-version: "0.2.1"
+#            os: ubuntu-latest
           # Run stability tests on Python Windows and MacOS (latest py39 only)
-          - pytest_args: tests/stability
-            python-version: "3.9"
-            runtime-version: latest
-            os: windows-latest
-          - pytest_args: tests/stability
-            python-version: "3.9"
-            runtime-version: latest
-            os: macos-latest
+#          - pytest_args: tests/stability
+#            python-version: "3.9"
+#            runtime-version: latest
+#            os: windows-latest
+#          - pytest_args: tests/stability
+#            python-version: "3.9"
+#            runtime-version: latest
+#            os: macos-latest
 
     steps:
       - name: Checkout
diff --git a/tests/benchmarks/test_arrow.py b/tests/benchmarks/test_arrow.py
index 9878e386ff..8801851cff 100644
--- a/tests/benchmarks/test_arrow.py
+++ b/tests/benchmarks/test_arrow.py
@@ -1,52 +1,60 @@
-import pytest
 import pandas as pd
+import pytest
 
 from ..utils_test import cluster_memory, timeseries_of_size, wait
 
 
-@pytest.mark.skipif()
-def test_unique(small_client):
-    """Find unique values"""
-    memory = cluster_memory(small_client)
-    df = timeseries_of_size(memory)
-    s = df.name.astype(pd.StringDtype("pyarrow")).persist()
-    result = s.unique()
-    wait(result, small_client, 10 * 60)
-
-
-def test_contains(small_client):
-    """String contains"""
-    memory = cluster_memory(small_client)
-    df = timeseries_of_size(memory)
-    s = df.name.astype(pd.StringDtype("pyarrow")).persist()
-    result = s.str.contains("a")
-    wait(result, small_client, 10 * 60)
-
-
-def test_startswith(small_client):
-    """String starts with"""
+@pytest.fixture(params=[True, False])  # True: pyarrow strings; False: dtype left as generated
+def series_with_client(request, small_client):
     memory = cluster_memory(small_client)
     df = timeseries_of_size(memory)
-    s = df.name.astype(pd.StringDtype("pyarrow")).persist()
-    result = s.str.startswith("B")
-    wait(result, small_client, 10 * 60)
-
+    series = df.name
+    if request.param:  # only the True variant is cast
+        series = series.astype(pd.StringDtype("pyarrow"))
+    series = series.persist()
+    yield series, small_client  # consumers unpack this as (series, client)
 
-def test_filter(small_client):
-    """How fast can we filter a DataFrame?"""
-    memory = cluster_memory(small_client)
-    df = timeseries_of_size(memory)
-    df.name = df.name.astype(pd.StringDtype("pyarrow"))
-    df = df.persist()
-    name = df.head(1).name.iloc[0]  # Get first name that appears
-    result = df[df.name == name]
-    wait(result, small_client, 10 * 60)
 
-
-def test_value_counts(small_client):
-    """Value counts on string values"""
-    memory = cluster_memory(small_client)
-    df = timeseries_of_size(memory)
-    s = df.name.astype(pd.StringDtype("pyarrow")).persist()
-    result = s.value_counts()
-    wait(result, small_client, 10 * 60)
+def test_unique(series_with_client):
+    """Find unique values of the persisted series, once per fixture param."""
+    series, client = series_with_client
+    result = series.unique()
+    wait(result, client, 10 * 60)  # 10 * 60 = 600; presumably a timeout in seconds - confirm in utils_test
+
+
+# def test_contains(small_client):
+#     """String contains"""
+#     memory = cluster_memory(small_client)
+#     df = timeseries_of_size(memory)
+#     s = df.name.astype(pd.StringDtype("pyarrow")).persist()
+#     result = s.str.contains("a")
+#     wait(result, small_client, 10 * 60)
+#
+#
+# def test_startswith(small_client):
+#     """String starts with"""
+#     memory = cluster_memory(small_client)
+#     df = timeseries_of_size(memory)
+#     s = df.name.astype(pd.StringDtype("pyarrow")).persist()
+#     result = s.str.startswith("B")
+#     wait(result, small_client, 10 * 60)
+#
+#
+# def test_filter(small_client):
+#     """How fast can we filter a DataFrame?"""
+#     memory = cluster_memory(small_client)
+#     df = timeseries_of_size(memory)
+#     df.name = df.name.astype(pd.StringDtype("pyarrow"))
+#     df = df.persist()
+#     name = df.head(1).name.iloc[0]  # Get first name that appears
+#     result = df[df.name == name]
+#     wait(result, small_client, 10 * 60)
+#
+#
+# def test_value_counts(small_client):
+#     """Value counts on string values"""
+#     memory = cluster_memory(small_client)
+#     df = timeseries_of_size(memory)
+#     s = df.name.astype(pd.StringDtype("pyarrow")).persist()
+#     result = s.value_counts()
+#     wait(result, small_client, 10 * 60)