From 939238695fdb0d8e5f226e99e9b5f45353b9a570 Mon Sep 17 00:00:00 2001 From: Irina Truong Date: Fri, 17 Mar 2023 17:15:11 -0700 Subject: [PATCH] Experiment. --- .github/workflows/tests.yml | 53 +++++++++---------- tests/benchmarks/test_arrow.py | 94 ++++++++++++++++++---------------- 2 files changed, 78 insertions(+), 69 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7f4704a18e..2657848602 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -31,44 +31,45 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.9"] - pytest_args: [tests] - runtime-version: [upstream, latest, "0.2.1"] + pytest_args: [tests/benchmarks/test_arrow.py] + runtime-version: [upstream] + # runtime-version: [upstream, latest, "0.2.1"] include: # Run stability tests on Python 3.8 - pytest_args: tests/stability python-version: "3.8" runtime-version: upstream os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.8" - runtime-version: latest - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.8" - runtime-version: "0.2.1" - os: ubuntu-latest +# - pytest_args: tests/stability +# python-version: "3.8" +# runtime-version: latest +# os: ubuntu-latest +# - pytest_args: tests/stability +# python-version: "3.8" +# runtime-version: "0.2.1" +# os: ubuntu-latest # Run stability tests on Python 3.10 - pytest_args: tests/stability python-version: "3.10" runtime-version: upstream os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.10" - runtime-version: latest - os: ubuntu-latest - - pytest_args: tests/stability - python-version: "3.10" - runtime-version: "0.2.1" - os: ubuntu-latest +# - pytest_args: tests/stability +# python-version: "3.10" +# runtime-version: latest +# os: ubuntu-latest +# - pytest_args: tests/stability +# python-version: "3.10" +# runtime-version: "0.2.1" +# os: ubuntu-latest # Run stability tests on Python Windows and MacOS (latest py39 only) - - pytest_args: tests/stability - python-version: "3.9" - runtime-version: latest - os: windows-latest - - pytest_args: tests/stability - python-version: "3.9" - runtime-version: latest - os: macos-latest +# - pytest_args: tests/stability +# python-version: "3.9" +# runtime-version: latest +# os: windows-latest +# - pytest_args: tests/stability +# python-version: "3.9" +# runtime-version: latest +# os: macos-latest steps: - name: Checkout diff --git a/tests/benchmarks/test_arrow.py b/tests/benchmarks/test_arrow.py index 9878e386ff..8801851cff 100644 --- a/tests/benchmarks/test_arrow.py +++ b/tests/benchmarks/test_arrow.py @@ -1,52 +1,60 @@ -import pytest import pandas as pd +import pytest from ..utils_test import cluster_memory, timeseries_of_size, wait -@pytest.mark.skipif() -def test_unique(small_client): - """Find unique values""" - memory = cluster_memory(small_client) - df = timeseries_of_size(memory) - s = df.name.astype(pd.StringDtype("pyarrow")).persist() - result = s.unique() - wait(result, small_client, 10 * 60) - - -def test_contains(small_client): - """String contains""" - memory = cluster_memory(small_client) - df = timeseries_of_size(memory) - s = df.name.astype(pd.StringDtype("pyarrow")).persist() - result = s.str.contains("a") - wait(result, small_client, 10 * 60) - - -def test_startswith(small_client): - """String starts with""" +@pytest.fixture(params=[True, False]) +def series_with_client(request, small_client): memory = cluster_memory(small_client) df = timeseries_of_size(memory) - s = df.name.astype(pd.StringDtype("pyarrow")).persist() - result = s.str.startswith("B") - wait(result, small_client, 10 * 60) - + series = df.name + if request.param: + series = series.astype(pd.StringDtype("pyarrow")) + series = series.persist() + yield series, small_client -def test_filter(small_client): - """How fast can we filter a DataFrame?""" - memory = cluster_memory(small_client) - df = timeseries_of_size(memory) - df.name = df.name.astype(pd.StringDtype("pyarrow")) - df = df.persist() - name = df.head(1).name.iloc[0] # Get first name that appears - result = df[df.name == name] - wait(result, small_client, 10 * 60) - -def test_value_counts(small_client): - """Value counts on string values""" - memory = cluster_memory(small_client) - df = timeseries_of_size(memory) - s = df.name.astype(pd.StringDtype("pyarrow")).persist() - result = s.value_counts() - wait(result, small_client, 10 * 60) +def test_unique(series_with_client): + """Find unique values""" + series, client = series_with_client + result = series.unique() + wait(result, client, 10 * 60) + + +# def test_contains(small_client): +# """String contains""" +# memory = cluster_memory(small_client) +# df = timeseries_of_size(memory) +# s = df.name.astype(pd.StringDtype("pyarrow")).persist() +# result = s.str.contains("a") +# wait(result, small_client, 10 * 60) +# +# +# def test_startswith(small_client): +# """String starts with""" +# memory = cluster_memory(small_client) +# df = timeseries_of_size(memory) +# s = df.name.astype(pd.StringDtype("pyarrow")).persist() +# result = s.str.startswith("B") +# wait(result, small_client, 10 * 60) +# +# +# def test_filter(small_client): +# """How fast can we filter a DataFrame?""" +# memory = cluster_memory(small_client) +# df = timeseries_of_size(memory) +# df.name = df.name.astype(pd.StringDtype("pyarrow")) +# df = df.persist() +# name = df.head(1).name.iloc[0] # Get first name that appears +# result = df[df.name == name] +# wait(result, small_client, 10 * 60) +# +# +# def test_value_counts(small_client): +# """Value counts on string values""" +# memory = cluster_memory(small_client) +# df = timeseries_of_size(memory) +# s = df.name.astype(pd.StringDtype("pyarrow")).persist() +# result = s.value_counts() +# wait(result, small_client, 10 * 60)