Skip to content

Commit

Permalink
Experiment.
Browse files Browse the repository at this point in the history
  • Loading branch information
j-bennet committed Mar 18, 2023
1 parent 35e9079 commit a4316ca
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 69 deletions.
53 changes: 27 additions & 26 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,44 +31,45 @@ jobs:
matrix:
os: [ubuntu-latest]
python-version: ["3.9"]
pytest_args: [tests]
runtime-version: [upstream, latest, "0.2.1"]
pytest_args: [tests/benchmarks/test_arrow.py]
runtime-version: [upstream]
# runtime-version: [upstream, latest, "0.2.1"]
include:
# Run stability tests on Python 3.8
- pytest_args: tests/stability
python-version: "3.8"
runtime-version: upstream
os: ubuntu-latest
- pytest_args: tests/stability
python-version: "3.8"
runtime-version: latest
os: ubuntu-latest
- pytest_args: tests/stability
python-version: "3.8"
runtime-version: "0.2.1"
os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.8"
# runtime-version: latest
# os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.8"
# runtime-version: "0.2.1"
# os: ubuntu-latest
# Run stability tests on Python 3.10
- pytest_args: tests/stability
python-version: "3.10"
runtime-version: upstream
os: ubuntu-latest
- pytest_args: tests/stability
python-version: "3.10"
runtime-version: latest
os: ubuntu-latest
- pytest_args: tests/stability
python-version: "3.10"
runtime-version: "0.2.1"
os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.10"
# runtime-version: latest
# os: ubuntu-latest
# - pytest_args: tests/stability
# python-version: "3.10"
# runtime-version: "0.2.1"
# os: ubuntu-latest
# Run stability tests on Windows and macOS (latest runtime, Python 3.9 only)
- pytest_args: tests/stability
python-version: "3.9"
runtime-version: latest
os: windows-latest
- pytest_args: tests/stability
python-version: "3.9"
runtime-version: latest
os: macos-latest
# - pytest_args: tests/stability
# python-version: "3.9"
# runtime-version: latest
# os: windows-latest
# - pytest_args: tests/stability
# python-version: "3.9"
# runtime-version: latest
# os: macos-latest

steps:
- name: Checkout
Expand Down
93 changes: 50 additions & 43 deletions tests/benchmarks/test_arrow.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,59 @@
import pytest
import pandas as pd
import pytest

from ..utils_test import cluster_memory, timeseries_of_size, wait


# A condition-less ``skipif`` skips unconditionally with an empty reason;
# spell that out with ``skip`` so the test report says why nothing ran.
@pytest.mark.skip(reason="unconditionally skipped (was a condition-less skipif)")
def test_unique(small_client):
    """Benchmark ``unique()`` on a persisted pyarrow-backed string series.

    The ``name`` column of the generated timeseries is cast to
    ``pd.StringDtype("pyarrow")`` and persisted, ``unique()`` is requested
    on it, and the result is handed to ``wait`` together with the client.
    """
    memory = cluster_memory(small_client)
    # presumably sizes the dataset relative to cluster memory -- confirm in utils_test
    df = timeseries_of_size(memory)
    s = df.name.astype(pd.StringDtype("pyarrow")).persist()
    result = s.unique()
    # 10 * 60 is presumably a timeout in seconds -- confirm against ``wait``
    wait(result, small_client, 10 * 60)


def test_contains(small_client):
    """Benchmark ``str.contains("a")`` on a persisted pyarrow string series."""
    available = cluster_memory(small_client)
    frame = timeseries_of_size(available)

    # Cast the ``name`` column to pyarrow-backed strings and persist it
    # before the string operation is requested.
    arrow_strings = pd.StringDtype("pyarrow")
    names = frame.name.astype(arrow_strings).persist()

    # 10 * 60 is presumably a timeout in seconds -- confirm against ``wait``
    wait(names.str.contains("a"), small_client, 10 * 60)


def test_startswith(small_client):
    """Benchmark ``str.startswith("B")`` on a persisted pyarrow string series."""
    memory = cluster_memory(small_client)
    df = timeseries_of_size(memory)
    s = df.name.astype(pd.StringDtype("pyarrow")).persist()
    result = s.str.startswith("B")
    # 10 * 60 is presumably a timeout in seconds -- confirm against ``wait``
    wait(result, small_client, 10 * 60)


@pytest.fixture(params=[True, False])
def series_with_client(request, small_client):
    """Yield ``(series, client)`` for the string benchmarks.

    Parametrized over ``request.param``: when true, the ``name`` column is
    cast to ``pd.StringDtype("pyarrow")``; otherwise the column is used
    as-is. In both cases the series is persisted before being yielded
    alongside ``small_client``.
    """
    memory = cluster_memory(small_client)
    df = timeseries_of_size(memory)

    # Always bind ``series`` so the ``False`` parametrization does not hit an
    # unbound local. NOTE(review): using the un-cast column as the baseline
    # is the presumed intent of the ``False`` case -- confirm.
    series = df.name
    if request.param:
        series = series.astype(pd.StringDtype("pyarrow"))
    series = series.persist()
    yield series, small_client

def test_filter(small_client):
    """Benchmark filtering a persisted DataFrame by equality on ``name``.

    The ``name`` column is converted to pyarrow-backed strings, the frame is
    persisted, and rows equal to the first name returned by ``head(1)`` are
    selected.
    """
    available = cluster_memory(small_client)
    frame = timeseries_of_size(available)

    arrow_strings = pd.StringDtype("pyarrow")
    frame.name = frame.name.astype(arrow_strings)
    frame = frame.persist()

    # Use the first name that appears as the value to filter on.
    first_row = frame.head(1)
    target = first_row.name.iloc[0]

    matching = frame[frame.name == target]
    # 10 * 60 is presumably a timeout in seconds -- confirm against ``wait``
    wait(matching, small_client, 10 * 60)


def test_value_counts(small_client):
    """Benchmark ``value_counts()`` on a persisted pyarrow string series."""
    available = cluster_memory(small_client)
    frame = timeseries_of_size(available)

    # Cast the ``name`` column to pyarrow-backed strings and persist it
    # before counting values.
    arrow_strings = pd.StringDtype("pyarrow")
    names = frame.name.astype(arrow_strings).persist()

    # 10 * 60 is presumably a timeout in seconds -- confirm against ``wait``
    wait(names.value_counts(), small_client, 10 * 60)
def test_unique(series_with_client):
    """Benchmark ``unique()`` on the series supplied by ``series_with_client``.

    The fixture value is unpacked into the series under test and the client
    that is passed on to ``wait``.
    """
    names, cluster_client = series_with_client
    # 10 * 60 is presumably a timeout in seconds -- confirm against ``wait``
    wait(names.unique(), cluster_client, 10 * 60)


# def test_contains(small_client):
# """String contains"""
# memory = cluster_memory(small_client)
# df = timeseries_of_size(memory)
# s = df.name.astype(pd.StringDtype("pyarrow")).persist()
# result = s.str.contains("a")
# wait(result, small_client, 10 * 60)
#
#
# def test_startswith(small_client):
# """String starts with"""
# memory = cluster_memory(small_client)
# df = timeseries_of_size(memory)
# s = df.name.astype(pd.StringDtype("pyarrow")).persist()
# result = s.str.startswith("B")
# wait(result, small_client, 10 * 60)
#
#
# def test_filter(small_client):
# """How fast can we filter a DataFrame?"""
# memory = cluster_memory(small_client)
# df = timeseries_of_size(memory)
# df.name = df.name.astype(pd.StringDtype("pyarrow"))
# df = df.persist()
# name = df.head(1).name.iloc[0] # Get first name that appears
# result = df[df.name == name]
# wait(result, small_client, 10 * 60)
#
#
# def test_value_counts(small_client):
# """Value counts on string values"""
# memory = cluster_memory(small_client)
# df = timeseries_of_size(memory)
# s = df.name.astype(pd.StringDtype("pyarrow")).persist()
# result = s.value_counts()
# wait(result, small_client, 10 * 60)

0 comments on commit a4316ca

Please sign in to comment.