Skip to content

Commit

Permalink
[ci][microcheck] use commit instead of branch for failed prs (ray-pro…
Browse files Browse the repository at this point in the history
…ject#45384)

Use commit instead of branch to identify failed prs, to account for all
PR revisions. We seems to be under-coverage in large PRs where people
make several bugs across multiple PR revisions.

Test:
- CI

Signed-off-by: can <[email protected]>
Signed-off-by: gchurch <[email protected]>
  • Loading branch information
can-anyscale authored and GabeChurch committed Jun 11, 2024
1 parent 3555dbf commit 05afebc
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 38 deletions.
55 changes: 27 additions & 28 deletions ci/ray_ci/automation/determine_microcheck_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ def main(
]
logger.info(f"Analyzing {len(tests)} tests for team {team}")

test_to_prs = {
test.get_name(): _get_failed_prs(test, test_history_length) for test in tests
test_to_commits = {
test.get_name(): _get_failed_commits(test, test_history_length)
for test in tests
}
high_impact_tests = _get_test_with_minimal_coverage(test_to_prs, coverage)
high_impact_tests = _get_test_with_minimal_coverage(test_to_commits, coverage)
if consider_master_branch:
high_impact_tests = high_impact_tests.union(
_get_failed_tests_from_master_branch(tests, test_history_length)
Expand Down Expand Up @@ -99,54 +100,52 @@ def _get_failed_tests_from_master_branch(


def _get_test_with_minimal_coverage(
test_to_prs: Dict[str, Set[str]], coverage: int
test_to_commits: Dict[str, Set[str]], coverage: int
) -> Set[str]:
"""
Get the minimal set of tests that cover a certain percentage of PRs
"""
all_prs = set()
all_commits = set()
high_impact_tests = set()
for prs in test_to_prs.values():
all_prs.update(prs)
if not all_prs:
for commits in test_to_commits.values():
all_commits.update(commits)
if not all_commits:
return set()

covered_prs = set()
covered_pr_count = 0
while 100 * len(covered_prs) / len(all_prs) < coverage:
most_impact_test = _get_most_impact_test(test_to_prs, covered_prs)
covered_commits = set()
covered_commit_count = 0
while 100 * len(covered_commits) / len(all_commits) < coverage:
most_impact_test = _get_most_impact_test(test_to_commits, covered_commits)
high_impact_tests.add(most_impact_test)
covered_prs.update(test_to_prs[most_impact_test])
assert covered_pr_count < len(covered_prs), "No progress in coverage"
covered_pr_count = len(covered_prs)
covered_commits.update(test_to_commits[most_impact_test])
assert covered_commit_count < len(covered_commits), "No progress in coverage"
covered_commit_count = len(covered_commits)

return high_impact_tests


def _get_most_impact_test(
test_to_prs: Dict[str, Set[str]],
already_covered_prs: Set[str],
test_to_commits: Dict[str, Set[str]],
already_covered_commits: Set[str],
) -> str:
"""
Get the test that covers the most PRs, excluding the PRs that have already been
covered
Get the test that covers the most PR revisions, excluding the revisions that have
already been covered
"""
most_impact_test = None
for test, prs in test_to_prs.items():
if most_impact_test is None or len(prs - already_covered_prs) > len(
test_to_prs[most_impact_test] - already_covered_prs
for test, prs in test_to_commits.items():
if most_impact_test is None or len(prs - already_covered_commits) > len(
test_to_commits[most_impact_test] - already_covered_commits
):
most_impact_test = test

return most_impact_test


def _get_failed_prs(test: Test, test_history_length: int) -> Set[str]:
def _get_failed_commits(test: Test, test_history_length: int) -> Set[str]:
"""
Get the failed PRs for a test. Currently we use the branch name as an identifier
for a PR.
TODO (can): Use the PR number instead of the branch name
Get the failed PRs for a test. We use the commit to account for all revisions
of a PR.
"""
logger.info(f"Analyzing test {test.get_name()}")
results = [
Expand All @@ -159,7 +158,7 @@ def _get_failed_prs(test: Test, test_history_length: int) -> Set[str]:
)
if result.status == ResultStatus.ERROR.value
]
return {result.branch for result in results if result.branch}
return {result.commit for result in results if result.commit}


if __name__ == "__main__":
Expand Down
20 changes: 10 additions & 10 deletions ci/ray_ci/automation/test_determine_microcheck_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from ci.ray_ci.automation.determine_microcheck_tests import (
_get_failed_prs,
_get_failed_commits,
_get_test_with_minimal_coverage,
_get_failed_tests_from_master_branch,
_update_high_impact_tests,
Expand Down Expand Up @@ -35,11 +35,11 @@ def persist_to_s3(self) -> None:
DB[self["name"]] = json.dumps(self)


def stub_test_result(status: ResultStatus, branch: str) -> TestResult:
def stub_test_result(status: ResultStatus, branch: str, commit: str = "") -> TestResult:
return TestResult(
status=status.value,
branch=branch,
commit="",
commit=commit,
url="",
timestamp=0,
pull_request="",
Expand Down Expand Up @@ -67,21 +67,21 @@ def test_update_high_impact_tests():
assert json.loads(DB["bad_test"])[Test.KEY_IS_HIGH_IMPACT] == "false"


def test_get_failed_prs():
assert _get_failed_prs(
def test_get_failed_commits():
assert _get_failed_commits(
MockTest(
{
"name": "test",
"test_results": [
stub_test_result(ResultStatus.ERROR, "w00t"),
stub_test_result(ResultStatus.ERROR, "w00t"),
stub_test_result(ResultStatus.SUCCESS, "hi"),
stub_test_result(ResultStatus.ERROR, "f00"),
stub_test_result(ResultStatus.ERROR, "w00t", commit="1w00t2"),
stub_test_result(ResultStatus.ERROR, "w00t", commit="2w00t3"),
stub_test_result(ResultStatus.SUCCESS, "hi", commit="5hi7"),
stub_test_result(ResultStatus.ERROR, "f00", commit="1f003"),
],
}
),
1,
) == {"w00t", "f00"}
) == {"1w00t2", "2w00t3", "1f003"}


def test_get_failed_tests_from_master_branch():
Expand Down

0 comments on commit 05afebc

Please sign in to comment.