Michael Rabinovich Cursor committed on
Commit ·
98bc085
1
Parent(s): 770de2c
submit: drop per-candidate OCC parse from validation gate
Browse files
The gate no longer rejects on candidate content (eval re-validates and scores
invalid STEPs as 0), so parsing every STEP at submit time was pure wasted work --
slow and OOM-prone on large/malformed files (e.g. a 58 MB STEP), which made
validation crawl and could error the whole submission. Keep only the cheap
presence + non-empty checks; validity is the evaluator's job.
Co-authored-by: Cursor <cursoragent@cursor.com>
submit.py
CHANGED
|
@@ -103,7 +103,6 @@ from typing import Any
|
|
| 103 |
import cadgenbench
|
| 104 |
import gradio as gr
|
| 105 |
from cadgenbench.common.paths import data_inputs_dir
|
| 106 |
-
from cadgenbench.common.validity import parse_step
|
| 107 |
from huggingface_hub import (
|
| 108 |
CommitOperationAdd,
|
| 109 |
HfApi,
|
|
@@ -700,49 +699,28 @@ def _validate_fixture_set(unpacked: Path) -> set[str]:
|
|
| 700 |
|
| 701 |
|
| 702 |
def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> None:
|
| 703 |
-
#
|
| 704 |
-
#
|
| 705 |
-
#
|
| 706 |
-
# (
|
| 707 |
-
#
|
| 708 |
-
# 1-5s, so fanning out a 5+ fixture set across cpu-upgrade vCPUs
|
| 709 |
-
# cuts wall time roughly linearly. ex.map raises the first child
|
| 710 |
-
# exception when its iterator is consumed, so wrapping in list()
|
| 711 |
-
# preserves the same `Sample <name>` rejection text as the
|
| 712 |
-
# sequential loop did.
|
| 713 |
def _check_one_candidate(name: str) -> None:
|
| 714 |
candidate = _candidate_path(unpacked / name)
|
| 715 |
if candidate is None:
|
| 716 |
# Missing output is a valid benchmark outcome: the evaluator writes
|
| 717 |
# status="missing" and the fixture contributes cad_score=0.
|
| 718 |
return
|
| 719 |
-
#
|
| 720 |
-
#
|
| 721 |
-
#
|
| 722 |
-
#
|
| 723 |
-
#
|
| 724 |
-
#
|
| 725 |
if candidate.stat().st_size == 0:
|
| 726 |
logger.warning(
|
| 727 |
"Sample %s has an empty %s; will score 0 (invalid).",
|
| 728 |
name, candidate.name,
|
| 729 |
)
|
| 730 |
-
return
|
| 731 |
-
suffix = candidate.suffix.lower()
|
| 732 |
-
if suffix in {".step", ".stp"}:
|
| 733 |
-
try:
|
| 734 |
-
parse_step(candidate)
|
| 735 |
-
except Exception as e: # noqa: BLE001 - non-fatal; eval scores it 0
|
| 736 |
-
logger.warning(
|
| 737 |
-
"Sample %s has an %s that is not loadable as STEP (%s); "
|
| 738 |
-
"will score 0 (invalid).",
|
| 739 |
-
name, candidate.name, e,
|
| 740 |
-
)
|
| 741 |
-
else: # pragma: no cover - _candidate_path constrains suffixes
|
| 742 |
-
logger.warning(
|
| 743 |
-
"Sample %s uses unexpected candidate file %s; will score 0.",
|
| 744 |
-
name, candidate.name,
|
| 745 |
-
)
|
| 746 |
|
| 747 |
with ThreadPoolExecutor(
|
| 748 |
max_workers=min(8, os.cpu_count() or 1),
|
|
|
|
| 103 |
import cadgenbench
|
| 104 |
import gradio as gr
|
| 105 |
from cadgenbench.common.paths import data_inputs_dir
|
|
|
|
| 106 |
from huggingface_hub import (
|
| 107 |
CommitOperationAdd,
|
| 108 |
HfApi,
|
|
|
|
| 699 |
|
| 700 |
|
| 701 |
def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> None:
|
| 702 |
+
# Cheap structural gate: confirm each present candidate is non-empty. It
|
| 703 |
+
# does NOT OCC-parse candidates -- the evaluator re-validates every one and
|
| 704 |
+
# scores invalid/unloadable STEPs as cad_score=0, so a submit-time parse is
|
| 705 |
+
# wasted (and was slow/OOM-prone on large or malformed files). Threaded
|
| 706 |
+
# only to fan out the per-fixture stat() across the sample set.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
def _check_one_candidate(name: str) -> None:
|
| 708 |
candidate = _candidate_path(unpacked / name)
|
| 709 |
if candidate is None:
|
| 710 |
# Missing output is a valid benchmark outcome: the evaluator writes
|
| 711 |
# status="missing" and the fixture contributes cad_score=0.
|
| 712 |
return
|
| 713 |
+
# Cheap presence/size check only -- deliberately NO OCC parse here.
|
| 714 |
+
# The gate never rejects on candidate content (the evaluator
|
| 715 |
+
# re-validates every candidate and scores any unloadable/invalid STEP
|
| 716 |
+
# as cad_score=0), so parsing at submit time is wasted work: it's slow
|
| 717 |
+
# and memory-heavy on large or malformed STEPs and used to dominate (or
|
| 718 |
+
# crash) validation. Presence + non-empty is all this gate needs.
|
| 719 |
if candidate.stat().st_size == 0:
|
| 720 |
logger.warning(
|
| 721 |
"Sample %s has an empty %s; will score 0 (invalid).",
|
| 722 |
name, candidate.name,
|
| 723 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
|
| 725 |
with ThreadPoolExecutor(
|
| 726 |
max_workers=min(8, os.cpu_count() or 1),
|