Spaces:

HuggingAI4Engineering
/

CADGenBench

Running

Michael Rabinovich Cursor committed 3 days ago

Commit

98bc085

1 Parent(s): 770de2c

submit: drop per-candidate OCC parse from validation gate

The gate no longer rejects on candidate content (the evaluator re-validates
every candidate and scores invalid STEPs as 0), so parsing every STEP at submit
time was pure wasted work -- slow and OOM-prone on large/malformed files (e.g. a
58 MB STEP), which made validation crawl and could error out the whole
submission. Keep only the cheap presence and non-empty checks; validity is the
evaluator's job.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show

submit.py +11 -33

submit.py CHANGED Viewed

@@ -103,7 +103,6 @@ from typing import Any
 import cadgenbench
 import gradio as gr
 from cadgenbench.common.paths import data_inputs_dir
-from cadgenbench.common.validity import parse_step
 from huggingface_hub import (
     CommitOperationAdd,
     HfApi,
@@ -700,49 +699,28 @@ def _validate_fixture_set(unpacked: Path) -> set[str]:
 def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> None:
-    # Threads (not processes): OCC's parse_step releases the GIL during
-    # the C++ STEP read, and mesh loading is I/O + numpy/trimesh work. This
-    # gate doesn't touch the VTK renderer
-    # (which is the only piece in the eval pipeline that needs the
-    # ProcessPoolExecutor + spawn dance). Per-fixture I/O + OCC load is
-    # 1-5s, so fanning out a 5+ fixture set across cpu-upgrade vCPUs
-    # cuts wall time roughly linearly. ex.map raises the first child
-    # exception when its iterator is consumed, so wrapping in list()
-    # preserves the same `Sample <name>` rejection text as the
-    # sequential loop did.
     def _check_one_candidate(name: str) -> None:
         candidate = _candidate_path(unpacked / name)
         if candidate is None:
             # Missing output is a valid benchmark outcome: the evaluator writes
             # status="missing" and the fixture contributes cad_score=0.
             return
-        # A present-but-unloadable candidate is NOT a submission error. The
-        # evaluator's CAD-validity gate scores any non-valid solid as
-        # cad_score=0 (status="invalid"), so an empty or unparseable file is a
-        # legitimate (zero-scoring) outcome, not a reason to reject the whole
-        # submission. We only log here; the gate never blocks on candidate
-        # content, so one bad part can't sink an otherwise-valid submission.
         if candidate.stat().st_size == 0:
             logger.warning(
                 "Sample %s has an empty %s; will score 0 (invalid).",
                 name, candidate.name,
             )
-            return
-        suffix = candidate.suffix.lower()
-        if suffix in {".step", ".stp"}:
-            try:
-                parse_step(candidate)
-            except Exception as e:  # noqa: BLE001 - non-fatal; eval scores it 0
-                logger.warning(
-                    "Sample %s has an %s that is not loadable as STEP (%s); "
-                    "will score 0 (invalid).",
-                    name, candidate.name, e,
-                )
-        else:  # pragma: no cover - _candidate_path constrains suffixes
-            logger.warning(
-                "Sample %s uses unexpected candidate file %s; will score 0.",
-                name, candidate.name,
-            )
     with ThreadPoolExecutor(
         max_workers=min(8, os.cpu_count() or 1),

 import cadgenbench
 import gradio as gr
 from cadgenbench.common.paths import data_inputs_dir
 from huggingface_hub import (
     CommitOperationAdd,
     HfApi,
 def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> None:
+    # Cheap structural gate: confirm each present candidate is non-empty. It
+    # does NOT OCC-parse candidates -- the evaluator re-validates every one and
+    # scores invalid/unloadable STEPs as cad_score=0, so a submit-time parse is
+    # wasted (and was slow/OOM-prone on large or malformed files). Threaded
+    # only to fan out the per-fixture stat() across the sample set.
     def _check_one_candidate(name: str) -> None:
         candidate = _candidate_path(unpacked / name)
         if candidate is None:
             # Missing output is a valid benchmark outcome: the evaluator writes
             # status="missing" and the fixture contributes cad_score=0.
             return
+        # Cheap presence/size check only -- deliberately NO OCC parse here.
+        # The gate never rejects on candidate content (the evaluator
+        # re-validates every candidate and scores any unloadable/invalid STEP
+        # as cad_score=0), so parsing at submit time is wasted work: it's slow
+        # and memory-heavy on large or malformed STEPs and used to dominate (or
+        # crash) validation. Presence + non-empty is all this gate needs.
         if candidate.stat().st_size == 0:
             logger.warning(
                 "Sample %s has an empty %s; will score 0 (invalid).",
                 name, candidate.name,
             )
     with ThreadPoolExecutor(
         max_workers=min(8, os.cpu_count() or 1),