Michael Rabinovich Cursor committed on
Commit
98bc085
·
1 Parent(s): 770de2c

submit: drop per-candidate OCC parse from validation gate

Browse files

The gate no longer rejects on candidate content (eval re-validates and scores
invalid STEPs as 0), so parsing every STEP at submit time was pure wasted work --
slow and OOM-prone on large/malformed files (e.g. a 58 MB STEP), which made
validation crawl and could fail the whole submission. Keep only the cheap
presence + non-empty checks; validity is the evaluator's job.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show
  1. submit.py +11 -33
submit.py CHANGED
@@ -103,7 +103,6 @@ from typing import Any
103
  import cadgenbench
104
  import gradio as gr
105
  from cadgenbench.common.paths import data_inputs_dir
106
- from cadgenbench.common.validity import parse_step
107
  from huggingface_hub import (
108
  CommitOperationAdd,
109
  HfApi,
@@ -700,49 +699,28 @@ def _validate_fixture_set(unpacked: Path) -> set[str]:
700
 
701
 
702
  def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> None:
703
- # Threads (not processes): OCC's parse_step releases the GIL during
704
- # the C++ STEP read, and mesh loading is I/O + numpy/trimesh work. This
705
- # gate doesn't touch the VTK renderer
706
- # (which is the only piece in the eval pipeline that needs the
707
- # ProcessPoolExecutor + spawn dance). Per-fixture I/O + OCC load is
708
- # 1-5s, so fanning out a 5+ fixture set across cpu-upgrade vCPUs
709
- # cuts wall time roughly linearly. ex.map raises the first child
710
- # exception when its iterator is consumed, so wrapping in list()
711
- # preserves the same `Sample <name>` rejection text as the
712
- # sequential loop did.
713
  def _check_one_candidate(name: str) -> None:
714
  candidate = _candidate_path(unpacked / name)
715
  if candidate is None:
716
  # Missing output is a valid benchmark outcome: the evaluator writes
717
  # status="missing" and the fixture contributes cad_score=0.
718
  return
719
- # A present-but-unloadable candidate is NOT a submission error. The
720
- # evaluator's CAD-validity gate scores any non-valid solid as
721
- # cad_score=0 (status="invalid"), so an empty or unparseable file is a
722
- # legitimate (zero-scoring) outcome, not a reason to reject the whole
723
- # submission. We only log here; the gate never blocks on candidate
724
- # content, so one bad part can't sink an otherwise-valid submission.
725
  if candidate.stat().st_size == 0:
726
  logger.warning(
727
  "Sample %s has an empty %s; will score 0 (invalid).",
728
  name, candidate.name,
729
  )
730
- return
731
- suffix = candidate.suffix.lower()
732
- if suffix in {".step", ".stp"}:
733
- try:
734
- parse_step(candidate)
735
- except Exception as e: # noqa: BLE001 - non-fatal; eval scores it 0
736
- logger.warning(
737
- "Sample %s has an %s that is not loadable as STEP (%s); "
738
- "will score 0 (invalid).",
739
- name, candidate.name, e,
740
- )
741
- else: # pragma: no cover - _candidate_path constrains suffixes
742
- logger.warning(
743
- "Sample %s uses unexpected candidate file %s; will score 0.",
744
- name, candidate.name,
745
- )
746
 
747
  with ThreadPoolExecutor(
748
  max_workers=min(8, os.cpu_count() or 1),
 
103
  import cadgenbench
104
  import gradio as gr
105
  from cadgenbench.common.paths import data_inputs_dir
 
106
  from huggingface_hub import (
107
  CommitOperationAdd,
108
  HfApi,
 
699
 
700
 
701
  def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> None:
702
+ # Cheap structural gate: confirm each present candidate is non-empty. It
703
+ # does NOT OCC-parse candidates -- the evaluator re-validates every one and
704
+ # scores invalid/unloadable STEPs as cad_score=0, so a submit-time parse is
705
+ # wasted (and was slow/OOM-prone on large or malformed files). Threaded
706
+ # only to fan out the per-fixture stat() across the sample set.
 
 
 
 
 
707
  def _check_one_candidate(name: str) -> None:
708
  candidate = _candidate_path(unpacked / name)
709
  if candidate is None:
710
  # Missing output is a valid benchmark outcome: the evaluator writes
711
  # status="missing" and the fixture contributes cad_score=0.
712
  return
713
+ # Cheap presence/size check only -- deliberately NO OCC parse here.
714
+ # The gate never rejects on candidate content (the evaluator
715
+ # re-validates every candidate and scores any unloadable/invalid STEP
716
+ # as cad_score=0), so parsing at submit time is wasted work: it's slow
717
+ # and memory-heavy on large or malformed STEPs and used to dominate (or
718
+ # crash) validation. Presence + non-empty is all this gate needs.
719
  if candidate.stat().st_size == 0:
720
  logger.warning(
721
  "Sample %s has an empty %s; will score 0 (invalid).",
722
  name, candidate.name,
723
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
 
725
  with ThreadPoolExecutor(
726
  max_workers=min(8, os.cpu_count() or 1),