Michael Rabinovich Cursor committed on
Commit
04e4262
·
1 Parent(s): 1fd03de

Drop mesh-file submissions; STEP-only candidates

Browse files

Remove the triangle-mesh submission path (and its OpenSCAD framing) from the
submit gate, UI copy, and metrics page; candidates are STEP/BREP only. Pin the
eval image's cadgenbench to a47c951.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (6) hide show
  1. Dockerfile +1 -1
  2. README.md +1 -1
  3. app.py +4 -5
  4. metrics_page.py +3 -7
  5. submit.py +6 -24
  6. tests/test_submit.py +0 -17
Dockerfile CHANGED
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
- ARG CADGENBENCH_SHA=e7a60e0
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
 
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
+ ARG CADGENBENCH_SHA=a47c951
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
README.md CHANGED
@@ -30,6 +30,6 @@ textual or visual description of a mechanical part into a valid,
30
  geometrically correct 3D model?
31
 
32
  Browse the **Leaderboard** tab for ranked submissions, or upload your
33
- own STEP/BREP or mesh candidate files on the **Submit** tab: the Space runs the eval and
34
  publishes a row with a per-submission report. The **About** tab covers
35
  the scoring axes and links to the benchmark code and datasets.
 
30
  geometrically correct 3D model?
31
 
32
  Browse the **Leaderboard** tab for ranked submissions, or upload your
33
+ own STEP/BREP candidate files on the **Submit** tab: the Space runs the eval and
34
  publishes a row with a per-submission report. The **About** tab covers
35
  the scoring axes and links to the benchmark code and datasets.
app.py CHANGED
@@ -108,8 +108,8 @@ turn a description of a mechanical part into a valid, geometrically
108
  correct 3D model.
109
 
110
  - **Reference baseline**: an iterative AI agent that writes CAD code.
111
- - **Submission flow**: upload a zip of per-fixture candidate files (STEP/BREP
112
- or mesh); the Space
113
  runs the eval and appends a row to the submissions dataset.
114
  - **Datasets**: fixture inputs in
115
  [`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
@@ -1152,10 +1152,9 @@ with gr.Blocks(
1152
  f"""
1153
  **Submission format.** A single zip with:
1154
 
1155
- - one folder per sample in `{HF_DATA_REPO}`; include one accepted candidate
1156
  file for samples where your system produced a result. Accepted names:
1157
- `output.step`, `output.stp`, `output.stl`, `output.obj`, `output.off`,
1158
- `output.3mf`, or `output.ply`. Missing `output.*` scores zero for that
1159
  sample;
1160
  - a top-level `meta.json`:
1161
 
 
108
  correct 3D model.
109
 
110
  - **Reference baseline**: an iterative AI agent that writes CAD code.
111
+ - **Submission flow**: upload a zip of per-fixture STEP/BREP candidate files;
112
+ the Space
113
  runs the eval and appends a row to the submissions dataset.
114
  - **Datasets**: fixture inputs in
115
  [`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
 
1152
  f"""
1153
  **Submission format.** A single zip with:
1154
 
1155
+ - one folder per sample in `{HF_DATA_REPO}`; include one STEP candidate
1156
  file for samples where your system produced a result. Accepted names:
1157
+ `output.step` or `output.stp`. Missing `output.step` scores zero for that
 
1158
  sample;
1159
  - a top-level `meta.json`:
1160
 
metrics_page.py CHANGED
@@ -147,7 +147,7 @@ def build_metrics_page() -> str:
147
  tag="CAD Score",
148
  title="How one part is scored",
149
  body=(
150
- "<p>CADGenBench scores a generated part (STEP/BREP or mesh) "
151
  "against one ground-truth STEP. First a hard <b>validity gate</b>; if it "
152
  "passes, the <b>CAD Score</b> is a weighted mean of three "
153
  "independent metrics, each in [0, 1].</p>"
@@ -194,7 +194,8 @@ def build_metrics_page() -> str:
194
  "failure sets <code>is_valid = False</code> and forces "
195
  "<code>cad_score = 0</code>, so an invalid solid never beats a worse "
196
  "but valid one.</p>"
197
- "<p><b>STEP/BREP candidates</b> must pass all of:</p>"
 
198
  "<ol>"
199
  "<li><b>Well-formed BREP</b>: no per-face / edge / vertex errors "
200
  "(self-intersecting wires, edges off their surface, etc.).</li>"
@@ -204,11 +205,6 @@ def build_metrics_page() -> str:
204
  "to a manifold, closed (3F = 2E), orientation-consistent triangle "
205
  "mesh.</li>"
206
  "</ol>"
207
- "<p><b>Mesh candidates</b> (<code>output.stl</code>, "
208
- "<code>output.obj</code>, <code>output.off</code>, "
209
- "<code>output.3mf</code>, or <code>output.ply</code>) skip BREP "
210
- "checks and must directly satisfy the mesh gate: manifold, closed, "
211
- "and orientation-consistent.</p>"
212
  ),
213
  )
214
 
 
147
  tag="CAD Score",
148
  title="How one part is scored",
149
  body=(
150
+ "<p>CADGenBench scores a generated part (STEP/BREP) "
151
  "against one ground-truth STEP. First a hard <b>validity gate</b>; if it "
152
  "passes, the <b>CAD Score</b> is a weighted mean of three "
153
  "independent metrics, each in [0, 1].</p>"
 
194
  "failure sets <code>is_valid = False</code> and forces "
195
  "<code>cad_score = 0</code>, so an invalid solid never beats a worse "
196
  "but valid one.</p>"
197
+ "<p>A candidate (<code>output.step</code> / <code>output.stp</code>) "
198
+ "must pass all of:</p>"
199
  "<ol>"
200
  "<li><b>Well-formed BREP</b>: no per-face / edge / vertex errors "
201
  "(self-intersecting wires, edges off their surface, etc.).</li>"
 
205
  "to a manifold, closed (3F = 2E), orientation-consistent triangle "
206
  "mesh.</li>"
207
  "</ol>"
 
 
 
 
 
208
  ),
209
  )
210
 
submit.py CHANGED
@@ -44,12 +44,11 @@ Validation gates, in order:
44
  4. Fixture-set match: the set of folders inside the zip equals the
45
  set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
46
  (no missing, no extras).
47
- 5. Candidate parseability: any present ``<fixture>/output.*`` candidate
48
- (STEP/BREP or accepted mesh format) can be cheaply loaded. A missing
49
- candidate is allowed and scores zero via the evaluator's
50
- ``status="missing"`` path. Per-fixture validity (watertight, manifold,
51
- etc.) is *not* checked here; this gate only rejects files that are present
52
- but not actually loadable as their declared candidate kind.
53
 
54
  Hub-write ordering (after validation passes):
55
 
@@ -104,7 +103,6 @@ from typing import Any
104
  import cadgenbench
105
  import gradio as gr
106
  from cadgenbench.common.paths import data_inputs_dir
107
- from cadgenbench.common.mesh import MESH_FILE_SUFFIXES, mesh_from_file
108
  from cadgenbench.common.validity import parse_step
109
  from huggingface_hub import (
110
  CommitOperationAdd,
@@ -731,14 +729,6 @@ def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> N
731
  f"Sample `{name}` has an `{candidate.name}` that is not "
732
  f"loadable as STEP geometry: {e}"
733
  ) from e
734
- elif suffix in MESH_FILE_SUFFIXES:
735
- try:
736
- mesh_from_file(candidate)
737
- except Exception as e: # noqa: BLE001 - normalize user-facing error
738
- raise _ValidationError(
739
- f"Sample `{name}` has an `{candidate.name}` that is not "
740
- f"loadable as a triangle mesh: {e}"
741
- ) from e
742
  else: # pragma: no cover - _candidate_path constrains suffixes
743
  raise _ValidationError(
744
  f"Sample `{name}` uses unsupported candidate file `{candidate.name}`."
@@ -751,19 +741,11 @@ def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> N
751
 
752
 
753
  def _candidate_path(fixture_dir: Path) -> Path | None:
754
- """Return the submitted candidate for *fixture_dir*, if present.
755
-
756
- STEP/BREP wins when both a STEP and a mesh are present, matching the
757
- evaluator's candidate discovery policy.
758
- """
759
  for name in ("output.step", "output.stp"):
760
  candidate = fixture_dir / name
761
  if candidate.is_file():
762
  return candidate
763
- for suffix in sorted(MESH_FILE_SUFFIXES):
764
- candidate = fixture_dir / f"output{suffix}"
765
- if candidate.is_file():
766
- return candidate
767
  return None
768
 
769
 
 
44
  4. Fixture-set match: the set of folders inside the zip equals the
45
  set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
46
  (no missing, no extras).
47
+ 5. Candidate parseability: any present ``<fixture>/output.step`` candidate
48
+ can be cheaply loaded as STEP geometry. A missing candidate is allowed and
49
+ scores zero via the evaluator's ``status="missing"`` path. Per-fixture
50
+ validity (watertight, etc.) is *not* checked here; this gate only rejects
51
+ files that are present but not actually loadable as STEP.
 
52
 
53
  Hub-write ordering (after validation passes):
54
 
 
103
  import cadgenbench
104
  import gradio as gr
105
  from cadgenbench.common.paths import data_inputs_dir
 
106
  from cadgenbench.common.validity import parse_step
107
  from huggingface_hub import (
108
  CommitOperationAdd,
 
729
  f"Sample `{name}` has an `{candidate.name}` that is not "
730
  f"loadable as STEP geometry: {e}"
731
  ) from e
 
 
 
 
 
 
 
 
732
  else: # pragma: no cover - _candidate_path constrains suffixes
733
  raise _ValidationError(
734
  f"Sample `{name}` uses unsupported candidate file `{candidate.name}`."
 
741
 
742
 
743
  def _candidate_path(fixture_dir: Path) -> Path | None:
744
+ """Return the submitted STEP candidate for *fixture_dir*, if present."""
 
 
 
 
745
  for name in ("output.step", "output.stp"):
746
  candidate = fixture_dir / name
747
  if candidate.is_file():
748
  return candidate
 
 
 
 
749
  return None
750
 
751
 
tests/test_submit.py CHANGED
@@ -416,20 +416,3 @@ def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
416
  submit._validate_candidates_parseable(tmp_path, {"101"})
417
 
418
  assert calls == [candidate]
419
-
420
-
421
- def test_validate_steps_checks_present_output_mesh(tmp_path: Path, monkeypatch):
422
- """Mesh candidates are cheap-load checked instead of STEP-parsed."""
423
- fixture = tmp_path / "101"
424
- fixture.mkdir()
425
- candidate = fixture / "output.stl"
426
- candidate.write_text("solid empty\nendsolid empty\n")
427
- step_calls: list[Path] = []
428
- mesh_calls: list[Path] = []
429
- monkeypatch.setattr(submit, "parse_step", lambda p: step_calls.append(p))
430
- monkeypatch.setattr(submit, "mesh_from_file", lambda p: mesh_calls.append(p))
431
-
432
- submit._validate_candidates_parseable(tmp_path, {"101"})
433
-
434
- assert step_calls == []
435
- assert mesh_calls == [candidate]
 
416
  submit._validate_candidates_parseable(tmp_path, {"101"})
417
 
418
  assert calls == [candidate]