Michael Rabinovich Cursor committed on
Commit ·
04e4262
1
Parent(s): 1fd03de
Drop mesh-file submissions; STEP-only candidates
Browse files
Remove the triangle-mesh submission path (and its OpenSCAD framing) from the
submit gate, UI copy, and metrics page; candidates are STEP/BREP only. Pin the
eval image's cadgenbench to a47c951.
Co-authored-by: Cursor <cursoragent@cursor.com>
- Dockerfile +1 -1
- README.md +1 -1
- app.py +4 -5
- metrics_page.py +3 -7
- submit.py +6 -24
- tests/test_submit.py +0 -17
Dockerfile
CHANGED
|
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
-
ARG CADGENBENCH_SHA=
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
|
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
+
ARG CADGENBENCH_SHA=a47c951
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
README.md
CHANGED
|
@@ -30,6 +30,6 @@ textual or visual description of a mechanical part into a valid,
|
|
| 30 |
geometrically correct 3D model?
|
| 31 |
|
| 32 |
Browse the **Leaderboard** tab for ranked submissions, or upload your
|
| 33 |
-
own STEP/BREP
|
| 34 |
publishes a row with a per-submission report. The **About** tab covers
|
| 35 |
the scoring axes and links to the benchmark code and datasets.
|
|
|
|
| 30 |
geometrically correct 3D model?
|
| 31 |
|
| 32 |
Browse the **Leaderboard** tab for ranked submissions, or upload your
|
| 33 |
+
own STEP/BREP candidate files on the **Submit** tab: the Space runs the eval and
|
| 34 |
publishes a row with a per-submission report. The **About** tab covers
|
| 35 |
the scoring axes and links to the benchmark code and datasets.
|
app.py
CHANGED
|
@@ -108,8 +108,8 @@ turn a description of a mechanical part into a valid, geometrically
|
|
| 108 |
correct 3D model.
|
| 109 |
|
| 110 |
- **Reference baseline**: an iterative AI agent that writes CAD code.
|
| 111 |
-
- **Submission flow**: upload a zip of per-fixture
|
| 112 |
-
|
| 113 |
runs the eval and appends a row to the submissions dataset.
|
| 114 |
- **Datasets**: fixture inputs in
|
| 115 |
[`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
|
|
@@ -1152,10 +1152,9 @@ with gr.Blocks(
|
|
| 1152 |
f"""
|
| 1153 |
**Submission format.** A single zip with:
|
| 1154 |
|
| 1155 |
-
- one folder per sample in `{HF_DATA_REPO}`; include one
|
| 1156 |
file for samples where your system produced a result. Accepted names:
|
| 1157 |
-
`output.step`
|
| 1158 |
-
`output.3mf`, or `output.ply`. Missing `output.*` scores zero for that
|
| 1159 |
sample;
|
| 1160 |
- a top-level `meta.json`:
|
| 1161 |
|
|
|
|
| 108 |
correct 3D model.
|
| 109 |
|
| 110 |
- **Reference baseline**: an iterative AI agent that writes CAD code.
|
| 111 |
+
- **Submission flow**: upload a zip of per-fixture STEP/BREP candidate files;
|
| 112 |
+
the Space
|
| 113 |
runs the eval and appends a row to the submissions dataset.
|
| 114 |
- **Datasets**: fixture inputs in
|
| 115 |
[`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
|
|
|
|
| 1152 |
f"""
|
| 1153 |
**Submission format.** A single zip with:
|
| 1154 |
|
| 1155 |
+
- one folder per sample in `{HF_DATA_REPO}`; include one STEP candidate
|
| 1156 |
file for samples where your system produced a result. Accepted names:
|
| 1157 |
+
`output.step` or `output.stp`. Missing `output.step` scores zero for that
|
|
|
|
| 1158 |
sample;
|
| 1159 |
- a top-level `meta.json`:
|
| 1160 |
|
metrics_page.py
CHANGED
|
@@ -147,7 +147,7 @@ def build_metrics_page() -> str:
|
|
| 147 |
tag="CAD Score",
|
| 148 |
title="How one part is scored",
|
| 149 |
body=(
|
| 150 |
-
"<p>CADGenBench scores a generated part (STEP/BREP
|
| 151 |
"against one ground-truth STEP. First a hard <b>validity gate</b>; if it "
|
| 152 |
"passes, the <b>CAD Score</b> is a weighted mean of three "
|
| 153 |
"independent metrics, each in [0, 1].</p>"
|
|
@@ -194,7 +194,8 @@ def build_metrics_page() -> str:
|
|
| 194 |
"failure sets <code>is_valid = False</code> and forces "
|
| 195 |
"<code>cad_score = 0</code>, so an invalid solid never beats a worse "
|
| 196 |
"but valid one.</p>"
|
| 197 |
-
"<p><
|
|
|
|
| 198 |
"<ol>"
|
| 199 |
"<li><b>Well-formed BREP</b>: no per-face / edge / vertex errors "
|
| 200 |
"(self-intersecting wires, edges off their surface, etc.).</li>"
|
|
@@ -204,11 +205,6 @@ def build_metrics_page() -> str:
|
|
| 204 |
"to a manifold, closed (3F = 2E), orientation-consistent triangle "
|
| 205 |
"mesh.</li>"
|
| 206 |
"</ol>"
|
| 207 |
-
"<p><b>Mesh candidates</b> (<code>output.stl</code>, "
|
| 208 |
-
"<code>output.obj</code>, <code>output.off</code>, "
|
| 209 |
-
"<code>output.3mf</code>, or <code>output.ply</code>) skip BREP "
|
| 210 |
-
"checks and must directly satisfy the mesh gate: manifold, closed, "
|
| 211 |
-
"and orientation-consistent.</p>"
|
| 212 |
),
|
| 213 |
)
|
| 214 |
|
|
|
|
| 147 |
tag="CAD Score",
|
| 148 |
title="How one part is scored",
|
| 149 |
body=(
|
| 150 |
+
"<p>CADGenBench scores a generated part (STEP/BREP) "
|
| 151 |
"against one ground-truth STEP. First a hard <b>validity gate</b>; if it "
|
| 152 |
"passes, the <b>CAD Score</b> is a weighted mean of three "
|
| 153 |
"independent metrics, each in [0, 1].</p>"
|
|
|
|
| 194 |
"failure sets <code>is_valid = False</code> and forces "
|
| 195 |
"<code>cad_score = 0</code>, so an invalid solid never beats a worse "
|
| 196 |
"but valid one.</p>"
|
| 197 |
+
"<p>A candidate (<code>output.step</code> / <code>output.stp</code>) "
|
| 198 |
+
"must pass all of:</p>"
|
| 199 |
"<ol>"
|
| 200 |
"<li><b>Well-formed BREP</b>: no per-face / edge / vertex errors "
|
| 201 |
"(self-intersecting wires, edges off their surface, etc.).</li>"
|
|
|
|
| 205 |
"to a manifold, closed (3F = 2E), orientation-consistent triangle "
|
| 206 |
"mesh.</li>"
|
| 207 |
"</ol>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
),
|
| 209 |
)
|
| 210 |
|
submit.py
CHANGED
|
@@ -44,12 +44,11 @@ Validation gates, in order:
|
|
| 44 |
4. Fixture-set match: the set of folders inside the zip equals the
|
| 45 |
set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
|
| 46 |
(no missing, no extras).
|
| 47 |
-
5. Candidate parseability: any present ``<fixture>/output.
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
but not actually loadable as their declared candidate kind.
|
| 53 |
|
| 54 |
Hub-write ordering (after validation passes):
|
| 55 |
|
|
@@ -104,7 +103,6 @@ from typing import Any
|
|
| 104 |
import cadgenbench
|
| 105 |
import gradio as gr
|
| 106 |
from cadgenbench.common.paths import data_inputs_dir
|
| 107 |
-
from cadgenbench.common.mesh import MESH_FILE_SUFFIXES, mesh_from_file
|
| 108 |
from cadgenbench.common.validity import parse_step
|
| 109 |
from huggingface_hub import (
|
| 110 |
CommitOperationAdd,
|
|
@@ -731,14 +729,6 @@ def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> N
|
|
| 731 |
f"Sample `{name}` has an `{candidate.name}` that is not "
|
| 732 |
f"loadable as STEP geometry: {e}"
|
| 733 |
) from e
|
| 734 |
-
elif suffix in MESH_FILE_SUFFIXES:
|
| 735 |
-
try:
|
| 736 |
-
mesh_from_file(candidate)
|
| 737 |
-
except Exception as e: # noqa: BLE001 - normalize user-facing error
|
| 738 |
-
raise _ValidationError(
|
| 739 |
-
f"Sample `{name}` has an `{candidate.name}` that is not "
|
| 740 |
-
f"loadable as a triangle mesh: {e}"
|
| 741 |
-
) from e
|
| 742 |
else: # pragma: no cover - _candidate_path constrains suffixes
|
| 743 |
raise _ValidationError(
|
| 744 |
f"Sample `{name}` uses unsupported candidate file `{candidate.name}`."
|
|
@@ -751,19 +741,11 @@ def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> N
|
|
| 751 |
|
| 752 |
|
| 753 |
def _candidate_path(fixture_dir: Path) -> Path | None:
|
| 754 |
-
"""Return the submitted candidate for *fixture_dir*, if present.
|
| 755 |
-
|
| 756 |
-
STEP/BREP wins when both a STEP and a mesh are present, matching the
|
| 757 |
-
evaluator's candidate discovery policy.
|
| 758 |
-
"""
|
| 759 |
for name in ("output.step", "output.stp"):
|
| 760 |
candidate = fixture_dir / name
|
| 761 |
if candidate.is_file():
|
| 762 |
return candidate
|
| 763 |
-
for suffix in sorted(MESH_FILE_SUFFIXES):
|
| 764 |
-
candidate = fixture_dir / f"output{suffix}"
|
| 765 |
-
if candidate.is_file():
|
| 766 |
-
return candidate
|
| 767 |
return None
|
| 768 |
|
| 769 |
|
|
|
|
| 44 |
4. Fixture-set match: the set of folders inside the zip equals the
|
| 45 |
set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
|
| 46 |
(no missing, no extras).
|
| 47 |
+
5. Candidate parseability: any present ``<fixture>/output.step`` candidate
|
| 48 |
+
can be cheaply loaded as STEP geometry. A missing candidate is allowed and
|
| 49 |
+
scores zero via the evaluator's ``status="missing"`` path. Per-fixture
|
| 50 |
+
validity (watertight, etc.) is *not* checked here; this gate only rejects
|
| 51 |
+
files that are present but not actually loadable as STEP.
|
|
|
|
| 52 |
|
| 53 |
Hub-write ordering (after validation passes):
|
| 54 |
|
|
|
|
| 103 |
import cadgenbench
|
| 104 |
import gradio as gr
|
| 105 |
from cadgenbench.common.paths import data_inputs_dir
|
|
|
|
| 106 |
from cadgenbench.common.validity import parse_step
|
| 107 |
from huggingface_hub import (
|
| 108 |
CommitOperationAdd,
|
|
|
|
| 729 |
f"Sample `{name}` has an `{candidate.name}` that is not "
|
| 730 |
f"loadable as STEP geometry: {e}"
|
| 731 |
) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 732 |
else: # pragma: no cover - _candidate_path constrains suffixes
|
| 733 |
raise _ValidationError(
|
| 734 |
f"Sample `{name}` uses unsupported candidate file `{candidate.name}`."
|
|
|
|
| 741 |
|
| 742 |
|
| 743 |
def _candidate_path(fixture_dir: Path) -> Path | None:
|
| 744 |
+
"""Return the submitted STEP candidate for *fixture_dir*, if present."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 745 |
for name in ("output.step", "output.stp"):
|
| 746 |
candidate = fixture_dir / name
|
| 747 |
if candidate.is_file():
|
| 748 |
return candidate
|
|
|
|
|
|
|
|
|
|
|
|
|
| 749 |
return None
|
| 750 |
|
| 751 |
|
tests/test_submit.py
CHANGED
|
@@ -416,20 +416,3 @@ def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
|
|
| 416 |
submit._validate_candidates_parseable(tmp_path, {"101"})
|
| 417 |
|
| 418 |
assert calls == [candidate]
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
def test_validate_steps_checks_present_output_mesh(tmp_path: Path, monkeypatch):
|
| 422 |
-
"""Mesh candidates are cheap-load checked instead of STEP-parsed."""
|
| 423 |
-
fixture = tmp_path / "101"
|
| 424 |
-
fixture.mkdir()
|
| 425 |
-
candidate = fixture / "output.stl"
|
| 426 |
-
candidate.write_text("solid empty\nendsolid empty\n")
|
| 427 |
-
step_calls: list[Path] = []
|
| 428 |
-
mesh_calls: list[Path] = []
|
| 429 |
-
monkeypatch.setattr(submit, "parse_step", lambda p: step_calls.append(p))
|
| 430 |
-
monkeypatch.setattr(submit, "mesh_from_file", lambda p: mesh_calls.append(p))
|
| 431 |
-
|
| 432 |
-
submit._validate_candidates_parseable(tmp_path, {"101"})
|
| 433 |
-
|
| 434 |
-
assert step_calls == []
|
| 435 |
-
assert mesh_calls == [candidate]
|
|
|
|
| 416 |
submit._validate_candidates_parseable(tmp_path, {"101"})
|
| 417 |
|
| 418 |
assert calls == [candidate]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|