Spaces:

HuggingAI4Engineering
/

CADGenBench

Running

Michael Rabinovich Cursor committed 1 day ago

Commit

04e4262

1 Parent(s): 1fd03de

Drop mesh-file submissions; STEP-only candidates

Remove the triangle-mesh submission path (and its OpenSCAD framing) from the
submit gate, UI copy, and metrics page; candidates are STEP/BREP only. Pin the
eval image's cadgenbench to a47c951.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (6)

Dockerfile +1 -1
README.md +1 -1
app.py +4 -5
metrics_page.py +3 -7
submit.py +6 -24
tests/test_submit.py +0 -17

Dockerfile CHANGED Viewed

@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
 # image rebuild picks up the latest code (pre-v1: always-updated). Lock
 # to a specific commit SHA at the v1 release so published scores are
 # reproducible (see space-setup/post-gt-swap.md Stage F).
-ARG CADGENBENCH_SHA=e7a60e0
 # Cache-bust the install below whenever the tracked ref moves: the
 # GitHub commits endpoint's response changes with each new commit on
 # `main`, so BuildKit re-fetches and invalidates the cached pip layer.

 # image rebuild picks up the latest code (pre-v1: always-updated). Lock
 # to a specific commit SHA at the v1 release so published scores are
 # reproducible (see space-setup/post-gt-swap.md Stage F).
+ARG CADGENBENCH_SHA=a47c951
 # Cache-bust the install below whenever the tracked ref moves: the
 # GitHub commits endpoint's response changes with each new commit on
 # `main`, so BuildKit re-fetches and invalidates the cached pip layer.

README.md CHANGED Viewed

@@ -30,6 +30,6 @@ textual or visual description of a mechanical part into a valid,
 geometrically correct 3D model?
 Browse the **Leaderboard** tab for ranked submissions, or upload your
-own STEP/BREP or mesh candidate files on the **Submit** tab: the Space runs the eval and
 publishes a row with a per-submission report. The **About** tab covers
 the scoring axes and links to the benchmark code and datasets.

 geometrically correct 3D model?
 Browse the **Leaderboard** tab for ranked submissions, or upload your
+own STEP/BREP candidate files on the **Submit** tab: the Space runs the eval and
 publishes a row with a per-submission report. The **About** tab covers
 the scoring axes and links to the benchmark code and datasets.

app.py CHANGED Viewed

@@ -108,8 +108,8 @@ turn a description of a mechanical part into a valid, geometrically
 correct 3D model.
 - **Reference baseline**: an iterative AI agent that writes CAD code.
-- **Submission flow**: upload a zip of per-fixture candidate files (STEP/BREP
-  or mesh); the Space
   runs the eval and appends a row to the submissions dataset.
 - **Datasets**: fixture inputs in
   [`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
@@ -1152,10 +1152,9 @@ with gr.Blocks(
             f"""
 **Submission format.** A single zip with:
-- one folder per sample in `{HF_DATA_REPO}`; include one accepted candidate
   file for samples where your system produced a result. Accepted names:
-  `output.step`, `output.stp`, `output.stl`, `output.obj`, `output.off`,
-  `output.3mf`, or `output.ply`. Missing `output.*` scores zero for that
   sample;
 - a top-level `meta.json`:

 correct 3D model.
 - **Reference baseline**: an iterative AI agent that writes CAD code.
+- **Submission flow**: upload a zip of per-fixture STEP/BREP candidate files;
+  the Space
   runs the eval and appends a row to the submissions dataset.
 - **Datasets**: fixture inputs in
   [`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
             f"""
 **Submission format.** A single zip with:
+- one folder per sample in `{HF_DATA_REPO}`; include one STEP candidate
   file for samples where your system produced a result. Accepted names:
+  `output.step` or `output.stp`. Missing `output.step` scores zero for that
   sample;
 - a top-level `meta.json`:

metrics_page.py CHANGED Viewed

@@ -147,7 +147,7 @@ def build_metrics_page() -> str:
         tag="CAD Score",
         title="How one part is scored",
         body=(
-            "<p>CADGenBench scores a generated part (STEP/BREP or mesh) "
             "against one ground-truth STEP. First a hard <b>validity gate</b>; if it "
             "passes, the <b>CAD Score</b> is a weighted mean of three "
             "independent metrics, each in [0, 1].</p>"
@@ -194,7 +194,8 @@ def build_metrics_page() -> str:
             "failure sets <code>is_valid = False</code> and forces "
             "<code>cad_score = 0</code>, so an invalid solid never beats a worse "
             "but valid one.</p>"
-            "<p><b>STEP/BREP candidates</b> must pass all of:</p>"
             "<ol>"
             "<li><b>Well-formed BREP</b>: no per-face / edge / vertex errors "
             "(self-intersecting wires, edges off their surface, etc.).</li>"
@@ -204,11 +205,6 @@ def build_metrics_page() -> str:
             "to a manifold, closed (3F = 2E), orientation-consistent triangle "
             "mesh.</li>"
             "</ol>"
-            "<p><b>Mesh candidates</b> (<code>output.stl</code>, "
-            "<code>output.obj</code>, <code>output.off</code>, "
-            "<code>output.3mf</code>, or <code>output.ply</code>) skip BREP "
-            "checks and must directly satisfy the mesh gate: manifold, closed, "
-            "and orientation-consistent.</p>"
         ),
     )

         tag="CAD Score",
         title="How one part is scored",
         body=(
+            "<p>CADGenBench scores a generated part (STEP/BREP) "
             "against one ground-truth STEP. First a hard <b>validity gate</b>; if it "
             "passes, the <b>CAD Score</b> is a weighted mean of three "
             "independent metrics, each in [0, 1].</p>"
             "failure sets <code>is_valid = False</code> and forces "
             "<code>cad_score = 0</code>, so an invalid solid never beats a worse "
             "but valid one.</p>"
+            "<p>A candidate (<code>output.step</code> / <code>output.stp</code>) "
+            "must pass all of:</p>"
             "<ol>"
             "<li><b>Well-formed BREP</b>: no per-face / edge / vertex errors "
             "(self-intersecting wires, edges off their surface, etc.).</li>"
             "to a manifold, closed (3F = 2E), orientation-consistent triangle "
             "mesh.</li>"
             "</ol>"
         ),
     )

submit.py CHANGED Viewed

@@ -44,12 +44,11 @@ Validation gates, in order:
 4. Fixture-set match: the set of folders inside the zip equals the
    set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
    (no missing, no extras).
-5. Candidate parseability: any present ``<fixture>/output.*`` candidate
-   (STEP/BREP or accepted mesh format) can be cheaply loaded. A missing
-   candidate is allowed and scores zero via the evaluator's
-   ``status="missing"`` path. Per-fixture validity (watertight, manifold,
-   etc.) is *not* checked here; this gate only rejects files that are present
-   but not actually loadable as their declared candidate kind.
 Hub-write ordering (after validation passes):
@@ -104,7 +103,6 @@ from typing import Any
 import cadgenbench
 import gradio as gr
 from cadgenbench.common.paths import data_inputs_dir
-from cadgenbench.common.mesh import MESH_FILE_SUFFIXES, mesh_from_file
 from cadgenbench.common.validity import parse_step
 from huggingface_hub import (
     CommitOperationAdd,
@@ -731,14 +729,6 @@ def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> N
                     f"Sample `{name}` has an `{candidate.name}` that is not "
                     f"loadable as STEP geometry: {e}"
                 ) from e
-        elif suffix in MESH_FILE_SUFFIXES:
-            try:
-                mesh_from_file(candidate)
-            except Exception as e:  # noqa: BLE001 - normalize user-facing error
-                raise _ValidationError(
-                    f"Sample `{name}` has an `{candidate.name}` that is not "
-                    f"loadable as a triangle mesh: {e}"
-                ) from e
         else:  # pragma: no cover - _candidate_path constrains suffixes
             raise _ValidationError(
                 f"Sample `{name}` uses unsupported candidate file `{candidate.name}`."
@@ -751,19 +741,11 @@ def _validate_candidates_parseable(unpacked: Path, fixture_names: set[str]) -> N
 def _candidate_path(fixture_dir: Path) -> Path | None:
-    """Return the submitted candidate for *fixture_dir*, if present.
-    STEP/BREP wins when both a STEP and a mesh are present, matching the
-    evaluator's candidate discovery policy.
-    """
     for name in ("output.step", "output.stp"):
         candidate = fixture_dir / name
         if candidate.is_file():
             return candidate
-    for suffix in sorted(MESH_FILE_SUFFIXES):
-        candidate = fixture_dir / f"output{suffix}"
-        if candidate.is_file():
-            return candidate
     return None

 4. Fixture-set match: the set of folders inside the zip equals the
    set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
    (no missing, no extras).
+5. Candidate parseability: any present ``<fixture>/output.step`` candidate
+   can be cheaply loaded as STEP geometry. A missing candidate is allowed and
+   scores zero via the evaluator's ``status="missing"`` path. Per-fixture
+   validity (watertight, etc.) is *not* checked here; this gate only rejects
+   files that are present but not actually loadable as STEP.
 Hub-write ordering (after validation passes):
 import cadgenbench
 import gradio as gr
 from cadgenbench.common.paths import data_inputs_dir
 from cadgenbench.common.validity import parse_step
 from huggingface_hub import (
     CommitOperationAdd,
                     f"Sample `{name}` has an `{candidate.name}` that is not "
                     f"loadable as STEP geometry: {e}"
                 ) from e
         else:  # pragma: no cover - _candidate_path constrains suffixes
             raise _ValidationError(
                 f"Sample `{name}` uses unsupported candidate file `{candidate.name}`."
 def _candidate_path(fixture_dir: Path) -> Path | None:
+    """Return the submitted STEP candidate for *fixture_dir*, if present."""
     for name in ("output.step", "output.stp"):
         candidate = fixture_dir / name
         if candidate.is_file():
             return candidate
     return None

tests/test_submit.py CHANGED Viewed

@@ -416,20 +416,3 @@ def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
     submit._validate_candidates_parseable(tmp_path, {"101"})
     assert calls == [candidate]
-def test_validate_steps_checks_present_output_mesh(tmp_path: Path, monkeypatch):
-    """Mesh candidates are cheap-load checked instead of STEP-parsed."""
-    fixture = tmp_path / "101"
-    fixture.mkdir()
-    candidate = fixture / "output.stl"
-    candidate.write_text("solid empty\nendsolid empty\n")
-    step_calls: list[Path] = []
-    mesh_calls: list[Path] = []
-    monkeypatch.setattr(submit, "parse_step", lambda p: step_calls.append(p))
-    monkeypatch.setattr(submit, "mesh_from_file", lambda p: mesh_calls.append(p))
-    submit._validate_candidates_parseable(tmp_path, {"101"})
-    assert step_calls == []
-    assert mesh_calls == [candidate]


416	submit._validate_candidates_parseable(tmp_path, {"101"})
417
418	assert calls == [candidate]