Spaces:
Running
Running
fix: parse multipart/mixed response from /understand (was expecting JSON, got multipart)
Browse files
app.py
CHANGED
|
@@ -151,36 +151,46 @@ def _caption_via_understand(audio_path, timeout=600, cancel_check=None):
|
|
| 151 |
logger.warning("[Caption] %s: /understand -> %s (%.0fs)", fname, status, elapsed)
|
| 152 |
return None
|
| 153 |
|
| 154 |
-
#
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
|
| 179 |
-
logger.
|
| 180 |
-
return result
|
| 181 |
|
| 182 |
-
logger.warning("[Caption] %s: no caption
|
| 183 |
-
fname, list(poll_data.keys()) if isinstance(poll_data, dict) else "N/A")
|
| 184 |
return None
|
| 185 |
|
| 186 |
|
|
|
|
| 151 |
logger.warning("[Caption] %s: /understand -> %s (%.0fs)", fname, status, elapsed)
|
| 152 |
return None
|
| 153 |
|
| 154 |
+
# Fetch result — /understand returns multipart/mixed (JSON + latents)
|
| 155 |
+
try:
|
| 156 |
+
r = _fetch_result(job_id, timeout=120)
|
| 157 |
+
if r.status_code != 200:
|
| 158 |
+
logger.warning("[Caption] %s: result fetch HTTP %d", fname, r.status_code)
|
| 159 |
+
return None
|
| 160 |
+
|
| 161 |
+
content_type = r.headers.get("Content-Type", "")
|
| 162 |
+
|
| 163 |
+
# multipart/mixed: extract JSON part (caption metadata)
|
| 164 |
+
if "multipart" in content_type:
|
| 165 |
+
boundary = None
|
| 166 |
+
for part in content_type.split(";"):
|
| 167 |
+
part = part.strip()
|
| 168 |
+
if part.startswith("boundary="):
|
| 169 |
+
boundary = part.split("=", 1)[1].strip('"')
|
| 170 |
+
if boundary:
|
| 171 |
+
import re
|
| 172 |
+
parts = r.content.split(f"--{boundary}".encode())
|
| 173 |
+
for part in parts:
|
| 174 |
+
if b"application/json" in part:
|
| 175 |
+
json_start = part.find(b"{")
|
| 176 |
+
json_end = part.rfind(b"}") + 1
|
| 177 |
+
if json_start >= 0 and json_end > json_start:
|
| 178 |
+
data = json.loads(part[json_start:json_end])
|
| 179 |
+
if isinstance(data, dict) and data.get("caption"):
|
| 180 |
+
logger.info("[Caption] %s: got caption (%d chars)",
|
| 181 |
+
fname, len(data["caption"]))
|
| 182 |
+
return data
|
| 183 |
+
|
| 184 |
+
# Plain JSON fallback
|
| 185 |
+
if r.text.strip():
|
| 186 |
+
data = r.json()
|
| 187 |
+
if isinstance(data, dict) and data.get("caption"):
|
| 188 |
+
return data
|
| 189 |
|
| 190 |
+
except Exception as exc:
|
| 191 |
+
logger.warning("[Caption] %s: result parse failed: %s", fname, exc)
|
|
|
|
| 192 |
|
| 193 |
+
logger.warning("[Caption] %s: no caption extracted from result", fname)
|
|
|
|
| 194 |
return None
|
| 195 |
|
| 196 |
|