Spaces:
Running
Running
LM captioning: 5h timeout per file, check feasibility before starting
Browse files
app.py
CHANGED
|
@@ -662,21 +662,25 @@ def gradio_main():
|
|
| 662 |
|
| 663 |
if audio_to_caption and use_lm_caption and _server_ok():
|
| 664 |
# --- Mode: GGUF LM captioning (slow, best quality) ---
|
|
|
|
|
|
|
| 665 |
est_total = int(total_dur * 7 + len(audio_to_caption) * 600)
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
file_dur = _lr.get_duration(path=full_path)
|
| 673 |
-
file_timeout = int(file_dur * 7 + 600)
|
| 674 |
-
_log(f" {audio_fname}: LM captioning (timeout {file_timeout // 60} min)...")
|
| 675 |
yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
if caption_data:
|
| 681 |
bpm_s = caption_data.get("bpm", "?")
|
| 682 |
key_s = caption_data.get("keyscale", caption_data.get("key", "?"))
|
|
@@ -687,7 +691,7 @@ def gradio_main():
|
|
| 687 |
_log(f" {audio_fname}: LM failed, will use fast captioning")
|
| 688 |
yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
|
| 689 |
|
| 690 |
-
|
| 691 |
# --- Mode: Fast captioning (CLAP + Whisper + librosa) ---
|
| 692 |
_log(f"[INFO] Fast captioning {len(audio_to_caption)} files "
|
| 693 |
f"(CLAP tags + lyrics + BPM)...")
|
|
|
|
| 662 |
|
| 663 |
if audio_to_caption and use_lm_caption and _server_ok():
|
| 664 |
# --- Mode: GGUF LM captioning (slow, best quality) ---
|
| 665 |
+
# 5h limit (same value is the per-file timeout): if the estimated total exceeds it, fall back to fast captioning
|
| 666 |
+
LM_TIMEOUT = 18000 # 5h per file
|
| 667 |
est_total = int(total_dur * 7 + len(audio_to_caption) * 600)
|
| 668 |
+
if est_total > LM_TIMEOUT:
|
| 669 |
+
_log(f"[WARN] Estimated {est_total // 60} min for LM captioning "
|
| 670 |
+
f"— exceeds 5h, switching to fast captioning")
|
| 671 |
+
use_lm_caption = False
|
| 672 |
+
else:
|
| 673 |
+
_log(f"[INFO] LM captioning {len(audio_to_caption)} files...")
|
|
|
|
|
|
|
|
|
|
| 674 |
yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
|
| 675 |
+
for audio_fname, full_path, sidecar_json in audio_to_caption:
|
| 676 |
+
if _training_cancel.is_set():
|
| 677 |
+
break
|
| 678 |
+
_log(f" {audio_fname}: LM captioning...")
|
| 679 |
+
yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
|
| 680 |
+
caption_data = _caption_via_understand(
|
| 681 |
+
full_path, timeout=LM_TIMEOUT,
|
| 682 |
+
cancel_check=lambda: _training_cancel.is_set(),
|
| 683 |
+
)
|
| 684 |
if caption_data:
|
| 685 |
bpm_s = caption_data.get("bpm", "?")
|
| 686 |
key_s = caption_data.get("keyscale", caption_data.get("key", "?"))
|
|
|
|
| 691 |
_log(f" {audio_fname}: LM failed, will use fast captioning")
|
| 692 |
yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
|
| 693 |
|
| 694 |
+
if audio_to_caption and not use_lm_caption:
|
| 695 |
# --- Mode: Fast captioning (CLAP + Whisper + librosa) ---
|
| 696 |
_log(f"[INFO] Fast captioning {len(audio_to_caption)} files "
|
| 697 |
f"(CLAP tags + lyrics + BPM)...")
|