rewrite app.py for ace-server HTTP API, no torch

app.py CHANGED
@@ -1,461 +1,122 @@
-"""
-ACE-Step 1.5 Music Generation + LoRA Training (CPU)
-Runs on HuggingFace Spaces free CPU tier.
-"""

 import os
-import sys
-import gc
 import time
 import tempfile
-import
-from pathlib import Path
-
-# Force CPU, no CUDA
-os.environ["CUDA_VISIBLE_DEVICES"] = ""
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["TORCHAUDIO_USE_BACKEND"] = "ffmpeg"
-os.environ["ACESTEP_DISABLE_TQDM"] = "1"
-
-import torch
-torch.set_default_dtype(torch.float32)
-
-import numpy as np
 import gradio as gr
-import soundfile as sf
-
-# ---------------------------------------------------------------------------
-# Clone ACE-Step repo if not present
-# ---------------------------------------------------------------------------
-REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ace-step-source")
-if not os.path.isdir(REPO_DIR):
-    print("[Setup] Cloning ACE-Step 1.5 repository...")
-    os.system(f"git clone --depth 1 https://github.com/ace-step/ACE-Step-1.5 {REPO_DIR}")
-
-# Add repo to path
-if REPO_DIR not in sys.path:
-    sys.path.insert(0, REPO_DIR)
-
-# ---------------------------------------------------------------------------
-# Lazy-load handler (downloads model on first use)
-# ---------------------------------------------------------------------------
-_dit_handler = None
-_init_status = None
-
-CHECKPOINT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "checkpoints")
-LORA_OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "lora_output")
-CURRENT_LM_SIZE = "1.7B" # Track current LM size
-
-
-def get_handler():
-    """Get or initialize the ACE-Step handler (lazy, first call downloads model)."""
-    global _dit_handler, _init_status
-
-    if _dit_handler is not None and _dit_handler.model is not None:
-        return _dit_handler, _init_status
-
-    from acestep.handler import AceStepHandler
-    from acestep.model_downloader import ensure_main_model
-
-    print("[Init] Ensuring model is downloaded...")
-    success, msg = ensure_main_model(
-        checkpoints_dir=Path(CHECKPOINT_DIR),
-        prefer_source="huggingface",
-    )
-    print(f"[Init] Model download: {msg}")
-
-    if not success:
-        _init_status = f"Model download failed: {msg}"
-        return None, _init_status
-
-    _dit_handler = AceStepHandler()
-    project_root = os.path.dirname(os.path.abspath(__file__))
-
-    os.environ["ACESTEP_PROJECT_ROOT"] = project_root
-
-    status, ok = _dit_handler.initialize_service(
-        project_root=project_root,
-        config_path="acestep-v15-turbo",
-        device="cpu",
-        use_flash_attention=False,
-        compile_model=False,
-        offload_to_cpu=False,
-        offload_dit_to_cpu=False,
-        quantization=None,
-        use_mlx_dit=False,
-    )
-
-    _init_status = status
-    if not ok:
-        print(f"[Init] FAILED: {status}")
-        _dit_handler = None
-        return None, _init_status
-
-    # Force float32 on everything
-    _dit_handler.dtype = torch.float32
-    if _dit_handler.model is not None:
-        _dit_handler.model = _dit_handler.model.float().to("cpu")
-    if _dit_handler.vae is not None:
-        _dit_handler.vae = _dit_handler.vae.float().to("cpu")
-    if _dit_handler.text_encoder is not None:
-        _dit_handler.text_encoder = _dit_handler.text_encoder.float().to("cpu")

-
-
-
-
-def get_trained_loras():
-    """List available trained LoRAs."""
-    loras = ["None (no LoRA)"]
-    if os.path.isdir(LORA_OUTPUT_DIR):
-        for name in sorted(os.listdir(LORA_OUTPUT_DIR)):
-            lora_dir = os.path.join(LORA_OUTPUT_DIR, name)
-            if os.path.isdir(lora_dir):
-                # Check for any .safetensors or .pt files
-                for f in os.listdir(lora_dir):
-                    if f.endswith((".safetensors", ".pt", ".bin")):
-                        loras.append(name)
-                        break
-    return loras


-
-# Generate Tab
-# ---------------------------------------------------------------------------
-def generate_music(
-    caption,
-    lyrics,
-    instrumental,
-    bpm,
-    duration,
-    seed,
-    inference_steps,
-    lm_size,
-    lora_choice,
-    progress=gr.Progress(track_tqdm=True),
-):
-    """Generate music from text prompt on CPU."""
     t0 = time.time()

-    handler, status = get_handler()
-    if handler is None:
-        return None, f"Model not ready: {status}"
-
-    # Apply trained LoRA if selected
-    if lora_choice and lora_choice != "None (no LoRA)":
-        lora_dir = os.path.join(LORA_OUTPUT_DIR, lora_choice)
-        if os.path.isdir(lora_dir):
-            try:
-                handler.load_lora(lora_dir)
-                print(f"[Gen] Loaded LoRA: {lora_choice}")
-            except Exception as e:
-                print(f"[Gen] LoRA load failed: {e}")
-
-    # TODO: LM size switching requires re-downloading the LM model
-    # For now, log the selected size
-    if lm_size != CURRENT_LM_SIZE:
-        print(f"[Gen] LM size {lm_size} requested (current: {CURRENT_LM_SIZE})")

-
-
-
-
-    seed_val = int(seed) if seed and int(seed) >= 0 else -1

     try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-            infer_method="ode",
-            progress=None,
-        )
-
-        elapsed = time.time() - t0
-
-        if not result.get("success", False):
-            error = result.get("error", result.get("status_message", "Unknown error"))
-            return None, f"Generation failed: {error}"
-
-        audios = result.get("audios", [])
-        if not audios:
-            return None, "No audio generated"
-
-        audio_tensor = audios[0].get("tensor")
-        sample_rate = audios[0].get("sample_rate", 48000)
-
-        if audio_tensor is None:
-            return None, "Audio tensor is None"
-
-        # Convert to numpy
-        if isinstance(audio_tensor, torch.Tensor):
-            audio_np = audio_tensor.cpu().float().numpy()
-        else:
-            audio_np = np.array(audio_tensor, dtype=np.float32)
-
-        # Save to temp file
-        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        # soundfile expects (samples, channels)
-        if audio_np.ndim == 2:
-            audio_np = audio_np.T # (channels, samples) -> (samples, channels)
-        sf.write(tmp.name, audio_np, sample_rate)
-
-        status_msg = (
-            f"Generated in {elapsed:.1f}s | "
-            f"Duration: {duration}s | Steps: {inference_steps} | "
-            f"Seed: {seed_val}"
-        )
-        return tmp.name, status_msg
-
     except Exception as e:
-
-        return None, f"Error: {e}\n{traceback.format_exc()}"
-    finally:
-        gc.collect()
-

-
-
-
-def train_lora(
-    audio_files,
-    lora_name,
-    epochs,
-    learning_rate,
-    lora_rank,
-    progress=gr.Progress(track_tqdm=True),
-):
-    """Train a LoRA adapter from uploaded audio files on CPU."""
-    if not audio_files:
-        return "No audio files uploaded."
-
-    handler, status = get_handler()
-    if handler is None:
-        return f"Model not ready: {status}"
-
-    lora_name = lora_name.strip() or "my_lora"
-    epochs = max(1, min(int(epochs), 10))
-    lr = float(learning_rate)
-    rank = max(1, min(int(lora_rank), 64))
-
-    output_dir = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "lora_output", lora_name
-    )
-    os.makedirs(output_dir, exist_ok=True)
-
-    # Create a temp directory for audio files
-    audio_dir = os.path.join(output_dir, "audio_input")
-    os.makedirs(audio_dir, exist_ok=True)
-
-    # Copy uploaded files
-    for f in audio_files:
-        src = f.name if hasattr(f, "name") else str(f)
-        dst = os.path.join(audio_dir, os.path.basename(src))
-        shutil.copy2(src, dst)
-
-    log_lines = []
-    log_lines.append(f"LoRA Training: '{lora_name}'")
-    log_lines.append(f"Audio files: {len(audio_files)}")
-    log_lines.append(f"Epochs: {epochs}, LR: {lr}, Rank: {rank}")
-    log_lines.append(f"Output: {output_dir}")
-    log_lines.append("")

     try:
-
-
-
-
-
-
-        from acestep.training_v2.preprocess import preprocess_audio_files
-
-        preprocess_result = preprocess_audio_files(
-            audio_dir=audio_dir,
-            output_dir=tensor_dir,
-            checkpoint_dir=CHECKPOINT_DIR,
-            variant="turbo",
-            max_duration=60.0,
-            device="cpu",
-            precision="float32",
-        )
-
-        processed = preprocess_result.get("processed", 0)
-        total = preprocess_result.get("total", 0)
-        failed = preprocess_result.get("failed", 0)
-        log_lines.append(f" Preprocessed: {processed}/{total} (failed: {failed})")
-
-        if processed == 0:
-            log_lines.append("ERROR: No files were preprocessed successfully.")
-            return "\n".join(log_lines)
-
-        # Training step
-        log_lines.append("[Step 2/2] Training LoRA adapter...")
-
-        from acestep.training_v2.model_loader import load_decoder_for_training
-        from acestep.training_v2.trainer_fixed import FixedLoRATrainer
-        from acestep.training_v2.configs import TrainingConfigV2, LoRAConfigV2
-
-        # Load model for training (force float32 for CPU)
-        model = load_decoder_for_training(
-            checkpoint_dir=CHECKPOINT_DIR,
-            variant="turbo",
-            device="cpu",
-            precision="float32",
-        )
-        model = model.float()
-
-        adapter_cfg = LoRAConfigV2(
-            r=rank,
-            alpha=rank,
-            dropout=0.0,
-        )
-
-        train_cfg = TrainingConfigV2(
-            checkpoint_dir=CHECKPOINT_DIR,
-            model_variant="turbo",
-            dataset_dir=tensor_dir,
-            output_dir=output_dir,
-            max_epochs=epochs,
-            batch_size=1,
-            learning_rate=lr,
-            device="cpu",
-            precision="float32",
-            seed=42,
-            num_workers=0,
-            pin_memory=False,
-        )
-
-        trainer = FixedLoRATrainer(model, adapter_cfg, train_cfg)
-
-        step_count = 0
-        last_loss = 0.0
-        for update in trainer.train():
-            if hasattr(update, "step"):
-                step_count = update.step
-                last_loss = update.loss
-                if step_count % 5 == 0:
-                    log_lines.append(f" Step {step_count}: loss={last_loss:.4f}")
-            elif isinstance(update, tuple) and len(update) >= 2:
-                step_count = update[0]
-                last_loss = update[1]
-                if step_count % 5 == 0:
-                    log_lines.append(f" Step {step_count}: loss={last_loss:.4f}")
-
-        log_lines.append(f"Training complete! Final step: {step_count}, loss: {last_loss:.4f}")
-        log_lines.append(f"LoRA saved to: {output_dir}")
-
-        # Cleanup
-        del model, trainer
-        gc.collect()
-
     except Exception as e:
-
-        log_lines.append(f"ERROR: {e}")
-        log_lines.append(traceback.format_exc())
-
-    return "\n".join(log_lines)
-

-
-
-# ---------------------------------------------------------------------------
-def build_ui():
-    theme = gr.themes.Default()
     try:
-
-
-
-
-
-
-        title="ACE-Step 1.5 CPU",
-        css="""
-        .main-title { text-align: center; margin-bottom: 0.5em; }
-        .status-box { font-family: monospace; font-size: 0.85em; }
-        """,
-    ) as demo:
-        gr.Markdown("**[ACE-Step 1.5 (CPU)](https://github.com/ace-step/ACE-Step-1.5)**")
-
-        with gr.Tabs():
-            # ---- Generate Tab ----
-            with gr.Tab("Generate Music"):
-                with gr.Row():
-                    with gr.Column(scale=2):
-                        caption_input = gr.Textbox(
-                            label="Music Description",
-                            placeholder="e.g. upbeat electronic dance music, 120 BPM",
-                            lines=2,
-                            value="upbeat electronic dance music, energetic synth leads, driving bassline",
-                        )
-                        lyrics_input = gr.Textbox(
-                            label="Lyrics ([Instrumental] for no vocals)",
-                            lines=2,
-                            value="[Instrumental]",
-                        )
-                    with gr.Column(scale=1):
-                        audio_output = gr.Audio(label="Output", type="filepath")
-                        gen_status = gr.Textbox(label="Status", interactive=False, lines=1, elem_classes="status-box")
-                with gr.Row():
-                    instrumental_cb = gr.Checkbox(label="Instrumental", value=True, scale=1)
-                    bpm_input = gr.Number(label="BPM", value=120, minimum=0, maximum=300, scale=1)
-                    duration_input = gr.Slider(label="Duration (s)", minimum=10, maximum=120, value=10, step=5, scale=1)
-                    steps_input = gr.Slider(label="Steps", minimum=1, maximum=32, value=8, step=1, scale=1)
-                with gr.Row():
-                    seed_input = gr.Number(label="Seed", value=-1, scale=1)
-                    lm_size_input = gr.Dropdown(label="LM Size", choices=["0.6B (fast)", "1.7B (balanced)", "4B (best quality)"], value="4B (best quality)", scale=1)
-                    lora_select = gr.Dropdown(label="LoRA", choices=get_trained_loras(), value="None (no LoRA)", scale=1)
-                generate_btn = gr.Button("Generate Music", variant="primary")
-
-                generate_btn.click(
-                    fn=generate_music,
-                    inputs=[caption_input, lyrics_input, instrumental_cb, bpm_input, duration_input, seed_input, steps_input, lm_size_input, lora_select],
-                    outputs=[audio_output, gen_status],
-                )
-
-            # ---- Train LoRA Tab ----
-            with gr.Tab("Train LoRA"):
-                gr.Markdown("Upload audio files to train a LoRA adapter. Training on CPU, keep epochs low.")
-                with gr.Row():
-                    audio_upload = gr.File(label="Audio Files", file_count="multiple", file_types=["audio"], scale=2)
-                    with gr.Column(scale=1):
-                        lora_name_input = gr.Textbox(label="LoRA Name", value="my_lora")
-                        train_model_info = gr.Textbox(label="Training Model", value="acestep-v15-turbo (DiT decoder)", interactive=False)
-                with gr.Row():
-                    epochs_input = gr.Slider(label="Epochs", minimum=1, maximum=10, value=1, step=1, scale=1)
-                    lr_input = gr.Number(label="LR", value=1e-4, scale=1)
-                    rank_input = gr.Slider(label="LoRA Rank", minimum=1, maximum=64, value=8, step=1, scale=1)
-                train_btn = gr.Button("Start Training", variant="primary")
-                train_log = gr.Textbox(label="Training Log", interactive=False, lines=10, elem_classes="status-box")
-
-                def train_and_refresh(*args):
-                    log = train_lora(*args)
-                    new_loras = get_trained_loras()
-                    return log, gr.update(choices=new_loras, value=new_loras[-1] if len(new_loras) > 1 else "None (no LoRA)")
-
-                train_btn.click(
-                    fn=train_and_refresh,
-                    inputs=[audio_upload, lora_name_input, epochs_input, lr_input, rank_input],
-                    outputs=[train_log, lora_select],
-                )
-
-    return demo
-

-
-
-
-
-    server_port=7860,
-    show_error=True,
-    ssr_mode=False,
-)

+"""ACE-Step 1.5 XL (CPU) - Gradio frontend for ace-server GGUF inference"""

 import os
 import time
 import tempfile
+import requests
 import gradio as gr

+ACE_SERVER = "http://127.0.0.1:8085"
+OUTPUT_DIR = "/app/outputs"
+os.makedirs(OUTPUT_DIR, exist_ok=True)

+def _server_ok():
+    try:
+        return requests.get(f"{ACE_SERVER}/health", timeout=5).status_code == 200
+    except Exception:
+        return False

+def _poll_job(job_id, timeout=600):
     t0 = time.time()
+    while time.time() - t0 < timeout:
+        try:
+            r = requests.get(f"{ACE_SERVER}/job", params={"id": job_id}, timeout=10)
+            status = r.json().get("status", "unknown")
+            if status in ("done", "error"):
+                return status, time.time() - t0
+        except Exception:
+            pass
+        time.sleep(2)
+    return "timeout", time.time() - t0
+
+def generate_music(caption, lyrics, instrumental, bpm, duration, seed, steps, progress=gr.Progress(track_tqdm=True)):
+    t0 = time.time()
+    if not _server_ok():
+        return None, "ace-server not running"

+    req = {"caption": caption or "upbeat electronic dance music"}

+    if instrumental or not lyrics or lyrics.strip() == "":
+        req["lyrics"] = "[Instrumental]"
+    else:
+        req["lyrics"] = lyrics

     try:
+        if bpm and int(bpm) > 0: req["bpm"] = int(bpm)
+        if duration and float(duration) > 0: req["duration"] = min(float(duration), 300)
+        if seed is not None and int(seed) >= 0: req["seed"] = int(seed)
+        if steps and int(steps) > 0: req["inference_steps"] = int(steps)
+    except (ValueError, TypeError) as e:
+        return None, f"Bad param: {e}"
+
+    progress(0.05, desc="Submitting LM job...")
+    try:
+        r = requests.post(f"{ACE_SERVER}/lm", json=req, timeout=30)
+        if r.status_code != 200:
+            return None, f"LM failed: {r.status_code} {r.text}"
+        lm_job_id = r.json().get("id")
     except Exception as e:
+        return None, f"LM error: {e}"

+    progress(0.1, desc=f"LM generating (job {lm_job_id})...")
+    lm_status, lm_elapsed = _poll_job(lm_job_id, timeout=300)
+    if lm_status != "done":
+        return None, f"LM {lm_status} after {lm_elapsed:.0f}s"

     try:
+        r = requests.get(f"{ACE_SERVER}/job", params={"id": lm_job_id, "result": 1}, timeout=30)
+        lm_results = r.json()
+        if not isinstance(lm_results, list) or len(lm_results) == 0:
+            return None, f"LM no results: {lm_results}"
+        synth_request = lm_results[0]
     except Exception as e:
+        return None, f"LM result error: {e}"

+    progress(0.4, desc="Submitting synth job...")
+    synth_request["output_format"] = "wav16"
     try:
+        r = requests.post(f"{ACE_SERVER}/synth", json=synth_request, timeout=30)
+        if r.status_code != 200:
+            return None, f"Synth failed: {r.status_code} {r.text}"
+        synth_job_id = r.json().get("id")
+    except Exception as e:
+        return None, f"Synth error: {e}"

+    progress(0.5, desc=f"Synthesizing (job {synth_job_id})...")
+    synth_status, synth_elapsed = _poll_job(synth_job_id, timeout=600)
+    if synth_status != "done":
+        return None, f"Synth {synth_status} after {synth_elapsed:.0f}s"

+    progress(0.9, desc="Fetching audio...")
+    try:
+        r = requests.get(f"{ACE_SERVER}/job", params={"id": synth_job_id, "result": 1}, timeout=60)
+        if r.status_code != 200:
+            return None, f"Audio fetch failed: {r.status_code}"
+        tmp = tempfile.NamedTemporaryFile(suffix=".wav", dir=OUTPUT_DIR, delete=False)
+        tmp.write(r.content)
+        tmp.close()
+    except Exception as e:
+        return None, f"Audio error: {e}"
+
+    elapsed = time.time() - t0
+    return tmp.name, f"Done in {elapsed:.0f}s | {duration}s audio, {steps} steps"
+
+with gr.Blocks(title="ACE-Step 1.5 XL (CPU)") as demo:
+    gr.Markdown("**[ACE-Step 1.5 XL (CPU)](https://github.com/ace-step/ACE-Step-1.5)** GGUF Q4_K_M via [acestep.cpp](https://github.com/ServeurpersoCom/acestep.cpp)")
+    with gr.Row():
+        with gr.Column(scale=2):
+            caption = gr.Textbox(label="Music Description", lines=2, value="upbeat electronic dance music, energetic synth leads")
+            lyrics = gr.Textbox(label="Lyrics ([Instrumental] for no vocals)", lines=2, value="[Instrumental]")
+        with gr.Column(scale=1):
+            audio_out = gr.Audio(label="Output", type="filepath")
+            status = gr.Textbox(label="Status", interactive=False, lines=1)
+    with gr.Row():
+        instrumental = gr.Checkbox(label="Instrumental", value=True, scale=1)
+        bpm = gr.Number(label="BPM", value=120, minimum=0, maximum=300, scale=1)
+        duration = gr.Slider(label="Duration (s)", minimum=10, maximum=120, value=10, step=5, scale=1)
+        steps = gr.Slider(label="Steps", minimum=1, maximum=32, value=8, step=1, scale=1)
+        seed = gr.Number(label="Seed", value=-1, scale=1)
+    gen_btn = gr.Button("Generate Music", variant="primary")
+    gen_btn.click(fn=generate_music, inputs=[caption, lyrics, instrumental, bpm, duration, seed, steps], outputs=[audio_out, status])
+
+demo.launch(server_name="0.0.0.0", server_port=7860)
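
The rewritten app.py drives ace-server as a two-stage job queue: POST /lm expands the text request into a synth plan, POST /synth renders that plan to audio, and GET /job?id=...&result=1 returns a job's result (JSON for LM jobs, raw WAV bytes for synth jobs). The standalone sketch below exercises the same flow outside Gradio as a smoke test; the endpoint shapes and payload keys are inferred from the code above, not taken from ace-server documentation, so treat them as assumptions.

# smoke_test.py - end-to-end check of the ace-server contract assumed by app.py.
# Assumption: endpoint shapes (/health, /lm, /synth, /job?id=&result=1) are
# inferred from app.py above, not from ace-server docs.
import sys
import time

import requests

ACE_SERVER = "http://127.0.0.1:8085"


def wait_for(job_id, timeout=600):
    """Poll /job every 2s until the server reports a terminal status."""
    t0 = time.time()
    while time.time() - t0 < timeout:
        r = requests.get(f"{ACE_SERVER}/job", params={"id": job_id}, timeout=10)
        status = r.json().get("status", "unknown")
        if status in ("done", "error"):
            return status
        time.sleep(2)
    return "timeout"


if requests.get(f"{ACE_SERVER}/health", timeout=5).status_code != 200:
    sys.exit("ace-server not reachable on :8085")

# Stage 1: the LM turns a text request into a synth plan.
lm_job = requests.post(f"{ACE_SERVER}/lm", json={
    "caption": "upbeat electronic dance music",
    "lyrics": "[Instrumental]",
    "duration": 10,
    "inference_steps": 8,
}, timeout=30).json()
assert wait_for(lm_job["id"], timeout=300) == "done", "LM job did not finish"

# The LM job result is a list; element 0 is the request to pass to /synth.
plan = requests.get(f"{ACE_SERVER}/job",
                    params={"id": lm_job["id"], "result": 1}, timeout=30).json()[0]
plan["output_format"] = "wav16"

# Stage 2: synth renders the plan to audio.
synth_job = requests.post(f"{ACE_SERVER}/synth", json=plan, timeout=30).json()
assert wait_for(synth_job["id"]) == "done", "synth job did not finish"

# Fetching a synth job's result returns the WAV bytes directly.
wav = requests.get(f"{ACE_SERVER}/job",
                   params={"id": synth_job["id"], "result": 1}, timeout=60).content
with open("out.wav", "wb") as f:
    f.write(wav)
print(f"wrote {len(wav)} bytes to out.wav")

Keeping submission and polling as separate short HTTP calls means each request stays well inside its timeout; only the poll loop waits, and it gives up after an explicit deadline instead of hanging the caller.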