Spaces:
Running
Running
save only on cancel/finish, max epochs 1000, default 3
Browse files
- app.py +1 -1
- train_engine.py +3 -3
app.py
CHANGED
|
@@ -713,7 +713,7 @@ def gradio_main():
|
|
| 713 |
with gr.Column(scale=1):
|
| 714 |
lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
|
| 715 |
train_epochs = gr.Slider(
|
| 716 |
-
label="Epochs", minimum=1, maximum=[truncated in extraction — pre-change value not visible]
|
| 717 |
value=3, step=1,
|
| 718 |
)
|
| 719 |
train_lr = gr.Number(label="Learning Rate", value=1e-4)
|
|
|
|
| 713 |
with gr.Column(scale=1):
|
| 714 |
lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
|
| 715 |
train_epochs = gr.Slider(
|
| 716 |
+
label="Epochs", minimum=1, maximum=1000,
|
| 717 |
value=3, step=1,
|
| 718 |
)
|
| 719 |
train_lr = gr.Number(label="Learning Rate", value=1e-4)
|
train_engine.py
CHANGED
|
@@ -2185,7 +2185,7 @@ def train_lora_generator(
|
|
| 2185 |
warmup_steps: int = 100,
|
| 2186 |
weight_decay: float = 0.01,
|
| 2187 |
max_grad_norm: float = 1.0,
|
| 2188 |
-
save_every_n_epochs: int = [truncated in extraction — pre-change default not visible]
|
| 2189 |
seed: int = 42,
|
| 2190 |
variant: str = "base",
|
| 2191 |
device: str = "cpu",
|
|
@@ -2489,8 +2489,8 @@ def train_lora_generator(
|
|
| 2489 |
model.decoder.train()
|
| 2490 |
yield f"[OK] Best model saved (epoch {epoch + 1}, loss: {best_loss:.4f})"
|
| 2491 |
|
| 2492 |
-
# Periodic checkpoint
|
| 2493 |
-
if (epoch + 1) % save_every_n_epochs == 0:
|
| 2494 |
ckpt_path = str(out_path / "checkpoints" / f"epoch_{epoch + 1}")
|
| 2495 |
model.decoder.eval()
|
| 2496 |
save_lora_adapter(model, ckpt_path)
|
|
|
|
| 2185 |
warmup_steps: int = 100,
|
| 2186 |
weight_decay: float = 0.01,
|
| 2187 |
max_grad_norm: float = 1.0,
|
| 2188 |
+
save_every_n_epochs: int = 0,
|
| 2189 |
seed: int = 42,
|
| 2190 |
variant: str = "base",
|
| 2191 |
device: str = "cpu",
|
|
|
|
| 2489 |
model.decoder.train()
|
| 2490 |
yield f"[OK] Best model saved (epoch {epoch + 1}, loss: {best_loss:.4f})"
|
| 2491 |
|
| 2492 |
+
# Periodic checkpoint (0 = disabled, only save on cancel/finish)
|
| 2493 |
+
if save_every_n_epochs > 0 and (epoch + 1) % save_every_n_epochs == 0:
|
| 2494 |
ckpt_path = str(out_path / "checkpoints" / f"epoch_{epoch + 1}")
|
| 2495 |
model.decoder.eval()
|
| 2496 |
save_lora_adapter(model, ckpt_path)
|