Spaces:
Running
Running
save only on cancel/finish, max epochs 1000, default 3
Browse files
- app.py +1 -1
- train_engine.py +3 -3
app.py
CHANGED
|
@@ -713,7 +713,7 @@ def gradio_main():
|
|
| 713 |
with gr.Column(scale=1):
|
| 714 |
lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
|
| 715 |
train_epochs = gr.Slider(
|
| 716 |
-
label="Epochs", minimum=1, maximum=[truncated in extraction — pre-change value not visible]
|
| 717 |
value=3, step=1,
|
| 718 |
)
|
| 719 |
train_lr = gr.Number(label="Learning Rate", value=1e-4)
|
|
|
|
| 713 |
with gr.Column(scale=1):
|
| 714 |
lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
|
| 715 |
train_epochs = gr.Slider(
|
| 716 |
+
label="Epochs", minimum=1, maximum=1000,
|
| 717 |
value=3, step=1,
|
| 718 |
)
|
| 719 |
train_lr = gr.Number(label="Learning Rate", value=1e-4)
|
train_engine.py
CHANGED
|
@@ -2185,7 +2185,7 @@ def train_lora_generator(
|
|
| 2185 |
warmup_steps: int = 100,
|
| 2186 |
weight_decay: float = 0.01,
|
| 2187 |
max_grad_norm: float = 1.0,
|
| 2188 |
-
save_every_n_epochs: int = [truncated in extraction — pre-change default not visible]
|
| 2189 |
seed: int = 42,
|
| 2190 |
variant: str = "base",
|
| 2191 |
device: str = "cpu",
|
|
@@ -2489,8 +2489,8 @@ def train_lora_generator(
|
|
| 2489 |
model.decoder.train()
|
| 2490 |
yield f"[OK] Best model saved (epoch {epoch + 1}, loss: {best_loss:.4f})"
|
| 2491 |
|
| 2492 |
-
# Periodic checkpoint
|
| 2493 |
-
if (epoch + 1) % save_every_n_epochs == 0:
|
| 2494 |
ckpt_path = str(out_path / "checkpoints" / f"epoch_{epoch + 1}")
|
| 2495 |
model.decoder.eval()
|
| 2496 |
save_lora_adapter(model, ckpt_path)
|
|
|
|
| 2185 |
warmup_steps: int = 100,
|
| 2186 |
weight_decay: float = 0.01,
|
| 2187 |
max_grad_norm: float = 1.0,
|
| 2188 |
+
save_every_n_epochs: int = 0,
|
| 2189 |
seed: int = 42,
|
| 2190 |
variant: str = "base",
|
| 2191 |
device: str = "cpu",
|
|
|
|
| 2489 |
model.decoder.train()
|
| 2490 |
yield f"[OK] Best model saved (epoch {epoch + 1}, loss: {best_loss:.4f})"
|
| 2491 |
|
| 2492 |
+
# Periodic checkpoint (0 = disabled, only save on cancel/finish)
|
| 2493 |
+
if save_every_n_epochs > 0 and (epoch + 1) % save_every_n_epochs == 0:
|
| 2494 |
ckpt_path = str(out_path / "checkpoints" / f"epoch_{epoch + 1}")
|
| 2495 |
model.decoder.eval()
|
| 2496 |
save_lora_adapter(model, ckpt_path)
|