Nekochu committed on
Commit
d42aa91
·
1 Parent(s): b38d0b1

save only on cancel/finish, max epochs 1000, default 3

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. train_engine.py +3 -3
app.py CHANGED
@@ -713,7 +713,7 @@ def gradio_main():
713
  with gr.Column(scale=1):
714
  lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
715
  train_epochs = gr.Slider(
716
- label="Epochs", minimum=1, maximum=10,
717
  value=3, step=1,
718
  )
719
  train_lr = gr.Number(label="Learning Rate", value=1e-4)
 
713
  with gr.Column(scale=1):
714
  lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
715
  train_epochs = gr.Slider(
716
+ label="Epochs", minimum=1, maximum=1000,
717
  value=3, step=1,
718
  )
719
  train_lr = gr.Number(label="Learning Rate", value=1e-4)
train_engine.py CHANGED
@@ -2185,7 +2185,7 @@ def train_lora_generator(
2185
  warmup_steps: int = 100,
2186
  weight_decay: float = 0.01,
2187
  max_grad_norm: float = 1.0,
2188
- save_every_n_epochs: int = 50,
2189
  seed: int = 42,
2190
  variant: str = "base",
2191
  device: str = "cpu",
@@ -2489,8 +2489,8 @@ def train_lora_generator(
2489
  model.decoder.train()
2490
  yield f"[OK] Best model saved (epoch {epoch + 1}, loss: {best_loss:.4f})"
2491
 
2492
- # Periodic checkpoint
2493
- if (epoch + 1) % save_every_n_epochs == 0:
2494
  ckpt_path = str(out_path / "checkpoints" / f"epoch_{epoch + 1}")
2495
  model.decoder.eval()
2496
  save_lora_adapter(model, ckpt_path)
 
2185
  warmup_steps: int = 100,
2186
  weight_decay: float = 0.01,
2187
  max_grad_norm: float = 1.0,
2188
+ save_every_n_epochs: int = 0,
2189
  seed: int = 42,
2190
  variant: str = "base",
2191
  device: str = "cpu",
 
2489
  model.decoder.train()
2490
  yield f"[OK] Best model saved (epoch {epoch + 1}, loss: {best_loss:.4f})"
2491
 
2492
+ # Periodic checkpoint (0 = disabled, only save on cancel/finish)
2493
+ if save_every_n_epochs > 0 and (epoch + 1) % save_every_n_epochs == 0:
2494
  ckpt_path = str(out_path / "checkpoints" / f"epoch_{epoch + 1}")
2495
  model.decoder.eval()
2496
  save_lora_adapter(model, ckpt_path)