Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use DCAgent2/stack-swesmithseq with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DCAgent2/stack-swesmithseq with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DCAgent2/stack-swesmithseq")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DCAgent2/stack-swesmithseq")
model = AutoModelForCausalLM.from_pretrained("DCAgent2/stack-swesmithseq")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DCAgent2/stack-swesmithseq with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DCAgent2/stack-swesmithseq"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent2/stack-swesmithseq",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/DCAgent2/stack-swesmithseq
- SGLang
How to use DCAgent2/stack-swesmithseq with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "DCAgent2/stack-swesmithseq" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent2/stack-swesmithseq",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "DCAgent2/stack-swesmithseq" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent2/stack-swesmithseq",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use DCAgent2/stack-swesmithseq with Docker Model Runner:
docker model run hf.co/DCAgent2/stack-swesmithseq
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 6712, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005216484089723527, | |
| "grad_norm": 11.898422283807292, | |
| "learning_rate": 2.3809523809523811e-07, | |
| "loss": 0.7267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40416043996810913, | |
| "step": 5, | |
| "valid_targets_mean": 5540.2, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 0.010432968179447054, | |
| "grad_norm": 11.704966866608393, | |
| "learning_rate": 5.357142857142857e-07, | |
| "loss": 0.6894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35301291942596436, | |
| "step": 10, | |
| "valid_targets_mean": 6751.5, | |
| "valid_targets_min": 2859 | |
| }, | |
| { | |
| "epoch": 0.01564945226917058, | |
| "grad_norm": 9.185439353912965, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 0.6827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3006567060947418, | |
| "step": 15, | |
| "valid_targets_mean": 6099.8, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 0.020865936358894107, | |
| "grad_norm": 8.332725212643549, | |
| "learning_rate": 1.130952380952381e-06, | |
| "loss": 0.6636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37705448269844055, | |
| "step": 20, | |
| "valid_targets_mean": 6526.8, | |
| "valid_targets_min": 2373 | |
| }, | |
| { | |
| "epoch": 0.02608242044861763, | |
| "grad_norm": 6.563048341767583, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.6824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33541935682296753, | |
| "step": 25, | |
| "valid_targets_mean": 4286.5, | |
| "valid_targets_min": 2745 | |
| }, | |
| { | |
| "epoch": 0.03129890453834116, | |
| "grad_norm": 5.405160222705993, | |
| "learning_rate": 1.7261904761904764e-06, | |
| "loss": 0.6442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3321383595466614, | |
| "step": 30, | |
| "valid_targets_mean": 4962.8, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 0.036515388628064686, | |
| "grad_norm": 4.3523814137339425, | |
| "learning_rate": 2.023809523809524e-06, | |
| "loss": 0.5875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.336911678314209, | |
| "step": 35, | |
| "valid_targets_mean": 4189.5, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 0.041731872717788214, | |
| "grad_norm": 3.5508420205295743, | |
| "learning_rate": 2.321428571428572e-06, | |
| "loss": 0.5944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29606109857559204, | |
| "step": 40, | |
| "valid_targets_mean": 4491.2, | |
| "valid_targets_min": 2605 | |
| }, | |
| { | |
| "epoch": 0.046948356807511735, | |
| "grad_norm": 2.8414830909821163, | |
| "learning_rate": 2.6190476190476192e-06, | |
| "loss": 0.5768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35838156938552856, | |
| "step": 45, | |
| "valid_targets_mean": 5947.6, | |
| "valid_targets_min": 1899 | |
| }, | |
| { | |
| "epoch": 0.05216484089723526, | |
| "grad_norm": 2.126711098686779, | |
| "learning_rate": 2.916666666666667e-06, | |
| "loss": 0.546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.259787380695343, | |
| "step": 50, | |
| "valid_targets_mean": 4468.6, | |
| "valid_targets_min": 2916 | |
| }, | |
| { | |
| "epoch": 0.05738132498695879, | |
| "grad_norm": 1.5542165115621722, | |
| "learning_rate": 3.2142857142857147e-06, | |
| "loss": 0.5296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26236891746520996, | |
| "step": 55, | |
| "valid_targets_mean": 5149.9, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 0.06259780907668232, | |
| "grad_norm": 1.397670161856647, | |
| "learning_rate": 3.511904761904762e-06, | |
| "loss": 0.4957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2760200798511505, | |
| "step": 60, | |
| "valid_targets_mean": 4151.2, | |
| "valid_targets_min": 1461 | |
| }, | |
| { | |
| "epoch": 0.06781429316640585, | |
| "grad_norm": 0.8667986523783261, | |
| "learning_rate": 3.80952380952381e-06, | |
| "loss": 0.4921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23918141424655914, | |
| "step": 65, | |
| "valid_targets_mean": 5063.0, | |
| "valid_targets_min": 1366 | |
| }, | |
| { | |
| "epoch": 0.07303077725612937, | |
| "grad_norm": 0.8894640010893528, | |
| "learning_rate": 4.107142857142857e-06, | |
| "loss": 0.4949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20835909247398376, | |
| "step": 70, | |
| "valid_targets_mean": 3026.4, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 0.0782472613458529, | |
| "grad_norm": 0.9164134477165798, | |
| "learning_rate": 4.404761904761905e-06, | |
| "loss": 0.5065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2392653524875641, | |
| "step": 75, | |
| "valid_targets_mean": 2961.8, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 0.08346374543557643, | |
| "grad_norm": 0.7510575591954725, | |
| "learning_rate": 4.702380952380953e-06, | |
| "loss": 0.4608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23678851127624512, | |
| "step": 80, | |
| "valid_targets_mean": 4270.5, | |
| "valid_targets_min": 1710 | |
| }, | |
| { | |
| "epoch": 0.08868022952529994, | |
| "grad_norm": 0.7048718077509903, | |
| "learning_rate": 5e-06, | |
| "loss": 0.482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26397812366485596, | |
| "step": 85, | |
| "valid_targets_mean": 4471.6, | |
| "valid_targets_min": 2006 | |
| }, | |
| { | |
| "epoch": 0.09389671361502347, | |
| "grad_norm": 0.81324060638606, | |
| "learning_rate": 5.297619047619048e-06, | |
| "loss": 0.4894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2475789487361908, | |
| "step": 90, | |
| "valid_targets_mean": 3130.4, | |
| "valid_targets_min": 2227 | |
| }, | |
| { | |
| "epoch": 0.099113197704747, | |
| "grad_norm": 0.6876518254917292, | |
| "learning_rate": 5.595238095238096e-06, | |
| "loss": 0.4771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2238778918981552, | |
| "step": 95, | |
| "valid_targets_mean": 3935.1, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 0.10432968179447052, | |
| "grad_norm": 0.6892407210754692, | |
| "learning_rate": 5.892857142857144e-06, | |
| "loss": 0.4517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24841147661209106, | |
| "step": 100, | |
| "valid_targets_mean": 4362.8, | |
| "valid_targets_min": 2809 | |
| }, | |
| { | |
| "epoch": 0.10954616588419405, | |
| "grad_norm": 0.632530804997214, | |
| "learning_rate": 6.1904761904761914e-06, | |
| "loss": 0.4494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17703276872634888, | |
| "step": 105, | |
| "valid_targets_mean": 3333.0, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 0.11476264997391758, | |
| "grad_norm": 0.6418128902339513, | |
| "learning_rate": 6.488095238095239e-06, | |
| "loss": 0.4327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23275437951087952, | |
| "step": 110, | |
| "valid_targets_mean": 4949.2, | |
| "valid_targets_min": 2567 | |
| }, | |
| { | |
| "epoch": 0.11997913406364111, | |
| "grad_norm": 0.826604683066573, | |
| "learning_rate": 6.785714285714287e-06, | |
| "loss": 0.4118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19350579380989075, | |
| "step": 115, | |
| "valid_targets_mean": 2560.8, | |
| "valid_targets_min": 1505 | |
| }, | |
| { | |
| "epoch": 0.12519561815336464, | |
| "grad_norm": 0.8019191606073482, | |
| "learning_rate": 7.083333333333335e-06, | |
| "loss": 0.4457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20745006203651428, | |
| "step": 120, | |
| "valid_targets_mean": 3180.1, | |
| "valid_targets_min": 1978 | |
| }, | |
| { | |
| "epoch": 0.13041210224308816, | |
| "grad_norm": 0.6268190796009507, | |
| "learning_rate": 7.380952380952382e-06, | |
| "loss": 0.4241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21665164828300476, | |
| "step": 125, | |
| "valid_targets_mean": 4338.9, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 0.1356285863328117, | |
| "grad_norm": 0.663816093194422, | |
| "learning_rate": 7.67857142857143e-06, | |
| "loss": 0.4205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20638859272003174, | |
| "step": 130, | |
| "valid_targets_mean": 3750.2, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 0.14084507042253522, | |
| "grad_norm": 0.6505948442592261, | |
| "learning_rate": 7.976190476190477e-06, | |
| "loss": 0.4231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21000896394252777, | |
| "step": 135, | |
| "valid_targets_mean": 4000.8, | |
| "valid_targets_min": 1786 | |
| }, | |
| { | |
| "epoch": 0.14606155451225875, | |
| "grad_norm": 0.7346940854717736, | |
| "learning_rate": 8.273809523809523e-06, | |
| "loss": 0.4058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21634706854820251, | |
| "step": 140, | |
| "valid_targets_mean": 3459.1, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 0.15127803860198227, | |
| "grad_norm": 0.6743816271261498, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.4274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19411733746528625, | |
| "step": 145, | |
| "valid_targets_mean": 3505.4, | |
| "valid_targets_min": 1825 | |
| }, | |
| { | |
| "epoch": 0.1564945226917058, | |
| "grad_norm": 0.7111122099798013, | |
| "learning_rate": 8.869047619047619e-06, | |
| "loss": 0.4052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20607689023017883, | |
| "step": 150, | |
| "valid_targets_mean": 3360.8, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 0.16171100678142933, | |
| "grad_norm": 0.6484072558504601, | |
| "learning_rate": 9.166666666666666e-06, | |
| "loss": 0.4069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16632631421089172, | |
| "step": 155, | |
| "valid_targets_mean": 3075.5, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 0.16692749087115286, | |
| "grad_norm": 0.8858011589833842, | |
| "learning_rate": 9.464285714285714e-06, | |
| "loss": 0.4173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25384393334388733, | |
| "step": 160, | |
| "valid_targets_mean": 2868.9, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 0.17214397496087636, | |
| "grad_norm": 0.6408798192050236, | |
| "learning_rate": 9.761904761904762e-06, | |
| "loss": 0.4043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23975665867328644, | |
| "step": 165, | |
| "valid_targets_mean": 4565.0, | |
| "valid_targets_min": 1941 | |
| }, | |
| { | |
| "epoch": 0.17736045905059988, | |
| "grad_norm": 0.7394867963407229, | |
| "learning_rate": 1.005952380952381e-05, | |
| "loss": 0.4, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18954622745513916, | |
| "step": 170, | |
| "valid_targets_mean": 3095.5, | |
| "valid_targets_min": 1281 | |
| }, | |
| { | |
| "epoch": 0.1825769431403234, | |
| "grad_norm": 0.6791013934973004, | |
| "learning_rate": 1.0357142857142859e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19606086611747742, | |
| "step": 175, | |
| "valid_targets_mean": 3554.9, | |
| "valid_targets_min": 1697 | |
| }, | |
| { | |
| "epoch": 0.18779342723004694, | |
| "grad_norm": 0.6413569412853038, | |
| "learning_rate": 1.0654761904761905e-05, | |
| "loss": 0.4076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17540468275547028, | |
| "step": 180, | |
| "valid_targets_mean": 3937.0, | |
| "valid_targets_min": 2306 | |
| }, | |
| { | |
| "epoch": 0.19300991131977047, | |
| "grad_norm": 0.6206530445898384, | |
| "learning_rate": 1.0952380952380955e-05, | |
| "loss": 0.3943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22631549835205078, | |
| "step": 185, | |
| "valid_targets_mean": 4594.0, | |
| "valid_targets_min": 2534 | |
| }, | |
| { | |
| "epoch": 0.198226395409494, | |
| "grad_norm": 0.6814845466500326, | |
| "learning_rate": 1.125e-05, | |
| "loss": 0.3969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16777460277080536, | |
| "step": 190, | |
| "valid_targets_mean": 2910.1, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 0.20344287949921752, | |
| "grad_norm": 0.7389186017915087, | |
| "learning_rate": 1.1547619047619047e-05, | |
| "loss": 0.397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1736895740032196, | |
| "step": 195, | |
| "valid_targets_mean": 2873.5, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 0.20865936358894105, | |
| "grad_norm": 0.6630606599482963, | |
| "learning_rate": 1.1845238095238096e-05, | |
| "loss": 0.3987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20959530770778656, | |
| "step": 200, | |
| "valid_targets_mean": 3721.0, | |
| "valid_targets_min": 2109 | |
| }, | |
| { | |
| "epoch": 0.21387584767866458, | |
| "grad_norm": 0.7500882597211516, | |
| "learning_rate": 1.2142857142857142e-05, | |
| "loss": 0.3933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.171321302652359, | |
| "step": 205, | |
| "valid_targets_mean": 2451.2, | |
| "valid_targets_min": 1855 | |
| }, | |
| { | |
| "epoch": 0.2190923317683881, | |
| "grad_norm": 0.672965375087765, | |
| "learning_rate": 1.2440476190476192e-05, | |
| "loss": 0.3949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22043120861053467, | |
| "step": 210, | |
| "valid_targets_mean": 4160.6, | |
| "valid_targets_min": 1580 | |
| }, | |
| { | |
| "epoch": 0.22430881585811163, | |
| "grad_norm": 0.7475568319343691, | |
| "learning_rate": 1.2738095238095238e-05, | |
| "loss": 0.3858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18907323479652405, | |
| "step": 215, | |
| "valid_targets_mean": 3270.6, | |
| "valid_targets_min": 1577 | |
| }, | |
| { | |
| "epoch": 0.22952529994783516, | |
| "grad_norm": 0.6399346905741724, | |
| "learning_rate": 1.3035714285714287e-05, | |
| "loss": 0.3773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15216752886772156, | |
| "step": 220, | |
| "valid_targets_mean": 3585.1, | |
| "valid_targets_min": 1856 | |
| }, | |
| { | |
| "epoch": 0.2347417840375587, | |
| "grad_norm": 0.7356751263385859, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.3816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23159337043762207, | |
| "step": 225, | |
| "valid_targets_mean": 4419.6, | |
| "valid_targets_min": 2508 | |
| }, | |
| { | |
| "epoch": 0.23995826812728221, | |
| "grad_norm": 0.6158622657830694, | |
| "learning_rate": 1.3630952380952383e-05, | |
| "loss": 0.393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2115560621023178, | |
| "step": 230, | |
| "valid_targets_mean": 4644.9, | |
| "valid_targets_min": 2964 | |
| }, | |
| { | |
| "epoch": 0.24517475221700574, | |
| "grad_norm": 0.622295599038362, | |
| "learning_rate": 1.3928571428571429e-05, | |
| "loss": 0.3771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15604719519615173, | |
| "step": 235, | |
| "valid_targets_mean": 3736.2, | |
| "valid_targets_min": 1817 | |
| }, | |
| { | |
| "epoch": 0.25039123630672927, | |
| "grad_norm": 0.6495002855928439, | |
| "learning_rate": 1.4226190476190478e-05, | |
| "loss": 0.4014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18867361545562744, | |
| "step": 240, | |
| "valid_targets_mean": 3567.5, | |
| "valid_targets_min": 1913 | |
| }, | |
| { | |
| "epoch": 0.2556077203964528, | |
| "grad_norm": 0.7125088187599722, | |
| "learning_rate": 1.4523809523809524e-05, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18984279036521912, | |
| "step": 245, | |
| "valid_targets_mean": 4227.0, | |
| "valid_targets_min": 1757 | |
| }, | |
| { | |
| "epoch": 0.2608242044861763, | |
| "grad_norm": 0.9313614528740436, | |
| "learning_rate": 1.4821428571428574e-05, | |
| "loss": 0.3934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20097716152668, | |
| "step": 250, | |
| "valid_targets_mean": 3979.1, | |
| "valid_targets_min": 2424 | |
| }, | |
| { | |
| "epoch": 0.26604068857589985, | |
| "grad_norm": 0.7601028631475797, | |
| "learning_rate": 1.511904761904762e-05, | |
| "loss": 0.3711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20801891386508942, | |
| "step": 255, | |
| "valid_targets_mean": 3148.6, | |
| "valid_targets_min": 1955 | |
| }, | |
| { | |
| "epoch": 0.2712571726656234, | |
| "grad_norm": 0.6910802406105854, | |
| "learning_rate": 1.5416666666666668e-05, | |
| "loss": 0.3842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17955368757247925, | |
| "step": 260, | |
| "valid_targets_mean": 3444.0, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 0.2764736567553469, | |
| "grad_norm": 0.6114071535415853, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.3681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15971292555332184, | |
| "step": 265, | |
| "valid_targets_mean": 4665.4, | |
| "valid_targets_min": 2506 | |
| }, | |
| { | |
| "epoch": 0.28169014084507044, | |
| "grad_norm": 0.701814397552766, | |
| "learning_rate": 1.6011904761904763e-05, | |
| "loss": 0.358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16428491473197937, | |
| "step": 270, | |
| "valid_targets_mean": 4472.1, | |
| "valid_targets_min": 2579 | |
| }, | |
| { | |
| "epoch": 0.28690662493479396, | |
| "grad_norm": 0.6965026840724696, | |
| "learning_rate": 1.630952380952381e-05, | |
| "loss": 0.3634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22727182507514954, | |
| "step": 275, | |
| "valid_targets_mean": 3854.4, | |
| "valid_targets_min": 1679 | |
| }, | |
| { | |
| "epoch": 0.2921231090245175, | |
| "grad_norm": 0.6505168837062787, | |
| "learning_rate": 1.660714285714286e-05, | |
| "loss": 0.3805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1859174519777298, | |
| "step": 280, | |
| "valid_targets_mean": 3779.6, | |
| "valid_targets_min": 1059 | |
| }, | |
| { | |
| "epoch": 0.297339593114241, | |
| "grad_norm": 0.6122909552839506, | |
| "learning_rate": 1.6904761904761906e-05, | |
| "loss": 0.371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1606474667787552, | |
| "step": 285, | |
| "valid_targets_mean": 3221.6, | |
| "valid_targets_min": 964 | |
| }, | |
| { | |
| "epoch": 0.30255607720396455, | |
| "grad_norm": 0.6040954080915727, | |
| "learning_rate": 1.7202380952380954e-05, | |
| "loss": 0.3741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1537097692489624, | |
| "step": 290, | |
| "valid_targets_mean": 4071.9, | |
| "valid_targets_min": 1105 | |
| }, | |
| { | |
| "epoch": 0.3077725612936881, | |
| "grad_norm": 0.617600097048369, | |
| "learning_rate": 1.7500000000000002e-05, | |
| "loss": 0.3697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18365611135959625, | |
| "step": 295, | |
| "valid_targets_mean": 4799.2, | |
| "valid_targets_min": 1460 | |
| }, | |
| { | |
| "epoch": 0.3129890453834116, | |
| "grad_norm": 0.6069644348487128, | |
| "learning_rate": 1.779761904761905e-05, | |
| "loss": 0.3924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1689816415309906, | |
| "step": 300, | |
| "valid_targets_mean": 3648.5, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 0.31820552947313513, | |
| "grad_norm": 0.6174253762722446, | |
| "learning_rate": 1.8095238095238097e-05, | |
| "loss": 0.3698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20255890488624573, | |
| "step": 305, | |
| "valid_targets_mean": 5118.2, | |
| "valid_targets_min": 3004 | |
| }, | |
| { | |
| "epoch": 0.32342201356285866, | |
| "grad_norm": 0.5955995040214926, | |
| "learning_rate": 1.8392857142857142e-05, | |
| "loss": 0.3674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16458579897880554, | |
| "step": 310, | |
| "valid_targets_mean": 4048.8, | |
| "valid_targets_min": 1535 | |
| }, | |
| { | |
| "epoch": 0.3286384976525822, | |
| "grad_norm": 0.6452460673061781, | |
| "learning_rate": 1.8690476190476193e-05, | |
| "loss": 0.3605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1792895495891571, | |
| "step": 315, | |
| "valid_targets_mean": 4052.0, | |
| "valid_targets_min": 2075 | |
| }, | |
| { | |
| "epoch": 0.3338549817423057, | |
| "grad_norm": 0.7156274489563256, | |
| "learning_rate": 1.8988095238095237e-05, | |
| "loss": 0.3589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16527575254440308, | |
| "step": 320, | |
| "valid_targets_mean": 3067.6, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 0.33907146583202924, | |
| "grad_norm": 0.6469296499474231, | |
| "learning_rate": 1.928571428571429e-05, | |
| "loss": 0.3583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19072631001472473, | |
| "step": 325, | |
| "valid_targets_mean": 4654.1, | |
| "valid_targets_min": 3131 | |
| }, | |
| { | |
| "epoch": 0.3442879499217527, | |
| "grad_norm": 0.7337492204477304, | |
| "learning_rate": 1.9583333333333333e-05, | |
| "loss": 0.3501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1527278572320938, | |
| "step": 330, | |
| "valid_targets_mean": 3172.0, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 0.34950443401147624, | |
| "grad_norm": 0.6665597791976955, | |
| "learning_rate": 1.9880952380952384e-05, | |
| "loss": 0.3471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17258715629577637, | |
| "step": 335, | |
| "valid_targets_mean": 4661.1, | |
| "valid_targets_min": 2667 | |
| }, | |
| { | |
| "epoch": 0.35472091810119977, | |
| "grad_norm": 0.6269719721704452, | |
| "learning_rate": 2.0178571428571428e-05, | |
| "loss": 0.3603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16327202320098877, | |
| "step": 340, | |
| "valid_targets_mean": 3484.2, | |
| "valid_targets_min": 1981 | |
| }, | |
| { | |
| "epoch": 0.3599374021909233, | |
| "grad_norm": 0.728260094186098, | |
| "learning_rate": 2.0476190476190476e-05, | |
| "loss": 0.3827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1827574372291565, | |
| "step": 345, | |
| "valid_targets_mean": 3380.0, | |
| "valid_targets_min": 1774 | |
| }, | |
| { | |
| "epoch": 0.3651538862806468, | |
| "grad_norm": 0.6691140426996306, | |
| "learning_rate": 2.0773809523809527e-05, | |
| "loss": 0.3597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16854047775268555, | |
| "step": 350, | |
| "valid_targets_mean": 3107.4, | |
| "valid_targets_min": 1691 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 0.6426398775655767, | |
| "learning_rate": 2.1071428571428575e-05, | |
| "loss": 0.3736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19410286843776703, | |
| "step": 355, | |
| "valid_targets_mean": 4045.0, | |
| "valid_targets_min": 2318 | |
| }, | |
| { | |
| "epoch": 0.3755868544600939, | |
| "grad_norm": 0.5781340046876212, | |
| "learning_rate": 2.136904761904762e-05, | |
| "loss": 0.3526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14779919385910034, | |
| "step": 360, | |
| "valid_targets_mean": 3686.9, | |
| "valid_targets_min": 1509 | |
| }, | |
| { | |
| "epoch": 0.3808033385498174, | |
| "grad_norm": 0.6919764983411971, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 0.3531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16663911938667297, | |
| "step": 365, | |
| "valid_targets_mean": 3588.1, | |
| "valid_targets_min": 1601 | |
| }, | |
| { | |
| "epoch": 0.38601982263954093, | |
| "grad_norm": 0.6573870309602741, | |
| "learning_rate": 2.1964285714285718e-05, | |
| "loss": 0.3467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1888006627559662, | |
| "step": 370, | |
| "valid_targets_mean": 4142.2, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 0.39123630672926446, | |
| "grad_norm": 0.6738936863642264, | |
| "learning_rate": 2.2261904761904766e-05, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15684418380260468, | |
| "step": 375, | |
| "valid_targets_mean": 2910.9, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 0.396452790818988, | |
| "grad_norm": 0.8667647943093282, | |
| "learning_rate": 2.255952380952381e-05, | |
| "loss": 0.3546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19549183547496796, | |
| "step": 380, | |
| "valid_targets_mean": 3551.8, | |
| "valid_targets_min": 1722 | |
| }, | |
| { | |
| "epoch": 0.4016692749087115, | |
| "grad_norm": 0.7528317760076235, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "loss": 0.3545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18375492095947266, | |
| "step": 385, | |
| "valid_targets_mean": 4151.2, | |
| "valid_targets_min": 2121 | |
| }, | |
| { | |
| "epoch": 0.40688575899843504, | |
| "grad_norm": 0.6396976996059265, | |
| "learning_rate": 2.315476190476191e-05, | |
| "loss": 0.3498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17908553779125214, | |
| "step": 390, | |
| "valid_targets_mean": 3703.8, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 0.41210224308815857, | |
| "grad_norm": 0.6671547374096402, | |
| "learning_rate": 2.3452380952380957e-05, | |
| "loss": 0.3727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19075940549373627, | |
| "step": 395, | |
| "valid_targets_mean": 4232.8, | |
| "valid_targets_min": 3287 | |
| }, | |
| { | |
| "epoch": 0.4173187271778821, | |
| "grad_norm": 0.6231159972509208, | |
| "learning_rate": 2.375e-05, | |
| "loss": 0.367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.137750044465065, | |
| "step": 400, | |
| "valid_targets_mean": 3408.4, | |
| "valid_targets_min": 1849 | |
| }, | |
| { | |
| "epoch": 0.4225352112676056, | |
| "grad_norm": 1.0272225935504828, | |
| "learning_rate": 2.404761904761905e-05, | |
| "loss": 0.3768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2568487524986267, | |
| "step": 405, | |
| "valid_targets_mean": 4216.5, | |
| "valid_targets_min": 1857 | |
| }, | |
| { | |
| "epoch": 0.42775169535732915, | |
| "grad_norm": 0.6761504537294957, | |
| "learning_rate": 2.4345238095238093e-05, | |
| "loss": 0.3653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18525201082229614, | |
| "step": 410, | |
| "valid_targets_mean": 3993.6, | |
| "valid_targets_min": 2129 | |
| }, | |
| { | |
| "epoch": 0.4329681794470527, | |
| "grad_norm": 0.6948999150573621, | |
| "learning_rate": 2.4642857142857145e-05, | |
| "loss": 0.3602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19655150175094604, | |
| "step": 415, | |
| "valid_targets_mean": 4451.1, | |
| "valid_targets_min": 2650 | |
| }, | |
| { | |
| "epoch": 0.4381846635367762, | |
| "grad_norm": 0.9088453848440142, | |
| "learning_rate": 2.4940476190476192e-05, | |
| "loss": 0.3689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15923558175563812, | |
| "step": 420, | |
| "valid_targets_mean": 3303.0, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 0.44340114762649974, | |
| "grad_norm": 0.6308019897211903, | |
| "learning_rate": 2.523809523809524e-05, | |
| "loss": 0.3773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.196792334318161, | |
| "step": 425, | |
| "valid_targets_mean": 4286.2, | |
| "valid_targets_min": 2325 | |
| }, | |
| { | |
| "epoch": 0.44861763171622326, | |
| "grad_norm": 0.7104064834630404, | |
| "learning_rate": 2.5535714285714284e-05, | |
| "loss": 0.3483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21335434913635254, | |
| "step": 430, | |
| "valid_targets_mean": 3906.0, | |
| "valid_targets_min": 2023 | |
| }, | |
| { | |
| "epoch": 0.4538341158059468, | |
| "grad_norm": 0.7229249074957009, | |
| "learning_rate": 2.5833333333333336e-05, | |
| "loss": 0.3572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16322097182273865, | |
| "step": 435, | |
| "valid_targets_mean": 3225.9, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 0.4590505998956703, | |
| "grad_norm": 0.7860100998878066, | |
| "learning_rate": 2.6130952380952383e-05, | |
| "loss": 0.3713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20680806040763855, | |
| "step": 440, | |
| "valid_targets_mean": 3023.4, | |
| "valid_targets_min": 2260 | |
| }, | |
| { | |
| "epoch": 0.46426708398539385, | |
| "grad_norm": 0.6751078061543765, | |
| "learning_rate": 2.642857142857143e-05, | |
| "loss": 0.3568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21227988600730896, | |
| "step": 445, | |
| "valid_targets_mean": 4483.0, | |
| "valid_targets_min": 1835 | |
| }, | |
| { | |
| "epoch": 0.4694835680751174, | |
| "grad_norm": 0.5441273903582977, | |
| "learning_rate": 2.6726190476190475e-05, | |
| "loss": 0.3487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14872467517852783, | |
| "step": 450, | |
| "valid_targets_mean": 4982.4, | |
| "valid_targets_min": 2343 | |
| }, | |
| { | |
| "epoch": 0.4747000521648409, | |
| "grad_norm": 0.7353601300878371, | |
| "learning_rate": 2.7023809523809527e-05, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1489342898130417, | |
| "step": 455, | |
| "valid_targets_mean": 3090.5, | |
| "valid_targets_min": 1951 | |
| }, | |
| { | |
| "epoch": 0.47991653625456443, | |
| "grad_norm": 0.6875254556802465, | |
| "learning_rate": 2.7321428571428574e-05, | |
| "loss": 0.3502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17940430343151093, | |
| "step": 460, | |
| "valid_targets_mean": 3504.0, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 0.48513302034428796, | |
| "grad_norm": 0.6438204361199159, | |
| "learning_rate": 2.7619047619047622e-05, | |
| "loss": 0.3369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14692270755767822, | |
| "step": 465, | |
| "valid_targets_mean": 3386.9, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 0.4903495044340115, | |
| "grad_norm": 0.7683915677639948, | |
| "learning_rate": 2.7916666666666666e-05, | |
| "loss": 0.3626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16459304094314575, | |
| "step": 470, | |
| "valid_targets_mean": 3535.0, | |
| "valid_targets_min": 2185 | |
| }, | |
| { | |
| "epoch": 0.495565988523735, | |
| "grad_norm": 0.6270539164098722, | |
| "learning_rate": 2.8214285714285718e-05, | |
| "loss": 0.3508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17004793882369995, | |
| "step": 475, | |
| "valid_targets_mean": 4123.2, | |
| "valid_targets_min": 2024 | |
| }, | |
| { | |
| "epoch": 0.5007824726134585, | |
| "grad_norm": 0.6153725856259994, | |
| "learning_rate": 2.8511904761904765e-05, | |
| "loss": 0.3536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1445433795452118, | |
| "step": 480, | |
| "valid_targets_mean": 3541.8, | |
| "valid_targets_min": 2282 | |
| }, | |
| { | |
| "epoch": 0.5059989567031821, | |
| "grad_norm": 0.7373370910363823, | |
| "learning_rate": 2.8809523809523813e-05, | |
| "loss": 0.3404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1313253492116928, | |
| "step": 485, | |
| "valid_targets_mean": 2962.8, | |
| "valid_targets_min": 1965 | |
| }, | |
| { | |
| "epoch": 0.5112154407929056, | |
| "grad_norm": 0.750223075215435, | |
| "learning_rate": 2.9107142857142857e-05, | |
| "loss": 0.3567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1805972456932068, | |
| "step": 490, | |
| "valid_targets_mean": 3577.2, | |
| "valid_targets_min": 1307 | |
| }, | |
| { | |
| "epoch": 0.5164319248826291, | |
| "grad_norm": 0.6455068089764313, | |
| "learning_rate": 2.940476190476191e-05, | |
| "loss": 0.3503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.215504452586174, | |
| "step": 495, | |
| "valid_targets_mean": 4636.2, | |
| "valid_targets_min": 1968 | |
| }, | |
| { | |
| "epoch": 0.5216484089723527, | |
| "grad_norm": 0.7112927669230673, | |
| "learning_rate": 2.9702380952380956e-05, | |
| "loss": 0.3304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14848318696022034, | |
| "step": 500, | |
| "valid_targets_mean": 2862.9, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 0.5268648930620762, | |
| "grad_norm": 0.6504530702897411, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.3284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1909797042608261, | |
| "step": 505, | |
| "valid_targets_mean": 4241.4, | |
| "valid_targets_min": 2513 | |
| }, | |
| { | |
| "epoch": 0.5320813771517997, | |
| "grad_norm": 0.6063164704772284, | |
| "learning_rate": 3.029761904761905e-05, | |
| "loss": 0.3342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15677478909492493, | |
| "step": 510, | |
| "valid_targets_mean": 4271.6, | |
| "valid_targets_min": 2303 | |
| }, | |
| { | |
| "epoch": 0.5372978612415232, | |
| "grad_norm": 0.666145252020734, | |
| "learning_rate": 3.059523809523809e-05, | |
| "loss": 0.3371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15673008561134338, | |
| "step": 515, | |
| "valid_targets_mean": 3762.1, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 0.5425143453312468, | |
| "grad_norm": 0.63392090044597, | |
| "learning_rate": 3.089285714285715e-05, | |
| "loss": 0.3324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19646254181861877, | |
| "step": 520, | |
| "valid_targets_mean": 4333.0, | |
| "valid_targets_min": 2332 | |
| }, | |
| { | |
| "epoch": 0.5477308294209703, | |
| "grad_norm": 0.6998855303603239, | |
| "learning_rate": 3.1190476190476195e-05, | |
| "loss": 0.3352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16803094744682312, | |
| "step": 525, | |
| "valid_targets_mean": 2995.6, | |
| "valid_targets_min": 1518 | |
| }, | |
| { | |
| "epoch": 0.5529473135106938, | |
| "grad_norm": 0.5850674234836659, | |
| "learning_rate": 3.148809523809524e-05, | |
| "loss": 0.3346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14564329385757446, | |
| "step": 530, | |
| "valid_targets_mean": 4031.6, | |
| "valid_targets_min": 1152 | |
| }, | |
| { | |
| "epoch": 0.5581637976004173, | |
| "grad_norm": 0.6111681084145681, | |
| "learning_rate": 3.1785714285714284e-05, | |
| "loss": 0.3467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22183175384998322, | |
| "step": 535, | |
| "valid_targets_mean": 4547.6, | |
| "valid_targets_min": 1598 | |
| }, | |
| { | |
| "epoch": 0.5633802816901409, | |
| "grad_norm": 0.6745299886475277, | |
| "learning_rate": 3.208333333333334e-05, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19307160377502441, | |
| "step": 540, | |
| "valid_targets_mean": 4027.6, | |
| "valid_targets_min": 1611 | |
| }, | |
| { | |
| "epoch": 0.5685967657798644, | |
| "grad_norm": 0.6040097096186956, | |
| "learning_rate": 3.2380952380952386e-05, | |
| "loss": 0.3308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14207333326339722, | |
| "step": 545, | |
| "valid_targets_mean": 3423.5, | |
| "valid_targets_min": 1490 | |
| }, | |
| { | |
| "epoch": 0.5738132498695879, | |
| "grad_norm": 0.6519920946306585, | |
| "learning_rate": 3.2678571428571434e-05, | |
| "loss": 0.3425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1509445309638977, | |
| "step": 550, | |
| "valid_targets_mean": 3238.2, | |
| "valid_targets_min": 2535 | |
| }, | |
| { | |
| "epoch": 0.5790297339593115, | |
| "grad_norm": 0.5979473390733931, | |
| "learning_rate": 3.2976190476190475e-05, | |
| "loss": 0.333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15014365315437317, | |
| "step": 555, | |
| "valid_targets_mean": 3477.9, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 0.584246218049035, | |
| "grad_norm": 0.5489743808302442, | |
| "learning_rate": 3.327380952380953e-05, | |
| "loss": 0.3349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16869285702705383, | |
| "step": 560, | |
| "valid_targets_mean": 5560.0, | |
| "valid_targets_min": 2933 | |
| }, | |
| { | |
| "epoch": 0.5894627021387585, | |
| "grad_norm": 0.8667460231572823, | |
| "learning_rate": 3.357142857142858e-05, | |
| "loss": 0.345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20353971421718597, | |
| "step": 565, | |
| "valid_targets_mean": 3995.4, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 0.594679186228482, | |
| "grad_norm": 0.7210207335915371, | |
| "learning_rate": 3.386904761904762e-05, | |
| "loss": 0.3486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1959480494260788, | |
| "step": 570, | |
| "valid_targets_mean": 3831.2, | |
| "valid_targets_min": 2184 | |
| }, | |
| { | |
| "epoch": 0.5998956703182056, | |
| "grad_norm": 0.6464988956009591, | |
| "learning_rate": 3.4166666666666666e-05, | |
| "loss": 0.3401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15150097012519836, | |
| "step": 575, | |
| "valid_targets_mean": 3591.1, | |
| "valid_targets_min": 1503 | |
| }, | |
| { | |
| "epoch": 0.6051121544079291, | |
| "grad_norm": 0.5964216211513691, | |
| "learning_rate": 3.446428571428572e-05, | |
| "loss": 0.3411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18338820338249207, | |
| "step": 580, | |
| "valid_targets_mean": 4813.1, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 0.6103286384976526, | |
| "grad_norm": 0.6436405007857757, | |
| "learning_rate": 3.476190476190477e-05, | |
| "loss": 0.3433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22753724455833435, | |
| "step": 585, | |
| "valid_targets_mean": 5186.4, | |
| "valid_targets_min": 2743 | |
| }, | |
| { | |
| "epoch": 0.6155451225873761, | |
| "grad_norm": 0.6124671760975635, | |
| "learning_rate": 3.505952380952381e-05, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1334855556488037, | |
| "step": 590, | |
| "valid_targets_mean": 5314.2, | |
| "valid_targets_min": 4389 | |
| }, | |
| { | |
| "epoch": 0.6207616066770997, | |
| "grad_norm": 0.5192356521802616, | |
| "learning_rate": 3.535714285714286e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12781807780265808, | |
| "step": 595, | |
| "valid_targets_mean": 6811.5, | |
| "valid_targets_min": 4598 | |
| }, | |
| { | |
| "epoch": 0.6259780907668232, | |
| "grad_norm": 0.5979503977065781, | |
| "learning_rate": 3.565476190476191e-05, | |
| "loss": 0.2698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16105909645557404, | |
| "step": 600, | |
| "valid_targets_mean": 6251.1, | |
| "valid_targets_min": 5341 | |
| }, | |
| { | |
| "epoch": 0.6311945748565467, | |
| "grad_norm": 0.5013514091178745, | |
| "learning_rate": 3.595238095238096e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12659405171871185, | |
| "step": 605, | |
| "valid_targets_mean": 6485.6, | |
| "valid_targets_min": 4586 | |
| }, | |
| { | |
| "epoch": 0.6364110589462703, | |
| "grad_norm": 0.4959559456067435, | |
| "learning_rate": 3.625e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12373220175504684, | |
| "step": 610, | |
| "valid_targets_mean": 5752.4, | |
| "valid_targets_min": 4211 | |
| }, | |
| { | |
| "epoch": 0.6416275430359938, | |
| "grad_norm": 0.5548953300692737, | |
| "learning_rate": 3.654761904761905e-05, | |
| "loss": 0.2635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13150092959403992, | |
| "step": 615, | |
| "valid_targets_mean": 5376.8, | |
| "valid_targets_min": 4169 | |
| }, | |
| { | |
| "epoch": 0.6468440271257173, | |
| "grad_norm": 0.5169673418632462, | |
| "learning_rate": 3.6845238095238096e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10056436061859131, | |
| "step": 620, | |
| "valid_targets_mean": 6118.5, | |
| "valid_targets_min": 4589 | |
| }, | |
| { | |
| "epoch": 0.6520605112154408, | |
| "grad_norm": 0.5476099567697311, | |
| "learning_rate": 3.714285714285715e-05, | |
| "loss": 0.2594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12813040614128113, | |
| "step": 625, | |
| "valid_targets_mean": 5471.8, | |
| "valid_targets_min": 4525 | |
| }, | |
| { | |
| "epoch": 0.6572769953051644, | |
| "grad_norm": 0.5531772843250943, | |
| "learning_rate": 3.744047619047619e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10611049830913544, | |
| "step": 630, | |
| "valid_targets_mean": 6151.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 0.6624934793948879, | |
| "grad_norm": 0.5583674038831471, | |
| "learning_rate": 3.773809523809524e-05, | |
| "loss": 0.2378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11804410815238953, | |
| "step": 635, | |
| "valid_targets_mean": 6537.2, | |
| "valid_targets_min": 5289 | |
| }, | |
| { | |
| "epoch": 0.6677099634846114, | |
| "grad_norm": 0.4744191718991493, | |
| "learning_rate": 3.803571428571429e-05, | |
| "loss": 0.2599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11829397082328796, | |
| "step": 640, | |
| "valid_targets_mean": 6323.9, | |
| "valid_targets_min": 5398 | |
| }, | |
| { | |
| "epoch": 0.672926447574335, | |
| "grad_norm": 0.4633272287351452, | |
| "learning_rate": 3.833333333333334e-05, | |
| "loss": 0.2387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13967427611351013, | |
| "step": 645, | |
| "valid_targets_mean": 8419.1, | |
| "valid_targets_min": 5304 | |
| }, | |
| { | |
| "epoch": 0.6781429316640585, | |
| "grad_norm": 0.48586987082429806, | |
| "learning_rate": 3.863095238095238e-05, | |
| "loss": 0.2397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11013352870941162, | |
| "step": 650, | |
| "valid_targets_mean": 5858.1, | |
| "valid_targets_min": 3815 | |
| }, | |
| { | |
| "epoch": 0.6833594157537819, | |
| "grad_norm": 0.5047817116212406, | |
| "learning_rate": 3.892857142857143e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11770049482584, | |
| "step": 655, | |
| "valid_targets_mean": 6356.6, | |
| "valid_targets_min": 4629 | |
| }, | |
| { | |
| "epoch": 0.6885758998435054, | |
| "grad_norm": 0.43170183731502365, | |
| "learning_rate": 3.922619047619048e-05, | |
| "loss": 0.2208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1048555001616478, | |
| "step": 660, | |
| "valid_targets_mean": 7511.5, | |
| "valid_targets_min": 4842 | |
| }, | |
| { | |
| "epoch": 0.693792383933229, | |
| "grad_norm": 0.49096465657148913, | |
| "learning_rate": 3.9523809523809526e-05, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09846343100070953, | |
| "step": 665, | |
| "valid_targets_mean": 6017.4, | |
| "valid_targets_min": 4618 | |
| }, | |
| { | |
| "epoch": 0.6990088680229525, | |
| "grad_norm": 0.49668266371845543, | |
| "learning_rate": 3.982142857142857e-05, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0989207774400711, | |
| "step": 670, | |
| "valid_targets_mean": 6133.8, | |
| "valid_targets_min": 4618 | |
| }, | |
| { | |
| "epoch": 0.704225352112676, | |
| "grad_norm": 0.4641004390838306, | |
| "learning_rate": 3.999998918212333e-05, | |
| "loss": 0.2189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11310772597789764, | |
| "step": 675, | |
| "valid_targets_mean": 6665.2, | |
| "valid_targets_min": 4772 | |
| }, | |
| { | |
| "epoch": 0.7094418362023995, | |
| "grad_norm": 0.45143054338863337, | |
| "learning_rate": 3.999986748114514e-05, | |
| "loss": 0.2141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09853895008563995, | |
| "step": 680, | |
| "valid_targets_mean": 6613.4, | |
| "valid_targets_min": 5243 | |
| }, | |
| { | |
| "epoch": 0.7146583202921231, | |
| "grad_norm": 0.46414834061851207, | |
| "learning_rate": 3.99996105576685e-05, | |
| "loss": 0.2265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10851044207811356, | |
| "step": 685, | |
| "valid_targets_mean": 7256.1, | |
| "valid_targets_min": 5574 | |
| }, | |
| { | |
| "epoch": 0.7198748043818466, | |
| "grad_norm": 0.46950952199768986, | |
| "learning_rate": 3.999921841343052e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11483830958604813, | |
| "step": 690, | |
| "valid_targets_mean": 6051.2, | |
| "valid_targets_min": 647 | |
| }, | |
| { | |
| "epoch": 0.7250912884715701, | |
| "grad_norm": 0.5561152937148893, | |
| "learning_rate": 3.999869105108254e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1280754804611206, | |
| "step": 695, | |
| "valid_targets_mean": 6290.5, | |
| "valid_targets_min": 4499 | |
| }, | |
| { | |
| "epoch": 0.7303077725612936, | |
| "grad_norm": 0.47951031189936266, | |
| "learning_rate": 3.9998028474190154e-05, | |
| "loss": 0.2333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12890252470970154, | |
| "step": 700, | |
| "valid_targets_mean": 6375.5, | |
| "valid_targets_min": 5795 | |
| }, | |
| { | |
| "epoch": 0.7355242566510172, | |
| "grad_norm": 0.4732962862065466, | |
| "learning_rate": 3.999723068723316e-05, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11941637843847275, | |
| "step": 705, | |
| "valid_targets_mean": 6107.5, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 0.49060566158311036, | |
| "learning_rate": 3.9996297695605534e-05, | |
| "loss": 0.2623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11854396760463715, | |
| "step": 710, | |
| "valid_targets_mean": 6077.5, | |
| "valid_targets_min": 4596 | |
| }, | |
| { | |
| "epoch": 0.7459572248304642, | |
| "grad_norm": 0.5371773039007353, | |
| "learning_rate": 3.999522950561537e-05, | |
| "loss": 0.2637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.106911800801754, | |
| "step": 715, | |
| "valid_targets_mean": 5823.2, | |
| "valid_targets_min": 4581 | |
| }, | |
| { | |
| "epoch": 0.7511737089201878, | |
| "grad_norm": 0.49188942566627325, | |
| "learning_rate": 3.999402612448491e-05, | |
| "loss": 0.2343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1312708854675293, | |
| "step": 720, | |
| "valid_targets_mean": 6410.8, | |
| "valid_targets_min": 5086 | |
| }, | |
| { | |
| "epoch": 0.7563901930099113, | |
| "grad_norm": 0.543912985086522, | |
| "learning_rate": 3.99926875603504e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11862045526504517, | |
| "step": 725, | |
| "valid_targets_mean": 7107.5, | |
| "valid_targets_min": 5827 | |
| }, | |
| { | |
| "epoch": 0.7616066770996348, | |
| "grad_norm": 0.47312383974448224, | |
| "learning_rate": 3.9991213822262105e-05, | |
| "loss": 0.2326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11791808903217316, | |
| "step": 730, | |
| "valid_targets_mean": 6158.5, | |
| "valid_targets_min": 4810 | |
| }, | |
| { | |
| "epoch": 0.7668231611893583, | |
| "grad_norm": 0.5471750445558776, | |
| "learning_rate": 3.9989604920184215e-05, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10992453247308731, | |
| "step": 735, | |
| "valid_targets_mean": 5886.2, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 0.7720396452790819, | |
| "grad_norm": 0.4176966638320051, | |
| "learning_rate": 3.99878608649948e-05, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12207449972629547, | |
| "step": 740, | |
| "valid_targets_mean": 8312.5, | |
| "valid_targets_min": 5125 | |
| }, | |
| { | |
| "epoch": 0.7772561293688054, | |
| "grad_norm": 0.5078572508662734, | |
| "learning_rate": 3.9985981668485694e-05, | |
| "loss": 0.2443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12259017676115036, | |
| "step": 745, | |
| "valid_targets_mean": 7113.1, | |
| "valid_targets_min": 5089 | |
| }, | |
| { | |
| "epoch": 0.7824726134585289, | |
| "grad_norm": 0.4806840529970493, | |
| "learning_rate": 3.9983967343362476e-05, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1073918491601944, | |
| "step": 750, | |
| "valid_targets_mean": 6532.9, | |
| "valid_targets_min": 4219 | |
| }, | |
| { | |
| "epoch": 0.7876890975482524, | |
| "grad_norm": 0.5407730585572066, | |
| "learning_rate": 3.998181790324434e-05, | |
| "loss": 0.2223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11323493719100952, | |
| "step": 755, | |
| "valid_targets_mean": 5441.4, | |
| "valid_targets_min": 3556 | |
| }, | |
| { | |
| "epoch": 0.792905581637976, | |
| "grad_norm": 0.46513453175661434, | |
| "learning_rate": 3.997953336266402e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10463686287403107, | |
| "step": 760, | |
| "valid_targets_mean": 6978.2, | |
| "valid_targets_min": 3281 | |
| }, | |
| { | |
| "epoch": 0.7981220657276995, | |
| "grad_norm": 0.4515990108744587, | |
| "learning_rate": 3.997711373706768e-05, | |
| "loss": 0.2292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14620834589004517, | |
| "step": 765, | |
| "valid_targets_mean": 8108.8, | |
| "valid_targets_min": 6178 | |
| }, | |
| { | |
| "epoch": 0.803338549817423, | |
| "grad_norm": 0.5045800106994308, | |
| "learning_rate": 3.997455904281481e-05, | |
| "loss": 0.2363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11089476197957993, | |
| "step": 770, | |
| "valid_targets_mean": 6134.0, | |
| "valid_targets_min": 5308 | |
| }, | |
| { | |
| "epoch": 0.8085550339071466, | |
| "grad_norm": 0.4701540545545427, | |
| "learning_rate": 3.997186929717814e-05, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.114922896027565, | |
| "step": 775, | |
| "valid_targets_mean": 6336.0, | |
| "valid_targets_min": 4404 | |
| }, | |
| { | |
| "epoch": 0.8137715179968701, | |
| "grad_norm": 0.449804411920331, | |
| "learning_rate": 3.996904451834349e-05, | |
| "loss": 0.2177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10100569576025009, | |
| "step": 780, | |
| "valid_targets_mean": 6028.5, | |
| "valid_targets_min": 4123 | |
| }, | |
| { | |
| "epoch": 0.8189880020865936, | |
| "grad_norm": 0.4517935578186838, | |
| "learning_rate": 3.9966084725409675e-05, | |
| "loss": 0.2144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10242894291877747, | |
| "step": 785, | |
| "valid_targets_mean": 5918.5, | |
| "valid_targets_min": 4110 | |
| }, | |
| { | |
| "epoch": 0.8242044861763171, | |
| "grad_norm": 0.47360656921204075, | |
| "learning_rate": 3.996298993838836e-05, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09789572656154633, | |
| "step": 790, | |
| "valid_targets_mean": 5972.8, | |
| "valid_targets_min": 3722 | |
| }, | |
| { | |
| "epoch": 0.8294209702660407, | |
| "grad_norm": 0.4596424691521172, | |
| "learning_rate": 3.995976017820392e-05, | |
| "loss": 0.231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10415824502706528, | |
| "step": 795, | |
| "valid_targets_mean": 5983.5, | |
| "valid_targets_min": 5322 | |
| }, | |
| { | |
| "epoch": 0.8346374543557642, | |
| "grad_norm": 0.47944444287089183, | |
| "learning_rate": 3.995639546669331e-05, | |
| "loss": 0.2177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10461558401584625, | |
| "step": 800, | |
| "valid_targets_mean": 5483.4, | |
| "valid_targets_min": 4716 | |
| }, | |
| { | |
| "epoch": 0.8398539384454877, | |
| "grad_norm": 0.5170137007543419, | |
| "learning_rate": 3.995289582660593e-05, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1283588707447052, | |
| "step": 805, | |
| "valid_targets_mean": 6165.9, | |
| "valid_targets_min": 4711 | |
| }, | |
| { | |
| "epoch": 0.8450704225352113, | |
| "grad_norm": 0.4463895250398355, | |
| "learning_rate": 3.9949261281603415e-05, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09253406524658203, | |
| "step": 810, | |
| "valid_targets_mean": 5928.9, | |
| "valid_targets_min": 4670 | |
| }, | |
| { | |
| "epoch": 0.8502869066249348, | |
| "grad_norm": 0.4651574483030032, | |
| "learning_rate": 3.9945491856259556e-05, | |
| "loss": 0.1962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10833393782377243, | |
| "step": 815, | |
| "valid_targets_mean": 6224.0, | |
| "valid_targets_min": 4852 | |
| }, | |
| { | |
| "epoch": 0.8555033907146583, | |
| "grad_norm": 0.4382655668762648, | |
| "learning_rate": 3.994158757606009e-05, | |
| "loss": 0.1929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10754010081291199, | |
| "step": 820, | |
| "valid_targets_mean": 5841.4, | |
| "valid_targets_min": 4162 | |
| }, | |
| { | |
| "epoch": 0.8607198748043818, | |
| "grad_norm": 0.3894395406653489, | |
| "learning_rate": 3.993754846740249e-05, | |
| "loss": 0.2062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10286466777324677, | |
| "step": 825, | |
| "valid_targets_mean": 7206.6, | |
| "valid_targets_min": 4924 | |
| }, | |
| { | |
| "epoch": 0.8659363588941054, | |
| "grad_norm": 0.48099092498176166, | |
| "learning_rate": 3.9933374557595875e-05, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10085944831371307, | |
| "step": 830, | |
| "valid_targets_mean": 5832.1, | |
| "valid_targets_min": 3853 | |
| }, | |
| { | |
| "epoch": 0.8711528429838289, | |
| "grad_norm": 0.47184422919834834, | |
| "learning_rate": 3.992906587486076e-05, | |
| "loss": 0.2429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10797993838787079, | |
| "step": 835, | |
| "valid_targets_mean": 5730.9, | |
| "valid_targets_min": 4650 | |
| }, | |
| { | |
| "epoch": 0.8763693270735524, | |
| "grad_norm": 0.46782257061436877, | |
| "learning_rate": 3.992462244832886e-05, | |
| "loss": 0.2603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11959731578826904, | |
| "step": 840, | |
| "valid_targets_mean": 6447.1, | |
| "valid_targets_min": 5258 | |
| }, | |
| { | |
| "epoch": 0.881585811163276, | |
| "grad_norm": 0.4651290289668339, | |
| "learning_rate": 3.9920044308042956e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11168893426656723, | |
| "step": 845, | |
| "valid_targets_mean": 5665.6, | |
| "valid_targets_min": 4873 | |
| }, | |
| { | |
| "epoch": 0.8868022952529995, | |
| "grad_norm": 0.5932031818522853, | |
| "learning_rate": 3.991533148495662e-05, | |
| "loss": 0.2236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11731742322444916, | |
| "step": 850, | |
| "valid_targets_mean": 5372.9, | |
| "valid_targets_min": 4540 | |
| }, | |
| { | |
| "epoch": 0.892018779342723, | |
| "grad_norm": 0.43961559014436263, | |
| "learning_rate": 3.991048401093405e-05, | |
| "loss": 0.2171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12583346664905548, | |
| "step": 855, | |
| "valid_targets_mean": 6109.0, | |
| "valid_targets_min": 4826 | |
| }, | |
| { | |
| "epoch": 0.8972352634324465, | |
| "grad_norm": 0.6418919220085479, | |
| "learning_rate": 3.990550191874985e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12164339423179626, | |
| "step": 860, | |
| "valid_targets_mean": 7653.9, | |
| "valid_targets_min": 5227 | |
| }, | |
| { | |
| "epoch": 0.9024517475221701, | |
| "grad_norm": 0.42543275340022146, | |
| "learning_rate": 3.990038524208878e-05, | |
| "loss": 0.2206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10280366986989975, | |
| "step": 865, | |
| "valid_targets_mean": 6947.5, | |
| "valid_targets_min": 4757 | |
| }, | |
| { | |
| "epoch": 0.9076682316118936, | |
| "grad_norm": 0.4803748859867178, | |
| "learning_rate": 3.9895134015545565e-05, | |
| "loss": 0.2257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11008107662200928, | |
| "step": 870, | |
| "valid_targets_mean": 5976.1, | |
| "valid_targets_min": 4475 | |
| }, | |
| { | |
| "epoch": 0.9128847157016171, | |
| "grad_norm": 0.4273786843010214, | |
| "learning_rate": 3.988974827462464e-05, | |
| "loss": 0.2237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1067737489938736, | |
| "step": 875, | |
| "valid_targets_mean": 6807.8, | |
| "valid_targets_min": 5634 | |
| }, | |
| { | |
| "epoch": 0.9181011997913406, | |
| "grad_norm": 0.44013597696925655, | |
| "learning_rate": 3.98842280557399e-05, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10496058315038681, | |
| "step": 880, | |
| "valid_targets_mean": 6153.1, | |
| "valid_targets_min": 3621 | |
| }, | |
| { | |
| "epoch": 0.9233176838810642, | |
| "grad_norm": 0.4227190404874564, | |
| "learning_rate": 3.98785733962145e-05, | |
| "loss": 0.2138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10483935475349426, | |
| "step": 885, | |
| "valid_targets_mean": 6605.1, | |
| "valid_targets_min": 4758 | |
| }, | |
| { | |
| "epoch": 0.9285341679707877, | |
| "grad_norm": 0.43576429344018913, | |
| "learning_rate": 3.9872784334280555e-05, | |
| "loss": 0.2633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10156159102916718, | |
| "step": 890, | |
| "valid_targets_mean": 5819.9, | |
| "valid_targets_min": 4140 | |
| }, | |
| { | |
| "epoch": 0.9337506520605112, | |
| "grad_norm": 0.46514985205585935, | |
| "learning_rate": 3.9866860909078876e-05, | |
| "loss": 0.2061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1024516150355339, | |
| "step": 895, | |
| "valid_targets_mean": 5897.2, | |
| "valid_targets_min": 5062 | |
| }, | |
| { | |
| "epoch": 0.9389671361502347, | |
| "grad_norm": 0.5208774493378518, | |
| "learning_rate": 3.9860803160658756e-05, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11040855944156647, | |
| "step": 900, | |
| "valid_targets_mean": 5454.0, | |
| "valid_targets_min": 4267 | |
| }, | |
| { | |
| "epoch": 0.9441836202399583, | |
| "grad_norm": 0.514050129387818, | |
| "learning_rate": 3.985461112997766e-05, | |
| "loss": 0.2298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12151403725147247, | |
| "step": 905, | |
| "valid_targets_mean": 6005.0, | |
| "valid_targets_min": 4615 | |
| }, | |
| { | |
| "epoch": 0.9494001043296818, | |
| "grad_norm": 0.44201908315431143, | |
| "learning_rate": 3.9848284858900955e-05, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0945250540971756, | |
| "step": 910, | |
| "valid_targets_mean": 6020.8, | |
| "valid_targets_min": 5087 | |
| }, | |
| { | |
| "epoch": 0.9546165884194053, | |
| "grad_norm": 0.4554599833118601, | |
| "learning_rate": 3.984182439020164e-05, | |
| "loss": 0.2023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11378178000450134, | |
| "step": 915, | |
| "valid_targets_mean": 6847.1, | |
| "valid_targets_min": 3685 | |
| }, | |
| { | |
| "epoch": 0.9598330725091289, | |
| "grad_norm": 0.4833818162489285, | |
| "learning_rate": 3.9835229767560034e-05, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08089777082204819, | |
| "step": 920, | |
| "valid_targets_mean": 5624.6, | |
| "valid_targets_min": 3437 | |
| }, | |
| { | |
| "epoch": 0.9650495565988524, | |
| "grad_norm": 0.42418357022867553, | |
| "learning_rate": 3.982850103556351e-05, | |
| "loss": 0.216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10691383481025696, | |
| "step": 925, | |
| "valid_targets_mean": 7199.6, | |
| "valid_targets_min": 5647 | |
| }, | |
| { | |
| "epoch": 0.9702660406885759, | |
| "grad_norm": 0.4029607236424348, | |
| "learning_rate": 3.982163823970615e-05, | |
| "loss": 0.2048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10468834638595581, | |
| "step": 930, | |
| "valid_targets_mean": 7090.4, | |
| "valid_targets_min": 4651 | |
| }, | |
| { | |
| "epoch": 0.9754825247782994, | |
| "grad_norm": 0.39126665905309094, | |
| "learning_rate": 3.98146414263885e-05, | |
| "loss": 0.2077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09553714096546173, | |
| "step": 935, | |
| "valid_targets_mean": 6706.9, | |
| "valid_targets_min": 5050 | |
| }, | |
| { | |
| "epoch": 0.980699008868023, | |
| "grad_norm": 0.41170206086106825, | |
| "learning_rate": 3.980751064291719e-05, | |
| "loss": 0.216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11391007900238037, | |
| "step": 940, | |
| "valid_targets_mean": 7697.8, | |
| "valid_targets_min": 6388 | |
| }, | |
| { | |
| "epoch": 0.9859154929577465, | |
| "grad_norm": 0.505372747693757, | |
| "learning_rate": 3.980024593750466e-05, | |
| "loss": 0.2001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10809037834405899, | |
| "step": 945, | |
| "valid_targets_mean": 5380.2, | |
| "valid_targets_min": 3662 | |
| }, | |
| { | |
| "epoch": 0.99113197704747, | |
| "grad_norm": 0.4451499174958982, | |
| "learning_rate": 3.979284735926882e-05, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10708364844322205, | |
| "step": 950, | |
| "valid_targets_mean": 6321.4, | |
| "valid_targets_min": 5334 | |
| }, | |
| { | |
| "epoch": 0.9963484611371936, | |
| "grad_norm": 0.4917382160537446, | |
| "learning_rate": 3.978531495823271e-05, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10692022740840912, | |
| "step": 955, | |
| "valid_targets_mean": 5324.9, | |
| "valid_targets_min": 3545 | |
| }, | |
| { | |
| "epoch": 1.0010432968179448, | |
| "grad_norm": 0.8378955993928049, | |
| "learning_rate": 3.977764878532418e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16950902342796326, | |
| "step": 960, | |
| "valid_targets_mean": 2562.9, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 1.0062597809076683, | |
| "grad_norm": 0.6446257081141612, | |
| "learning_rate": 3.976984889237551e-05, | |
| "loss": 0.3429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15429827570915222, | |
| "step": 965, | |
| "valid_targets_mean": 3426.9, | |
| "valid_targets_min": 533 | |
| }, | |
| { | |
| "epoch": 1.0114762649973918, | |
| "grad_norm": 0.6305372437013509, | |
| "learning_rate": 3.9761915332123113e-05, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14412429928779602, | |
| "step": 970, | |
| "valid_targets_mean": 3435.4, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 1.0166927490871154, | |
| "grad_norm": 0.7466803095951341, | |
| "learning_rate": 3.975384815820713e-05, | |
| "loss": 0.3327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20105507969856262, | |
| "step": 975, | |
| "valid_targets_mean": 3926.2, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 1.021909233176839, | |
| "grad_norm": 0.5621346691856898, | |
| "learning_rate": 3.974564742517109e-05, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1472310721874237, | |
| "step": 980, | |
| "valid_targets_mean": 3980.1, | |
| "valid_targets_min": 3308 | |
| }, | |
| { | |
| "epoch": 1.0271257172665624, | |
| "grad_norm": 0.5957802492642164, | |
| "learning_rate": 3.973731318846155e-05, | |
| "loss": 0.3406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17282098531723022, | |
| "step": 985, | |
| "valid_targets_mean": 4755.1, | |
| "valid_targets_min": 1432 | |
| }, | |
| { | |
| "epoch": 1.032342201356286, | |
| "grad_norm": 0.5221474758180245, | |
| "learning_rate": 3.97288455044277e-05, | |
| "loss": 0.332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1489165723323822, | |
| "step": 990, | |
| "valid_targets_mean": 4481.8, | |
| "valid_targets_min": 1541 | |
| }, | |
| { | |
| "epoch": 1.0375586854460095, | |
| "grad_norm": 0.5719731030961381, | |
| "learning_rate": 3.972024443032098e-05, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21812763810157776, | |
| "step": 995, | |
| "valid_targets_mean": 6505.9, | |
| "valid_targets_min": 4293 | |
| }, | |
| { | |
| "epoch": 1.042775169535733, | |
| "grad_norm": 0.5691300314579961, | |
| "learning_rate": 3.971151002429471e-05, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12242749333381653, | |
| "step": 1000, | |
| "valid_targets_mean": 3263.4, | |
| "valid_targets_min": 1732 | |
| }, | |
| { | |
| "epoch": 1.0479916536254565, | |
| "grad_norm": 0.5408297211939609, | |
| "learning_rate": 3.97026423454037e-05, | |
| "loss": 0.3368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14512574672698975, | |
| "step": 1005, | |
| "valid_targets_mean": 4176.4, | |
| "valid_targets_min": 2309 | |
| }, | |
| { | |
| "epoch": 1.05320813771518, | |
| "grad_norm": 0.601945353445737, | |
| "learning_rate": 3.969364145360383e-05, | |
| "loss": 0.3346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17231485247612, | |
| "step": 1010, | |
| "valid_targets_mean": 4178.6, | |
| "valid_targets_min": 2823 | |
| }, | |
| { | |
| "epoch": 1.0584246218049036, | |
| "grad_norm": 0.42852454510667437, | |
| "learning_rate": 3.9684507409751655e-05, | |
| "loss": 0.3213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16687265038490295, | |
| "step": 1015, | |
| "valid_targets_mean": 7147.0, | |
| "valid_targets_min": 2745 | |
| }, | |
| { | |
| "epoch": 1.063641105894627, | |
| "grad_norm": 0.5521174900420562, | |
| "learning_rate": 3.967524027560401e-05, | |
| "loss": 0.3337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16266818344593048, | |
| "step": 1020, | |
| "valid_targets_mean": 4228.4, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 1.0688575899843507, | |
| "grad_norm": 0.5257080424201093, | |
| "learning_rate": 3.966584011381753e-05, | |
| "loss": 0.3269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14823390543460846, | |
| "step": 1025, | |
| "valid_targets_mean": 3960.5, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 1.074074074074074, | |
| "grad_norm": 0.6389896834673456, | |
| "learning_rate": 3.965630698794833e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13912534713745117, | |
| "step": 1030, | |
| "valid_targets_mean": 2968.0, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 1.0792905581637977, | |
| "grad_norm": 0.5636494649464656, | |
| "learning_rate": 3.964664096245149e-05, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1615818440914154, | |
| "step": 1035, | |
| "valid_targets_mean": 4079.0, | |
| "valid_targets_min": 1762 | |
| }, | |
| { | |
| "epoch": 1.084507042253521, | |
| "grad_norm": 0.6094705127582944, | |
| "learning_rate": 3.9636842102680635e-05, | |
| "loss": 0.3233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1759353131055832, | |
| "step": 1040, | |
| "valid_targets_mean": 4510.4, | |
| "valid_targets_min": 2458 | |
| }, | |
| { | |
| "epoch": 1.0897235263432448, | |
| "grad_norm": 0.6154159746623598, | |
| "learning_rate": 3.962691047488754e-05, | |
| "loss": 0.3428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22893056273460388, | |
| "step": 1045, | |
| "valid_targets_mean": 4449.2, | |
| "valid_targets_min": 1058 | |
| }, | |
| { | |
| "epoch": 1.094940010432968, | |
| "grad_norm": 0.7033685369065921, | |
| "learning_rate": 3.9616846146221606e-05, | |
| "loss": 0.348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1754748523235321, | |
| "step": 1050, | |
| "valid_targets_mean": 3653.0, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 1.1001564945226918, | |
| "grad_norm": 0.5948368516282443, | |
| "learning_rate": 3.960664918472947e-05, | |
| "loss": 0.3455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1763189136981964, | |
| "step": 1055, | |
| "valid_targets_mean": 3991.8, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 1.1053729786124151, | |
| "grad_norm": 0.6229669186541902, | |
| "learning_rate": 3.959631965935453e-05, | |
| "loss": 0.3395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1391282081604004, | |
| "step": 1060, | |
| "valid_targets_mean": 2786.0, | |
| "valid_targets_min": 1291 | |
| }, | |
| { | |
| "epoch": 1.1105894627021389, | |
| "grad_norm": 0.5864075857416087, | |
| "learning_rate": 3.958585763993646e-05, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12115520238876343, | |
| "step": 1065, | |
| "valid_targets_mean": 2281.9, | |
| "valid_targets_min": 1133 | |
| }, | |
| { | |
| "epoch": 1.1158059467918622, | |
| "grad_norm": 0.6413925441508026, | |
| "learning_rate": 3.9575263197210736e-05, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18672391772270203, | |
| "step": 1070, | |
| "valid_targets_mean": 4559.8, | |
| "valid_targets_min": 2259 | |
| }, | |
| { | |
| "epoch": 1.1210224308815857, | |
| "grad_norm": 0.5544188595252468, | |
| "learning_rate": 3.9564536402808194e-05, | |
| "loss": 0.3078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17214182019233704, | |
| "step": 1075, | |
| "valid_targets_mean": 4101.8, | |
| "valid_targets_min": 2223 | |
| }, | |
| { | |
| "epoch": 1.1262389149713092, | |
| "grad_norm": 0.5874963646035859, | |
| "learning_rate": 3.955367732925451e-05, | |
| "loss": 0.3415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14104487001895905, | |
| "step": 1080, | |
| "valid_targets_mean": 4128.5, | |
| "valid_targets_min": 1957 | |
| }, | |
| { | |
| "epoch": 1.131455399061033, | |
| "grad_norm": 0.5928993387257582, | |
| "learning_rate": 3.954268604996972e-05, | |
| "loss": 0.3367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23198959231376648, | |
| "step": 1085, | |
| "valid_targets_mean": 4823.2, | |
| "valid_targets_min": 3425 | |
| }, | |
| { | |
| "epoch": 1.1366718831507563, | |
| "grad_norm": 0.5936265624865743, | |
| "learning_rate": 3.953156263926773e-05, | |
| "loss": 0.3167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1535320281982422, | |
| "step": 1090, | |
| "valid_targets_mean": 3564.6, | |
| "valid_targets_min": 2353 | |
| }, | |
| { | |
| "epoch": 1.14188836724048, | |
| "grad_norm": 0.6869345587603003, | |
| "learning_rate": 3.952030717235581e-05, | |
| "loss": 0.3271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15886715054512024, | |
| "step": 1095, | |
| "valid_targets_mean": 3194.1, | |
| "valid_targets_min": 1009 | |
| }, | |
| { | |
| "epoch": 1.1471048513302033, | |
| "grad_norm": 0.6344387401593671, | |
| "learning_rate": 3.950891972533408e-05, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18219539523124695, | |
| "step": 1100, | |
| "valid_targets_mean": 3638.1, | |
| "valid_targets_min": 1717 | |
| }, | |
| { | |
| "epoch": 1.1523213354199269, | |
| "grad_norm": 0.586578554627154, | |
| "learning_rate": 3.9497400375195e-05, | |
| "loss": 0.327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14436474442481995, | |
| "step": 1105, | |
| "valid_targets_mean": 3436.4, | |
| "valid_targets_min": 1847 | |
| }, | |
| { | |
| "epoch": 1.1575378195096504, | |
| "grad_norm": 0.6649594487774892, | |
| "learning_rate": 3.948574919982286e-05, | |
| "loss": 0.3315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18231745064258575, | |
| "step": 1110, | |
| "valid_targets_mean": 3374.8, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 1.162754303599374, | |
| "grad_norm": 0.6851041431488697, | |
| "learning_rate": 3.947396627799322e-05, | |
| "loss": 0.3178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18925002217292786, | |
| "step": 1115, | |
| "valid_targets_mean": 3286.4, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 1.1679707876890975, | |
| "grad_norm": 0.6134568002242069, | |
| "learning_rate": 3.946205168937243e-05, | |
| "loss": 0.3262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16592437028884888, | |
| "step": 1120, | |
| "valid_targets_mean": 3616.6, | |
| "valid_targets_min": 2175 | |
| }, | |
| { | |
| "epoch": 1.173187271778821, | |
| "grad_norm": 0.6070267903835361, | |
| "learning_rate": 3.945000551451703e-05, | |
| "loss": 0.3179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.167659193277359, | |
| "step": 1125, | |
| "valid_targets_mean": 4052.4, | |
| "valid_targets_min": 3176 | |
| }, | |
| { | |
| "epoch": 1.1784037558685445, | |
| "grad_norm": 0.6330205969711841, | |
| "learning_rate": 3.9437827834873265e-05, | |
| "loss": 0.3195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15645045042037964, | |
| "step": 1130, | |
| "valid_targets_mean": 3447.2, | |
| "valid_targets_min": 513 | |
| }, | |
| { | |
| "epoch": 1.183620239958268, | |
| "grad_norm": 0.6204503869163316, | |
| "learning_rate": 3.942551873277649e-05, | |
| "loss": 0.3237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18151849508285522, | |
| "step": 1135, | |
| "valid_targets_mean": 4041.1, | |
| "valid_targets_min": 2558 | |
| }, | |
| { | |
| "epoch": 1.1888367240479916, | |
| "grad_norm": 0.6354317358087163, | |
| "learning_rate": 3.941307829145063e-05, | |
| "loss": 0.3336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15205983817577362, | |
| "step": 1140, | |
| "valid_targets_mean": 3804.2, | |
| "valid_targets_min": 2295 | |
| }, | |
| { | |
| "epoch": 1.194053208137715, | |
| "grad_norm": 0.6025462323032041, | |
| "learning_rate": 3.9400506595007624e-05, | |
| "loss": 0.3198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14049166440963745, | |
| "step": 1145, | |
| "valid_targets_mean": 3320.9, | |
| "valid_targets_min": 1860 | |
| }, | |
| { | |
| "epoch": 1.1992696922274386, | |
| "grad_norm": 0.6398549687933319, | |
| "learning_rate": 3.938780372844685e-05, | |
| "loss": 0.3196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20037490129470825, | |
| "step": 1150, | |
| "valid_targets_mean": 5827.2, | |
| "valid_targets_min": 2495 | |
| }, | |
| { | |
| "epoch": 1.2044861763171621, | |
| "grad_norm": 0.570622453965453, | |
| "learning_rate": 3.9374969777654535e-05, | |
| "loss": 0.3123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1401764154434204, | |
| "step": 1155, | |
| "valid_targets_mean": 3450.2, | |
| "valid_targets_min": 1292 | |
| }, | |
| { | |
| "epoch": 1.2097026604068857, | |
| "grad_norm": 0.733687803044232, | |
| "learning_rate": 3.93620048294032e-05, | |
| "loss": 0.3377, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16313032805919647, | |
| "step": 1160, | |
| "valid_targets_mean": 3142.2, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 1.2149191444966092, | |
| "grad_norm": 0.560796966341423, | |
| "learning_rate": 3.9348908971351065e-05, | |
| "loss": 0.3157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16817793250083923, | |
| "step": 1165, | |
| "valid_targets_mean": 4196.9, | |
| "valid_targets_min": 1757 | |
| }, | |
| { | |
| "epoch": 1.2201356285863327, | |
| "grad_norm": 0.5912305372810629, | |
| "learning_rate": 3.933568229204145e-05, | |
| "loss": 0.3227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16732439398765564, | |
| "step": 1170, | |
| "valid_targets_mean": 4234.8, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 1.2253521126760563, | |
| "grad_norm": 0.6397956440517107, | |
| "learning_rate": 3.9322324880902177e-05, | |
| "loss": 0.3166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13434633612632751, | |
| "step": 1175, | |
| "valid_targets_mean": 3103.6, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 1.2305685967657798, | |
| "grad_norm": 0.5841121801763032, | |
| "learning_rate": 3.930883682824496e-05, | |
| "loss": 0.3105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16110514104366302, | |
| "step": 1180, | |
| "valid_targets_mean": 3483.9, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 1.2357850808555033, | |
| "grad_norm": 0.6517696368902286, | |
| "learning_rate": 3.929521822526483e-05, | |
| "loss": 0.3155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17525508999824524, | |
| "step": 1185, | |
| "valid_targets_mean": 3176.0, | |
| "valid_targets_min": 1200 | |
| }, | |
| { | |
| "epoch": 1.2410015649452268, | |
| "grad_norm": 0.5804702856187468, | |
| "learning_rate": 3.928146916403946e-05, | |
| "loss": 0.3165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14085638523101807, | |
| "step": 1190, | |
| "valid_targets_mean": 4101.2, | |
| "valid_targets_min": 2135 | |
| }, | |
| { | |
| "epoch": 1.2462180490349504, | |
| "grad_norm": 0.7583587502296718, | |
| "learning_rate": 3.926758973752859e-05, | |
| "loss": 0.3132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17744779586791992, | |
| "step": 1195, | |
| "valid_targets_mean": 3469.1, | |
| "valid_targets_min": 2518 | |
| }, | |
| { | |
| "epoch": 1.251434533124674, | |
| "grad_norm": 0.5845399060121019, | |
| "learning_rate": 3.925358003957338e-05, | |
| "loss": 0.3353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14477834105491638, | |
| "step": 1200, | |
| "valid_targets_mean": 3631.1, | |
| "valid_targets_min": 2182 | |
| }, | |
| { | |
| "epoch": 1.2566510172143974, | |
| "grad_norm": 0.6065030920065625, | |
| "learning_rate": 3.923944016489578e-05, | |
| "loss": 0.3161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1663917750120163, | |
| "step": 1205, | |
| "valid_targets_mean": 3587.1, | |
| "valid_targets_min": 1361 | |
| }, | |
| { | |
| "epoch": 1.261867501304121, | |
| "grad_norm": 0.5701395822551645, | |
| "learning_rate": 3.9225170209097865e-05, | |
| "loss": 0.3191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14398005604743958, | |
| "step": 1210, | |
| "valid_targets_mean": 3687.1, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 1.2670839853938445, | |
| "grad_norm": 0.6855611566760392, | |
| "learning_rate": 3.921077026866125e-05, | |
| "loss": 0.3198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1499272882938385, | |
| "step": 1215, | |
| "valid_targets_mean": 2796.0, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 1.272300469483568, | |
| "grad_norm": 0.6356454623808746, | |
| "learning_rate": 3.919624044094636e-05, | |
| "loss": 0.3195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15461692214012146, | |
| "step": 1220, | |
| "valid_targets_mean": 3220.1, | |
| "valid_targets_min": 2200 | |
| }, | |
| { | |
| "epoch": 1.2775169535732915, | |
| "grad_norm": 0.5555325401302529, | |
| "learning_rate": 3.918158082419184e-05, | |
| "loss": 0.3029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1733340173959732, | |
| "step": 1225, | |
| "valid_targets_mean": 4378.4, | |
| "valid_targets_min": 2241 | |
| }, | |
| { | |
| "epoch": 1.282733437663015, | |
| "grad_norm": 0.5799804925757468, | |
| "learning_rate": 3.916679151751383e-05, | |
| "loss": 0.3049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18741272389888763, | |
| "step": 1230, | |
| "valid_targets_mean": 4750.9, | |
| "valid_targets_min": 2623 | |
| }, | |
| { | |
| "epoch": 1.2879499217527386, | |
| "grad_norm": 0.5241116773515914, | |
| "learning_rate": 3.9151872620905356e-05, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13187400996685028, | |
| "step": 1235, | |
| "valid_targets_mean": 4176.5, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 1.2931664058424621, | |
| "grad_norm": 0.5919650170259568, | |
| "learning_rate": 3.913682423523561e-05, | |
| "loss": 0.331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16752025485038757, | |
| "step": 1240, | |
| "valid_targets_mean": 3480.9, | |
| "valid_targets_min": 1220 | |
| }, | |
| { | |
| "epoch": 1.2983828899321856, | |
| "grad_norm": 0.6002772380216232, | |
| "learning_rate": 3.912164646224927e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1587284505367279, | |
| "step": 1245, | |
| "valid_targets_mean": 4020.6, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 1.3035993740219092, | |
| "grad_norm": 0.5962199336975232, | |
| "learning_rate": 3.910633940456585e-05, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.158216193318367, | |
| "step": 1250, | |
| "valid_targets_mean": 4179.1, | |
| "valid_targets_min": 2913 | |
| }, | |
| { | |
| "epoch": 1.3088158581116327, | |
| "grad_norm": 0.7398403344872735, | |
| "learning_rate": 3.909090316567896e-05, | |
| "loss": 0.3155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1846354752779007, | |
| "step": 1255, | |
| "valid_targets_mean": 2762.0, | |
| "valid_targets_min": 1608 | |
| }, | |
| { | |
| "epoch": 1.3140323422013562, | |
| "grad_norm": 0.683250642718712, | |
| "learning_rate": 3.9075337849955614e-05, | |
| "loss": 0.3258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15074422955513, | |
| "step": 1260, | |
| "valid_targets_mean": 3711.9, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 1.3192488262910798, | |
| "grad_norm": 0.6301675314140326, | |
| "learning_rate": 3.905964356263558e-05, | |
| "loss": 0.3136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16197679936885834, | |
| "step": 1265, | |
| "valid_targets_mean": 3489.2, | |
| "valid_targets_min": 1344 | |
| }, | |
| { | |
| "epoch": 1.3244653103808033, | |
| "grad_norm": 0.7209487425067365, | |
| "learning_rate": 3.9043820409830585e-05, | |
| "loss": 0.3065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16962775588035583, | |
| "step": 1270, | |
| "valid_targets_mean": 3501.9, | |
| "valid_targets_min": 2528 | |
| }, | |
| { | |
| "epoch": 1.3296817944705268, | |
| "grad_norm": 0.5979297912196475, | |
| "learning_rate": 3.9027868498523645e-05, | |
| "loss": 0.3017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12032784521579742, | |
| "step": 1275, | |
| "valid_targets_mean": 2761.8, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 1.3348982785602503, | |
| "grad_norm": 0.6868505230637457, | |
| "learning_rate": 3.901178793656836e-05, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1563318967819214, | |
| "step": 1280, | |
| "valid_targets_mean": 3061.0, | |
| "valid_targets_min": 1369 | |
| }, | |
| { | |
| "epoch": 1.3401147626499739, | |
| "grad_norm": 0.6394674431184237, | |
| "learning_rate": 3.899557883268811e-05, | |
| "loss": 0.295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14201496541500092, | |
| "step": 1285, | |
| "valid_targets_mean": 3701.0, | |
| "valid_targets_min": 1703 | |
| }, | |
| { | |
| "epoch": 1.3453312467396974, | |
| "grad_norm": 0.5748072380366843, | |
| "learning_rate": 3.897924129647542e-05, | |
| "loss": 0.2981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15046973526477814, | |
| "step": 1290, | |
| "valid_targets_mean": 3942.5, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 1.350547730829421, | |
| "grad_norm": 0.6387971120173105, | |
| "learning_rate": 3.896277543839114e-05, | |
| "loss": 0.2935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16212090849876404, | |
| "step": 1295, | |
| "valid_targets_mean": 3333.9, | |
| "valid_targets_min": 2054 | |
| }, | |
| { | |
| "epoch": 1.3557642149191445, | |
| "grad_norm": 0.5921519478346233, | |
| "learning_rate": 3.894618136976372e-05, | |
| "loss": 0.3044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16046512126922607, | |
| "step": 1300, | |
| "valid_targets_mean": 3478.1, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 1.360980699008868, | |
| "grad_norm": 0.611214928458059, | |
| "learning_rate": 3.89294592027885e-05, | |
| "loss": 0.3345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18627116084098816, | |
| "step": 1305, | |
| "valid_targets_mean": 4058.5, | |
| "valid_targets_min": 2093 | |
| }, | |
| { | |
| "epoch": 1.3661971830985915, | |
| "grad_norm": 0.6519709053240794, | |
| "learning_rate": 3.891260905052685e-05, | |
| "loss": 0.2996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17116042971611023, | |
| "step": 1310, | |
| "valid_targets_mean": 3423.1, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 1.371413667188315, | |
| "grad_norm": 0.5586579674425736, | |
| "learning_rate": 3.8895631026905546e-05, | |
| "loss": 0.3129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13891048729419708, | |
| "step": 1315, | |
| "valid_targets_mean": 4211.0, | |
| "valid_targets_min": 2947 | |
| }, | |
| { | |
| "epoch": 1.3766301512780386, | |
| "grad_norm": 0.5780546610258079, | |
| "learning_rate": 3.887852524671587e-05, | |
| "loss": 0.2991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1390090137720108, | |
| "step": 1320, | |
| "valid_targets_mean": 4113.5, | |
| "valid_targets_min": 1789 | |
| }, | |
| { | |
| "epoch": 1.381846635367762, | |
| "grad_norm": 0.5896078555056574, | |
| "learning_rate": 3.886129182561291e-05, | |
| "loss": 0.3085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17480570077896118, | |
| "step": 1325, | |
| "valid_targets_mean": 4228.5, | |
| "valid_targets_min": 1586 | |
| }, | |
| { | |
| "epoch": 1.3870631194574856, | |
| "grad_norm": 0.6099379201099256, | |
| "learning_rate": 3.8843930880114745e-05, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16223594546318054, | |
| "step": 1330, | |
| "valid_targets_mean": 3750.8, | |
| "valid_targets_min": 2542 | |
| }, | |
| { | |
| "epoch": 1.3922796035472091, | |
| "grad_norm": 0.5403410745008206, | |
| "learning_rate": 3.882644252760168e-05, | |
| "loss": 0.2922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1450849026441574, | |
| "step": 1335, | |
| "valid_targets_mean": 4209.8, | |
| "valid_targets_min": 2513 | |
| }, | |
| { | |
| "epoch": 1.3974960876369327, | |
| "grad_norm": 0.5322096871735392, | |
| "learning_rate": 3.8808826886315426e-05, | |
| "loss": 0.308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12635044753551483, | |
| "step": 1340, | |
| "valid_targets_mean": 4118.8, | |
| "valid_targets_min": 2156 | |
| }, | |
| { | |
| "epoch": 1.4027125717266562, | |
| "grad_norm": 0.6370268176167063, | |
| "learning_rate": 3.8791084075358344e-05, | |
| "loss": 0.3, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12790429592132568, | |
| "step": 1345, | |
| "valid_targets_mean": 3040.5, | |
| "valid_targets_min": 1423 | |
| }, | |
| { | |
| "epoch": 1.4079290558163797, | |
| "grad_norm": 0.7152849011234534, | |
| "learning_rate": 3.8773214214692564e-05, | |
| "loss": 0.3069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17192471027374268, | |
| "step": 1350, | |
| "valid_targets_mean": 3219.5, | |
| "valid_targets_min": 2126 | |
| }, | |
| { | |
| "epoch": 1.4131455399061033, | |
| "grad_norm": 0.585658952372805, | |
| "learning_rate": 3.8755217425139264e-05, | |
| "loss": 0.3163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16190892457962036, | |
| "step": 1355, | |
| "valid_targets_mean": 3743.2, | |
| "valid_targets_min": 1834 | |
| }, | |
| { | |
| "epoch": 1.4183620239958268, | |
| "grad_norm": 0.5660076809460085, | |
| "learning_rate": 3.8737093828377785e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16197505593299866, | |
| "step": 1360, | |
| "valid_targets_mean": 4402.9, | |
| "valid_targets_min": 1571 | |
| }, | |
| { | |
| "epoch": 1.4235785080855503, | |
| "grad_norm": 0.5610578456411199, | |
| "learning_rate": 3.8718843546944844e-05, | |
| "loss": 0.3264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14896772801876068, | |
| "step": 1365, | |
| "valid_targets_mean": 3430.1, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 1.4287949921752738, | |
| "grad_norm": 0.5925402013623761, | |
| "learning_rate": 3.870046670423369e-05, | |
| "loss": 0.3131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1571653187274933, | |
| "step": 1370, | |
| "valid_targets_mean": 4407.6, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 1.4340114762649974, | |
| "grad_norm": 0.5607841666091471, | |
| "learning_rate": 3.868196342449327e-05, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17004147171974182, | |
| "step": 1375, | |
| "valid_targets_mean": 4384.6, | |
| "valid_targets_min": 2492 | |
| }, | |
| { | |
| "epoch": 1.439227960354721, | |
| "grad_norm": 0.5981394789508077, | |
| "learning_rate": 3.8663333832827415e-05, | |
| "loss": 0.3158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1658860146999359, | |
| "step": 1380, | |
| "valid_targets_mean": 4149.1, | |
| "valid_targets_min": 2664 | |
| }, | |
| { | |
| "epoch": 1.4444444444444444, | |
| "grad_norm": 0.6026191767443296, | |
| "learning_rate": 3.864457805519395e-05, | |
| "loss": 0.3169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.136149600148201, | |
| "step": 1385, | |
| "valid_targets_mean": 3506.6, | |
| "valid_targets_min": 1868 | |
| }, | |
| { | |
| "epoch": 1.449660928534168, | |
| "grad_norm": 0.6362502035872915, | |
| "learning_rate": 3.862569621840385e-05, | |
| "loss": 0.3043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19045168161392212, | |
| "step": 1390, | |
| "valid_targets_mean": 4142.5, | |
| "valid_targets_min": 2348 | |
| }, | |
| { | |
| "epoch": 1.4548774126238915, | |
| "grad_norm": 0.5876997474432417, | |
| "learning_rate": 3.860668845012044e-05, | |
| "loss": 0.3122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19057908654212952, | |
| "step": 1395, | |
| "valid_targets_mean": 4413.2, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 1.460093896713615, | |
| "grad_norm": 1.5418578601236932, | |
| "learning_rate": 3.8587554878858466e-05, | |
| "loss": 0.3163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13348877429962158, | |
| "step": 1400, | |
| "valid_targets_mean": 3051.0, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 1.4653103808033385, | |
| "grad_norm": 0.5628545409416917, | |
| "learning_rate": 3.8568295633983244e-05, | |
| "loss": 0.3028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14873261749744415, | |
| "step": 1405, | |
| "valid_targets_mean": 4212.6, | |
| "valid_targets_min": 2305 | |
| }, | |
| { | |
| "epoch": 1.470526864893062, | |
| "grad_norm": 0.6202358483981598, | |
| "learning_rate": 3.85489108457098e-05, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13567596673965454, | |
| "step": 1410, | |
| "valid_targets_mean": 3063.2, | |
| "valid_targets_min": 953 | |
| }, | |
| { | |
| "epoch": 1.4757433489827856, | |
| "grad_norm": 0.5813043102348048, | |
| "learning_rate": 3.8529400645101984e-05, | |
| "loss": 0.2916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14660485088825226, | |
| "step": 1415, | |
| "valid_targets_mean": 4380.8, | |
| "valid_targets_min": 1659 | |
| }, | |
| { | |
| "epoch": 1.4809598330725091, | |
| "grad_norm": 0.595605860850473, | |
| "learning_rate": 3.850976516407157e-05, | |
| "loss": 0.3029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11824309825897217, | |
| "step": 1420, | |
| "valid_targets_mean": 3453.9, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 1.4861763171622326, | |
| "grad_norm": 0.5511349167240491, | |
| "learning_rate": 3.8490004535377356e-05, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.134815976023674, | |
| "step": 1425, | |
| "valid_targets_mean": 4094.1, | |
| "valid_targets_min": 2032 | |
| }, | |
| { | |
| "epoch": 1.4913928012519562, | |
| "grad_norm": 0.5606097930932913, | |
| "learning_rate": 3.8470118892624345e-05, | |
| "loss": 0.3118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13073913753032684, | |
| "step": 1430, | |
| "valid_targets_mean": 3808.8, | |
| "valid_targets_min": 1464 | |
| }, | |
| { | |
| "epoch": 1.4966092853416797, | |
| "grad_norm": 0.6335350150165068, | |
| "learning_rate": 3.8450108370262714e-05, | |
| "loss": 0.3025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1560460478067398, | |
| "step": 1435, | |
| "valid_targets_mean": 3471.8, | |
| "valid_targets_min": 1826 | |
| }, | |
| { | |
| "epoch": 1.5018257694314032, | |
| "grad_norm": 0.5593670597801185, | |
| "learning_rate": 3.8429973103587016e-05, | |
| "loss": 0.3011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13078095018863678, | |
| "step": 1440, | |
| "valid_targets_mean": 3982.6, | |
| "valid_targets_min": 2448 | |
| }, | |
| { | |
| "epoch": 1.5070422535211268, | |
| "grad_norm": 0.6762515476108246, | |
| "learning_rate": 3.840971322873519e-05, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17632314562797546, | |
| "step": 1445, | |
| "valid_targets_mean": 3853.2, | |
| "valid_targets_min": 2194 | |
| }, | |
| { | |
| "epoch": 1.5122587376108503, | |
| "grad_norm": 0.5639202236995347, | |
| "learning_rate": 3.838932888268771e-05, | |
| "loss": 0.3081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15243171155452728, | |
| "step": 1450, | |
| "valid_targets_mean": 4452.6, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 1.5174752217005738, | |
| "grad_norm": 0.5092280006318584, | |
| "learning_rate": 3.836882020326658e-05, | |
| "loss": 0.2962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14420181512832642, | |
| "step": 1455, | |
| "valid_targets_mean": 5409.0, | |
| "valid_targets_min": 2251 | |
| }, | |
| { | |
| "epoch": 1.5226917057902973, | |
| "grad_norm": 0.5326342407810176, | |
| "learning_rate": 3.834818732913448e-05, | |
| "loss": 0.2851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1596958041191101, | |
| "step": 1460, | |
| "valid_targets_mean": 5085.1, | |
| "valid_targets_min": 3257 | |
| }, | |
| { | |
| "epoch": 1.5279081898800209, | |
| "grad_norm": 0.6178023248148726, | |
| "learning_rate": 3.8327430399793754e-05, | |
| "loss": 0.2865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13015152513980865, | |
| "step": 1465, | |
| "valid_targets_mean": 3407.0, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 1.5331246739697444, | |
| "grad_norm": 0.5739656944118157, | |
| "learning_rate": 3.8306549555585536e-05, | |
| "loss": 0.287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13849002122879028, | |
| "step": 1470, | |
| "valid_targets_mean": 3681.6, | |
| "valid_targets_min": 1614 | |
| }, | |
| { | |
| "epoch": 1.538341158059468, | |
| "grad_norm": 0.5401366858857004, | |
| "learning_rate": 3.828554493768876e-05, | |
| "loss": 0.2874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16584202647209167, | |
| "step": 1475, | |
| "valid_targets_mean": 5232.4, | |
| "valid_targets_min": 2532 | |
| }, | |
| { | |
| "epoch": 1.5435576421491914, | |
| "grad_norm": 0.6052437719171743, | |
| "learning_rate": 3.826441668811921e-05, | |
| "loss": 0.287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14197075366973877, | |
| "step": 1480, | |
| "valid_targets_mean": 3577.1, | |
| "valid_targets_min": 2054 | |
| }, | |
| { | |
| "epoch": 1.548774126238915, | |
| "grad_norm": 0.6460584522406361, | |
| "learning_rate": 3.8243164949728565e-05, | |
| "loss": 0.2919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1488100290298462, | |
| "step": 1485, | |
| "valid_targets_mean": 3497.2, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 1.5539906103286385, | |
| "grad_norm": 0.6164857042201527, | |
| "learning_rate": 3.8221789866203434e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1157347708940506, | |
| "step": 1490, | |
| "valid_targets_mean": 2877.4, | |
| "valid_targets_min": 1515 | |
| }, | |
| { | |
| "epoch": 1.559207094418362, | |
| "grad_norm": 1.0244435204665334, | |
| "learning_rate": 3.820029158206438e-05, | |
| "loss": 0.3025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1597973108291626, | |
| "step": 1495, | |
| "valid_targets_mean": 2994.8, | |
| "valid_targets_min": 1128 | |
| }, | |
| { | |
| "epoch": 1.5644235785080856, | |
| "grad_norm": 0.6611187118475268, | |
| "learning_rate": 3.817867024266497e-05, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12763801217079163, | |
| "step": 1500, | |
| "valid_targets_mean": 2844.6, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 1.569640062597809, | |
| "grad_norm": 0.6750848813759216, | |
| "learning_rate": 3.8156925994190735e-05, | |
| "loss": 0.294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17121607065200806, | |
| "step": 1505, | |
| "valid_targets_mean": 3376.0, | |
| "valid_targets_min": 1975 | |
| }, | |
| { | |
| "epoch": 1.5748565466875326, | |
| "grad_norm": 0.5791177713129599, | |
| "learning_rate": 3.813505898365822e-05, | |
| "loss": 0.2849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14418327808380127, | |
| "step": 1510, | |
| "valid_targets_mean": 4036.9, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 1.5800730307772561, | |
| "grad_norm": 0.6440689774080632, | |
| "learning_rate": 3.8113069358914024e-05, | |
| "loss": 0.2949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1597634255886078, | |
| "step": 1515, | |
| "valid_targets_mean": 3313.1, | |
| "valid_targets_min": 1882 | |
| }, | |
| { | |
| "epoch": 1.5852895148669797, | |
| "grad_norm": 0.6426823727491069, | |
| "learning_rate": 3.80909572686337e-05, | |
| "loss": 0.282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1470492035150528, | |
| "step": 1520, | |
| "valid_targets_mean": 3570.2, | |
| "valid_targets_min": 1605 | |
| }, | |
| { | |
| "epoch": 1.5905059989567032, | |
| "grad_norm": 0.7392017553648957, | |
| "learning_rate": 3.806872286232086e-05, | |
| "loss": 0.3052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12523463368415833, | |
| "step": 1525, | |
| "valid_targets_mean": 2624.1, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 1.5957224830464267, | |
| "grad_norm": 0.6155330018742518, | |
| "learning_rate": 3.80463662903061e-05, | |
| "loss": 0.2991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1514369547367096, | |
| "step": 1530, | |
| "valid_targets_mean": 3692.8, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 1.6009389671361502, | |
| "grad_norm": 0.5645293102390446, | |
| "learning_rate": 3.802388770374598e-05, | |
| "loss": 0.2918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14762212336063385, | |
| "step": 1535, | |
| "valid_targets_mean": 4223.1, | |
| "valid_targets_min": 929 | |
| }, | |
| { | |
| "epoch": 1.6061554512258738, | |
| "grad_norm": 0.9112007356749344, | |
| "learning_rate": 3.8001287254622064e-05, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16939708590507507, | |
| "step": 1540, | |
| "valid_targets_mean": 4445.0, | |
| "valid_targets_min": 2695 | |
| }, | |
| { | |
| "epoch": 1.6113719353155973, | |
| "grad_norm": 0.537376318768028, | |
| "learning_rate": 3.797856509573981e-05, | |
| "loss": 0.2874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14612159132957458, | |
| "step": 1545, | |
| "valid_targets_mean": 7016.8, | |
| "valid_targets_min": 5239 | |
| }, | |
| { | |
| "epoch": 1.6165884194053208, | |
| "grad_norm": 0.5853536318197711, | |
| "learning_rate": 3.795572138072759e-05, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09021882712841034, | |
| "step": 1550, | |
| "valid_targets_mean": 5891.2, | |
| "valid_targets_min": 3108 | |
| }, | |
| { | |
| "epoch": 1.6218049034950444, | |
| "grad_norm": 0.43101082771168603, | |
| "learning_rate": 3.793275626403564e-05, | |
| "loss": 0.2034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09195680171251297, | |
| "step": 1555, | |
| "valid_targets_mean": 6355.6, | |
| "valid_targets_min": 4660 | |
| }, | |
| { | |
| "epoch": 1.6270213875847679, | |
| "grad_norm": 0.4274161876320681, | |
| "learning_rate": 3.790966990093503e-05, | |
| "loss": 0.1988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09221642464399338, | |
| "step": 1560, | |
| "valid_targets_mean": 5890.5, | |
| "valid_targets_min": 3138 | |
| }, | |
| { | |
| "epoch": 1.6322378716744914, | |
| "grad_norm": 0.4344050904927902, | |
| "learning_rate": 3.788646244751654e-05, | |
| "loss": 0.1935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11164269596338272, | |
| "step": 1565, | |
| "valid_targets_mean": 7187.2, | |
| "valid_targets_min": 4893 | |
| }, | |
| { | |
| "epoch": 1.637454355764215, | |
| "grad_norm": 0.47704150904274567, | |
| "learning_rate": 3.7863134060689734e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.103435218334198, | |
| "step": 1570, | |
| "valid_targets_mean": 5556.0, | |
| "valid_targets_min": 4370 | |
| }, | |
| { | |
| "epoch": 1.6426708398539385, | |
| "grad_norm": 0.4592595337139086, | |
| "learning_rate": 3.783968489818179e-05, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11303484439849854, | |
| "step": 1575, | |
| "valid_targets_mean": 5570.5, | |
| "valid_targets_min": 4576 | |
| }, | |
| { | |
| "epoch": 1.647887323943662, | |
| "grad_norm": 0.44340025538266453, | |
| "learning_rate": 3.781611511853646e-05, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09767629206180573, | |
| "step": 1580, | |
| "valid_targets_mean": 5741.8, | |
| "valid_targets_min": 4542 | |
| }, | |
| { | |
| "epoch": 1.6531038080333855, | |
| "grad_norm": 0.787196829233227, | |
| "learning_rate": 3.779242488111304e-05, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13173669576644897, | |
| "step": 1585, | |
| "valid_targets_mean": 1503.9, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 1.658320292123109, | |
| "grad_norm": 0.42800775593161355, | |
| "learning_rate": 3.776861434608524e-05, | |
| "loss": 0.1963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11318528652191162, | |
| "step": 1590, | |
| "valid_targets_mean": 6904.1, | |
| "valid_targets_min": 5520 | |
| }, | |
| { | |
| "epoch": 1.6635367762128326, | |
| "grad_norm": 0.4554658711790615, | |
| "learning_rate": 3.774468367444012e-05, | |
| "loss": 0.1964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10117735713720322, | |
| "step": 1595, | |
| "valid_targets_mean": 6056.1, | |
| "valid_targets_min": 4291 | |
| }, | |
| { | |
| "epoch": 1.668753260302556, | |
| "grad_norm": 0.4100742439352916, | |
| "learning_rate": 3.7720633027977034e-05, | |
| "loss": 0.2103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09915327280759811, | |
| "step": 1600, | |
| "valid_targets_mean": 6861.9, | |
| "valid_targets_min": 4993 | |
| }, | |
| { | |
| "epoch": 1.6739697443922796, | |
| "grad_norm": 0.4578963977470057, | |
| "learning_rate": 3.7696462569306467e-05, | |
| "loss": 0.2025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11283793300390244, | |
| "step": 1605, | |
| "valid_targets_mean": 5696.2, | |
| "valid_targets_min": 4268 | |
| }, | |
| { | |
| "epoch": 1.6791862284820032, | |
| "grad_norm": 0.4117765877664186, | |
| "learning_rate": 3.7672172461849e-05, | |
| "loss": 0.1881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09532284736633301, | |
| "step": 1610, | |
| "valid_targets_mean": 6006.2, | |
| "valid_targets_min": 4627 | |
| }, | |
| { | |
| "epoch": 1.6844027125717267, | |
| "grad_norm": 0.4225915524224307, | |
| "learning_rate": 3.7647762869834164e-05, | |
| "loss": 0.188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11200286448001862, | |
| "step": 1615, | |
| "valid_targets_mean": 5225.6, | |
| "valid_targets_min": 4617 | |
| }, | |
| { | |
| "epoch": 1.6896191966614502, | |
| "grad_norm": 0.4032096937585039, | |
| "learning_rate": 3.7623233958299364e-05, | |
| "loss": 0.1788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10491953790187836, | |
| "step": 1620, | |
| "valid_targets_mean": 7617.8, | |
| "valid_targets_min": 5121 | |
| }, | |
| { | |
| "epoch": 1.6948356807511737, | |
| "grad_norm": 0.4080608113146244, | |
| "learning_rate": 3.7598585893088726e-05, | |
| "loss": 0.1993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07689738273620605, | |
| "step": 1625, | |
| "valid_targets_mean": 5419.1, | |
| "valid_targets_min": 3873 | |
| }, | |
| { | |
| "epoch": 1.7000521648408973, | |
| "grad_norm": 0.45682469304233975, | |
| "learning_rate": 3.7573818840852004e-05, | |
| "loss": 0.2012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08160264790058136, | |
| "step": 1630, | |
| "valid_targets_mean": 5971.9, | |
| "valid_targets_min": 3898 | |
| }, | |
| { | |
| "epoch": 1.7052686489306208, | |
| "grad_norm": 0.3895065724674227, | |
| "learning_rate": 3.754893296904344e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07516547292470932, | |
| "step": 1635, | |
| "valid_targets_mean": 5725.6, | |
| "valid_targets_min": 4480 | |
| }, | |
| { | |
| "epoch": 1.7104851330203443, | |
| "grad_norm": 0.38201282216729315, | |
| "learning_rate": 3.752392844592064e-05, | |
| "loss": 0.1809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09296509623527527, | |
| "step": 1640, | |
| "valid_targets_mean": 7431.5, | |
| "valid_targets_min": 6180 | |
| }, | |
| { | |
| "epoch": 1.7157016171100679, | |
| "grad_norm": 0.45903234597137954, | |
| "learning_rate": 3.7498805440543436e-05, | |
| "loss": 0.2052, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1376444548368454, | |
| "step": 1645, | |
| "valid_targets_mean": 6257.8, | |
| "valid_targets_min": 5397 | |
| }, | |
| { | |
| "epoch": 1.7209181011997914, | |
| "grad_norm": 0.42704414927617246, | |
| "learning_rate": 3.747356412277272e-05, | |
| "loss": 0.2096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09394072741270065, | |
| "step": 1650, | |
| "valid_targets_mean": 5946.0, | |
| "valid_targets_min": 4549 | |
| }, | |
| { | |
| "epoch": 1.726134585289515, | |
| "grad_norm": 0.4330351660273763, | |
| "learning_rate": 3.744820466326933e-05, | |
| "loss": 0.1866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11312469840049744, | |
| "step": 1655, | |
| "valid_targets_mean": 6416.8, | |
| "valid_targets_min": 4840 | |
| }, | |
| { | |
| "epoch": 1.7313510693792384, | |
| "grad_norm": 0.4292964634343375, | |
| "learning_rate": 3.7422727233492876e-05, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11249202489852905, | |
| "step": 1660, | |
| "valid_targets_mean": 6426.0, | |
| "valid_targets_min": 5225 | |
| }, | |
| { | |
| "epoch": 1.736567553468962, | |
| "grad_norm": 0.4648762130385942, | |
| "learning_rate": 3.739713200570058e-05, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10980630666017532, | |
| "step": 1665, | |
| "valid_targets_mean": 5544.6, | |
| "valid_targets_min": 3296 | |
| }, | |
| { | |
| "epoch": 1.7417840375586855, | |
| "grad_norm": 0.40571926096497307, | |
| "learning_rate": 3.737141915294612e-05, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10378562659025192, | |
| "step": 1670, | |
| "valid_targets_mean": 6941.2, | |
| "valid_targets_min": 5464 | |
| }, | |
| { | |
| "epoch": 1.747000521648409, | |
| "grad_norm": 0.45891695292466544, | |
| "learning_rate": 3.734558884907847e-05, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09927622973918915, | |
| "step": 1675, | |
| "valid_targets_mean": 6252.2, | |
| "valid_targets_min": 4740 | |
| }, | |
| { | |
| "epoch": 1.7522170057381325, | |
| "grad_norm": 0.4293727904249753, | |
| "learning_rate": 3.7319641268740684e-05, | |
| "loss": 0.1978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09710176289081573, | |
| "step": 1680, | |
| "valid_targets_mean": 6288.8, | |
| "valid_targets_min": 4913 | |
| }, | |
| { | |
| "epoch": 1.757433489827856, | |
| "grad_norm": 0.4455747067262708, | |
| "learning_rate": 3.729357658736877e-05, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09660390019416809, | |
| "step": 1685, | |
| "valid_targets_mean": 6687.2, | |
| "valid_targets_min": 4751 | |
| }, | |
| { | |
| "epoch": 1.7626499739175796, | |
| "grad_norm": 0.42151393404623727, | |
| "learning_rate": 3.7267394981190456e-05, | |
| "loss": 0.2015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1136154755949974, | |
| "step": 1690, | |
| "valid_targets_mean": 6800.4, | |
| "valid_targets_min": 4900 | |
| }, | |
| { | |
| "epoch": 1.7678664580073031, | |
| "grad_norm": 0.46012702482558265, | |
| "learning_rate": 3.724109662722402e-05, | |
| "loss": 0.2025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10592508316040039, | |
| "step": 1695, | |
| "valid_targets_mean": 6907.8, | |
| "valid_targets_min": 4850 | |
| }, | |
| { | |
| "epoch": 1.7730829420970267, | |
| "grad_norm": 0.4340741164890611, | |
| "learning_rate": 3.72146817032771e-05, | |
| "loss": 0.1992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0979345291852951, | |
| "step": 1700, | |
| "valid_targets_mean": 5823.6, | |
| "valid_targets_min": 2910 | |
| }, | |
| { | |
| "epoch": 1.77829942618675, | |
| "grad_norm": 0.4382382578256167, | |
| "learning_rate": 3.718815038794549e-05, | |
| "loss": 0.2059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09111063182353973, | |
| "step": 1705, | |
| "valid_targets_mean": 6160.4, | |
| "valid_targets_min": 4196 | |
| }, | |
| { | |
| "epoch": 1.7835159102764737, | |
| "grad_norm": 0.4926022562906489, | |
| "learning_rate": 3.71615028606119e-05, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09664182364940643, | |
| "step": 1710, | |
| "valid_targets_mean": 5796.0, | |
| "valid_targets_min": 3865 | |
| }, | |
| { | |
| "epoch": 1.788732394366197, | |
| "grad_norm": 0.3713097913094652, | |
| "learning_rate": 3.713473930144479e-05, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08732146769762039, | |
| "step": 1715, | |
| "valid_targets_mean": 7570.9, | |
| "valid_targets_min": 3627 | |
| }, | |
| { | |
| "epoch": 1.7939488784559208, | |
| "grad_norm": 0.38603668036720135, | |
| "learning_rate": 3.710785989139713e-05, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08235106617212296, | |
| "step": 1720, | |
| "valid_targets_mean": 6602.2, | |
| "valid_targets_min": 3533 | |
| }, | |
| { | |
| "epoch": 1.799165362545644, | |
| "grad_norm": 0.4467926880314484, | |
| "learning_rate": 3.7080864812205176e-05, | |
| "loss": 0.2059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10759907960891724, | |
| "step": 1725, | |
| "valid_targets_mean": 6297.9, | |
| "valid_targets_min": 3502 | |
| }, | |
| { | |
| "epoch": 1.8043818466353678, | |
| "grad_norm": 0.4743030917232681, | |
| "learning_rate": 3.705375424638723e-05, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1024462878704071, | |
| "step": 1730, | |
| "valid_targets_mean": 5275.9, | |
| "valid_targets_min": 3249 | |
| }, | |
| { | |
| "epoch": 1.8095983307250911, | |
| "grad_norm": 0.41136883243929073, | |
| "learning_rate": 3.702652837724244e-05, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08283460140228271, | |
| "step": 1735, | |
| "valid_targets_mean": 5957.2, | |
| "valid_targets_min": 4283 | |
| }, | |
| { | |
| "epoch": 1.8148148148148149, | |
| "grad_norm": 0.4220117236083122, | |
| "learning_rate": 3.6999187388849517e-05, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10291680693626404, | |
| "step": 1740, | |
| "valid_targets_mean": 6440.6, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 1.8200312989045382, | |
| "grad_norm": 0.4105362820011357, | |
| "learning_rate": 3.697173146606553e-05, | |
| "loss": 0.1814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09742873907089233, | |
| "step": 1745, | |
| "valid_targets_mean": 6846.6, | |
| "valid_targets_min": 4858 | |
| }, | |
| { | |
| "epoch": 1.825247782994262, | |
| "grad_norm": 0.44417451216695897, | |
| "learning_rate": 3.694416079452463e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08390513062477112, | |
| "step": 1750, | |
| "valid_targets_mean": 5634.8, | |
| "valid_targets_min": 4570 | |
| }, | |
| { | |
| "epoch": 1.8304642670839852, | |
| "grad_norm": 0.39954698961172996, | |
| "learning_rate": 3.6916475560636806e-05, | |
| "loss": 0.2013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09311801940202713, | |
| "step": 1755, | |
| "valid_targets_mean": 5889.1, | |
| "valid_targets_min": 3082 | |
| }, | |
| { | |
| "epoch": 1.835680751173709, | |
| "grad_norm": 0.40206414657510214, | |
| "learning_rate": 3.688867595158663e-05, | |
| "loss": 0.1877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09258824586868286, | |
| "step": 1760, | |
| "valid_targets_mean": 6502.8, | |
| "valid_targets_min": 5362 | |
| }, | |
| { | |
| "epoch": 1.8408972352634323, | |
| "grad_norm": 0.4218561046914957, | |
| "learning_rate": 3.686076215533198e-05, | |
| "loss": 0.2065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10750134289264679, | |
| "step": 1765, | |
| "valid_targets_mean": 6521.1, | |
| "valid_targets_min": 4056 | |
| }, | |
| { | |
| "epoch": 1.846113719353156, | |
| "grad_norm": 0.38851700029486014, | |
| "learning_rate": 3.683273436060275e-05, | |
| "loss": 0.1863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08495507389307022, | |
| "step": 1770, | |
| "valid_targets_mean": 5720.8, | |
| "valid_targets_min": 4499 | |
| }, | |
| { | |
| "epoch": 1.8513302034428794, | |
| "grad_norm": 0.3670412030182634, | |
| "learning_rate": 3.680459275689964e-05, | |
| "loss": 0.1678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07839922606945038, | |
| "step": 1775, | |
| "valid_targets_mean": 6249.8, | |
| "valid_targets_min": 5268 | |
| }, | |
| { | |
| "epoch": 1.856546687532603, | |
| "grad_norm": 0.39427706609538643, | |
| "learning_rate": 3.677633753449278e-05, | |
| "loss": 0.1726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09388808161020279, | |
| "step": 1780, | |
| "valid_targets_mean": 6830.9, | |
| "valid_targets_min": 5851 | |
| }, | |
| { | |
| "epoch": 1.8617631716223264, | |
| "grad_norm": 0.39552277161910715, | |
| "learning_rate": 3.674796888442056e-05, | |
| "loss": 0.1767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08311046659946442, | |
| "step": 1785, | |
| "valid_targets_mean": 5873.2, | |
| "valid_targets_min": 5106 | |
| }, | |
| { | |
| "epoch": 1.8669796557120502, | |
| "grad_norm": 0.4144484018126494, | |
| "learning_rate": 3.671948699848822e-05, | |
| "loss": 0.1935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10961011797189713, | |
| "step": 1790, | |
| "valid_targets_mean": 6956.4, | |
| "valid_targets_min": 5383 | |
| }, | |
| { | |
| "epoch": 1.8721961398017735, | |
| "grad_norm": 0.5384983977207991, | |
| "learning_rate": 3.6690892069266624e-05, | |
| "loss": 0.217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08142735064029694, | |
| "step": 1795, | |
| "valid_targets_mean": 2994.9, | |
| "valid_targets_min": 1884 | |
| }, | |
| { | |
| "epoch": 1.8774126238914972, | |
| "grad_norm": 0.41650668017540887, | |
| "learning_rate": 3.666218429009094e-05, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08723096549510956, | |
| "step": 1800, | |
| "valid_targets_mean": 5446.6, | |
| "valid_targets_min": 3598 | |
| }, | |
| { | |
| "epoch": 1.8826291079812205, | |
| "grad_norm": 0.8287817135252892, | |
| "learning_rate": 3.6633363855059336e-05, | |
| "loss": 0.1858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08482405543327332, | |
| "step": 1805, | |
| "valid_targets_mean": 5429.4, | |
| "valid_targets_min": 3689 | |
| }, | |
| { | |
| "epoch": 1.8878455920709443, | |
| "grad_norm": 0.4099789460184055, | |
| "learning_rate": 3.6604430959031676e-05, | |
| "loss": 0.1983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10547837615013123, | |
| "step": 1810, | |
| "valid_targets_mean": 7193.6, | |
| "valid_targets_min": 5487 | |
| }, | |
| { | |
| "epoch": 1.8930620761606676, | |
| "grad_norm": 0.42776269624761293, | |
| "learning_rate": 3.6575385797628166e-05, | |
| "loss": 0.1968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10120788216590881, | |
| "step": 1815, | |
| "valid_targets_mean": 5895.0, | |
| "valid_targets_min": 4891 | |
| }, | |
| { | |
| "epoch": 1.8982785602503913, | |
| "grad_norm": 0.41145236453690903, | |
| "learning_rate": 3.654622856722808e-05, | |
| "loss": 0.2085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09616370499134064, | |
| "step": 1820, | |
| "valid_targets_mean": 6422.0, | |
| "valid_targets_min": 4072 | |
| }, | |
| { | |
| "epoch": 1.9034950443401146, | |
| "grad_norm": 0.41101202035916035, | |
| "learning_rate": 3.651695946496839e-05, | |
| "loss": 0.1926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10501502454280853, | |
| "step": 1825, | |
| "valid_targets_mean": 6154.8, | |
| "valid_targets_min": 3781 | |
| }, | |
| { | |
| "epoch": 1.9087115284298384, | |
| "grad_norm": 0.40214364915835493, | |
| "learning_rate": 3.6487578688742485e-05, | |
| "loss": 0.2025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10611806809902191, | |
| "step": 1830, | |
| "valid_targets_mean": 6498.2, | |
| "valid_targets_min": 4563 | |
| }, | |
| { | |
| "epoch": 1.9139280125195617, | |
| "grad_norm": 0.3822753113405647, | |
| "learning_rate": 3.6458086437198764e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09240502119064331, | |
| "step": 1835, | |
| "valid_targets_mean": 6525.0, | |
| "valid_targets_min": 4200 | |
| }, | |
| { | |
| "epoch": 1.9191444966092854, | |
| "grad_norm": 0.3952127607055477, | |
| "learning_rate": 3.642848290973934e-05, | |
| "loss": 0.1774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08918770402669907, | |
| "step": 1840, | |
| "valid_targets_mean": 5802.8, | |
| "valid_targets_min": 4458 | |
| }, | |
| { | |
| "epoch": 1.9243609806990087, | |
| "grad_norm": 0.43848042842587903, | |
| "learning_rate": 3.6398768306518706e-05, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11818303167819977, | |
| "step": 1845, | |
| "valid_targets_mean": 5912.8, | |
| "valid_targets_min": 4081 | |
| }, | |
| { | |
| "epoch": 1.9295774647887325, | |
| "grad_norm": 0.44832676027008517, | |
| "learning_rate": 3.636894282844233e-05, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11068180203437805, | |
| "step": 1850, | |
| "valid_targets_mean": 6106.0, | |
| "valid_targets_min": 4298 | |
| }, | |
| { | |
| "epoch": 1.9347939488784558, | |
| "grad_norm": 0.4040466583848472, | |
| "learning_rate": 3.6339006677165316e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1127871721982956, | |
| "step": 1855, | |
| "valid_targets_mean": 6761.4, | |
| "valid_targets_min": 4844 | |
| }, | |
| { | |
| "epoch": 1.9400104329681795, | |
| "grad_norm": 0.42110428673038935, | |
| "learning_rate": 3.630896005509108e-05, | |
| "loss": 0.1972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10931040346622467, | |
| "step": 1860, | |
| "valid_targets_mean": 6613.2, | |
| "valid_targets_min": 4212 | |
| }, | |
| { | |
| "epoch": 1.9452269170579028, | |
| "grad_norm": 0.44344264144694756, | |
| "learning_rate": 3.6278803165369926e-05, | |
| "loss": 0.2023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09482496976852417, | |
| "step": 1865, | |
| "valid_targets_mean": 6438.2, | |
| "valid_targets_min": 3762 | |
| }, | |
| { | |
| "epoch": 1.9504434011476266, | |
| "grad_norm": 0.3930994142136601, | |
| "learning_rate": 3.6248536211897715e-05, | |
| "loss": 0.1752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0831356793642044, | |
| "step": 1870, | |
| "valid_targets_mean": 6420.6, | |
| "valid_targets_min": 4726 | |
| }, | |
| { | |
| "epoch": 1.95565988523735, | |
| "grad_norm": 0.4444208706947674, | |
| "learning_rate": 3.621815939931444e-05, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11019587516784668, | |
| "step": 1875, | |
| "valid_targets_mean": 6689.5, | |
| "valid_targets_min": 3724 | |
| }, | |
| { | |
| "epoch": 1.9608763693270737, | |
| "grad_norm": 0.363115490342611, | |
| "learning_rate": 3.6187672933002904e-05, | |
| "loss": 0.1687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10221219062805176, | |
| "step": 1880, | |
| "valid_targets_mean": 7317.8, | |
| "valid_targets_min": 5655 | |
| }, | |
| { | |
| "epoch": 1.966092853416797, | |
| "grad_norm": 0.3705333891375108, | |
| "learning_rate": 3.6157077019087254e-05, | |
| "loss": 0.1927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09324866533279419, | |
| "step": 1885, | |
| "valid_targets_mean": 6667.6, | |
| "valid_targets_min": 5185 | |
| }, | |
| { | |
| "epoch": 1.9713093375065207, | |
| "grad_norm": 0.3815702013091896, | |
| "learning_rate": 3.612637186443169e-05, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07910025119781494, | |
| "step": 1890, | |
| "valid_targets_mean": 6522.6, | |
| "valid_targets_min": 4656 | |
| }, | |
| { | |
| "epoch": 1.976525821596244, | |
| "grad_norm": 0.42133047519207856, | |
| "learning_rate": 3.609555767663895e-05, | |
| "loss": 0.1871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08193907141685486, | |
| "step": 1895, | |
| "valid_targets_mean": 5751.9, | |
| "valid_targets_min": 4920 | |
| }, | |
| { | |
| "epoch": 1.9817423056859678, | |
| "grad_norm": 0.340082011641411, | |
| "learning_rate": 3.6064634664048996e-05, | |
| "loss": 0.1866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07420343160629272, | |
| "step": 1900, | |
| "valid_targets_mean": 7976.4, | |
| "valid_targets_min": 5856 | |
| }, | |
| { | |
| "epoch": 1.986958789775691, | |
| "grad_norm": 0.416688551007578, | |
| "learning_rate": 3.603360303573757e-05, | |
| "loss": 0.1827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09343363344669342, | |
| "step": 1905, | |
| "valid_targets_mean": 6129.6, | |
| "valid_targets_min": 3939 | |
| }, | |
| { | |
| "epoch": 1.9921752738654148, | |
| "grad_norm": 0.4156816132870302, | |
| "learning_rate": 3.600246300151476e-05, | |
| "loss": 0.1982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10199901461601257, | |
| "step": 1910, | |
| "valid_targets_mean": 6461.5, | |
| "valid_targets_min": 5219 | |
| }, | |
| { | |
| "epoch": 1.9973917579551381, | |
| "grad_norm": 0.47064533851948465, | |
| "learning_rate": 3.597121477192364e-05, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11399467289447784, | |
| "step": 1915, | |
| "valid_targets_mean": 5153.0, | |
| "valid_targets_min": 2301 | |
| }, | |
| { | |
| "epoch": 2.0020865936358896, | |
| "grad_norm": 0.5296542155533016, | |
| "learning_rate": 3.593985855823878e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15288572013378143, | |
| "step": 1920, | |
| "valid_targets_mean": 5843.4, | |
| "valid_targets_min": 3674 | |
| }, | |
| { | |
| "epoch": 2.007303077725613, | |
| "grad_norm": 0.5605090741860184, | |
| "learning_rate": 3.590839457246487e-05, | |
| "loss": 0.2897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08586505055427551, | |
| "step": 1925, | |
| "valid_targets_mean": 2402.5, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 2.0125195618153366, | |
| "grad_norm": 0.5423205673196174, | |
| "learning_rate": 3.587682302733527e-05, | |
| "loss": 0.2907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14211821556091309, | |
| "step": 1930, | |
| "valid_targets_mean": 4376.8, | |
| "valid_targets_min": 1877 | |
| }, | |
| { | |
| "epoch": 2.01773604590506, | |
| "grad_norm": 0.5568328040482612, | |
| "learning_rate": 3.584514413631054e-05, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1583961695432663, | |
| "step": 1935, | |
| "valid_targets_mean": 5255.8, | |
| "valid_targets_min": 2973 | |
| }, | |
| { | |
| "epoch": 2.0229525299947837, | |
| "grad_norm": 0.5270072385685527, | |
| "learning_rate": 3.581335811357703e-05, | |
| "loss": 0.28, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15370258688926697, | |
| "step": 1940, | |
| "valid_targets_mean": 4958.8, | |
| "valid_targets_min": 980 | |
| }, | |
| { | |
| "epoch": 2.028169014084507, | |
| "grad_norm": 0.5552507606916607, | |
| "learning_rate": 3.578146517404546e-05, | |
| "loss": 0.3033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14639443159103394, | |
| "step": 1945, | |
| "valid_targets_mean": 4641.2, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 2.0333854981742308, | |
| "grad_norm": 0.514073948857167, | |
| "learning_rate": 3.574946553334938e-05, | |
| "loss": 0.2736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09214585274457932, | |
| "step": 1950, | |
| "valid_targets_mean": 3139.4, | |
| "valid_targets_min": 1848 | |
| }, | |
| { | |
| "epoch": 2.038601982263954, | |
| "grad_norm": 0.45230549215815197, | |
| "learning_rate": 3.571735940784381e-05, | |
| "loss": 0.2834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1191449761390686, | |
| "step": 1955, | |
| "valid_targets_mean": 4646.2, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 2.043818466353678, | |
| "grad_norm": 0.5084960555593179, | |
| "learning_rate": 3.5685147014603705e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14859934151172638, | |
| "step": 1960, | |
| "valid_targets_mean": 6490.9, | |
| "valid_targets_min": 3600 | |
| }, | |
| { | |
| "epoch": 2.049034950443401, | |
| "grad_norm": 0.5203320778694672, | |
| "learning_rate": 3.5652828571422536e-05, | |
| "loss": 0.2977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13395559787750244, | |
| "step": 1965, | |
| "valid_targets_mean": 4469.8, | |
| "valid_targets_min": 2540 | |
| }, | |
| { | |
| "epoch": 2.054251434533125, | |
| "grad_norm": 0.5545847156166325, | |
| "learning_rate": 3.5620404296810766e-05, | |
| "loss": 0.2876, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1349387913942337, | |
| "step": 1970, | |
| "valid_targets_mean": 5648.5, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 2.059467918622848, | |
| "grad_norm": 0.5065930700781758, | |
| "learning_rate": 3.558787440999442e-05, | |
| "loss": 0.283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13748890161514282, | |
| "step": 1975, | |
| "valid_targets_mean": 4748.8, | |
| "valid_targets_min": 1720 | |
| }, | |
| { | |
| "epoch": 2.064684402712572, | |
| "grad_norm": 0.5621344894216197, | |
| "learning_rate": 3.55552391309136e-05, | |
| "loss": 0.2919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15613138675689697, | |
| "step": 1980, | |
| "valid_targets_mean": 4611.0, | |
| "valid_targets_min": 1982 | |
| }, | |
| { | |
| "epoch": 2.0699008868022952, | |
| "grad_norm": 0.5228824919407831, | |
| "learning_rate": 3.5522498680220954e-05, | |
| "loss": 0.2847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12328225374221802, | |
| "step": 1985, | |
| "valid_targets_mean": 4092.4, | |
| "valid_targets_min": 1598 | |
| }, | |
| { | |
| "epoch": 2.075117370892019, | |
| "grad_norm": 0.5863375864396916, | |
| "learning_rate": 3.5489653279280225e-05, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14512383937835693, | |
| "step": 1990, | |
| "valid_targets_mean": 3817.2, | |
| "valid_targets_min": 1451 | |
| }, | |
| { | |
| "epoch": 2.0803338549817423, | |
| "grad_norm": 0.5368081801996578, | |
| "learning_rate": 3.545670315016475e-05, | |
| "loss": 0.29, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13162657618522644, | |
| "step": 1995, | |
| "valid_targets_mean": 3677.9, | |
| "valid_targets_min": 1861 | |
| }, | |
| { | |
| "epoch": 2.085550339071466, | |
| "grad_norm": 0.6254817693960798, | |
| "learning_rate": 3.5423648515655934e-05, | |
| "loss": 0.2864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14520707726478577, | |
| "step": 2000, | |
| "valid_targets_mean": 3359.1, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 2.0907668231611893, | |
| "grad_norm": 0.5951361046311903, | |
| "learning_rate": 3.539048959924178e-05, | |
| "loss": 0.2934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16328895092010498, | |
| "step": 2005, | |
| "valid_targets_mean": 4017.1, | |
| "valid_targets_min": 2724 | |
| }, | |
| { | |
| "epoch": 2.095983307250913, | |
| "grad_norm": 0.6504117289566479, | |
| "learning_rate": 3.535722662511535e-05, | |
| "loss": 0.3042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16033607721328735, | |
| "step": 2010, | |
| "valid_targets_mean": 3765.0, | |
| "valid_targets_min": 1362 | |
| }, | |
| { | |
| "epoch": 2.1011997913406364, | |
| "grad_norm": 0.570159196823098, | |
| "learning_rate": 3.532385981817326e-05, | |
| "loss": 0.292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1155758947134018, | |
| "step": 2015, | |
| "valid_targets_mean": 3980.6, | |
| "valid_targets_min": 1477 | |
| }, | |
| { | |
| "epoch": 2.10641627543036, | |
| "grad_norm": 0.592722439875945, | |
| "learning_rate": 3.5290389404014136e-05, | |
| "loss": 0.2973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1470147967338562, | |
| "step": 2020, | |
| "valid_targets_mean": 4153.5, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 2.1116327595200834, | |
| "grad_norm": 0.5992388704436196, | |
| "learning_rate": 3.5256815608937155e-05, | |
| "loss": 0.2823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14256763458251953, | |
| "step": 2025, | |
| "valid_targets_mean": 3231.6, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 2.116849243609807, | |
| "grad_norm": 0.5775594893009676, | |
| "learning_rate": 3.522313865994043e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1322719156742096, | |
| "step": 2030, | |
| "valid_targets_mean": 3469.8, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 2.1220657276995305, | |
| "grad_norm": 0.6581978566347938, | |
| "learning_rate": 3.518935878471952e-05, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15938004851341248, | |
| "step": 2035, | |
| "valid_targets_mean": 3280.4, | |
| "valid_targets_min": 1693 | |
| }, | |
| { | |
| "epoch": 2.127282211789254, | |
| "grad_norm": 0.5700797946917613, | |
| "learning_rate": 3.515547621166591e-05, | |
| "loss": 0.2926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12681075930595398, | |
| "step": 2040, | |
| "valid_targets_mean": 3160.6, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 2.1324986958789776, | |
| "grad_norm": 0.5480431924967637, | |
| "learning_rate": 3.5121491169865425e-05, | |
| "loss": 0.2905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13935476541519165, | |
| "step": 2045, | |
| "valid_targets_mean": 3768.2, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 2.1377151799687013, | |
| "grad_norm": 0.5737281927689749, | |
| "learning_rate": 3.508740388909669e-05, | |
| "loss": 0.2725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13549292087554932, | |
| "step": 2050, | |
| "valid_targets_mean": 4639.2, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 2.1429316640584246, | |
| "grad_norm": 0.5882600849703047, | |
| "learning_rate": 3.505321459982961e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16310137510299683, | |
| "step": 2055, | |
| "valid_targets_mean": 4694.4, | |
| "valid_targets_min": 1934 | |
| }, | |
| { | |
| "epoch": 2.148148148148148, | |
| "grad_norm": 0.6344029388133648, | |
| "learning_rate": 3.501892353322376e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1576867401599884, | |
| "step": 2060, | |
| "valid_targets_mean": 3986.6, | |
| "valid_targets_min": 2557 | |
| }, | |
| { | |
| "epoch": 2.1533646322378717, | |
| "grad_norm": 0.642942092747691, | |
| "learning_rate": 3.498453092112687e-05, | |
| "loss": 0.2847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18935127556324005, | |
| "step": 2065, | |
| "valid_targets_mean": 4261.6, | |
| "valid_targets_min": 2663 | |
| }, | |
| { | |
| "epoch": 2.1585811163275954, | |
| "grad_norm": 0.5918471315187003, | |
| "learning_rate": 3.495003699607322e-05, | |
| "loss": 0.2761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13827744126319885, | |
| "step": 2070, | |
| "valid_targets_mean": 3949.8, | |
| "valid_targets_min": 2016 | |
| }, | |
| { | |
| "epoch": 2.1637976004173187, | |
| "grad_norm": 0.7824082820178958, | |
| "learning_rate": 3.49154419912821e-05, | |
| "loss": 0.28, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12572389841079712, | |
| "step": 2075, | |
| "valid_targets_mean": 2903.9, | |
| "valid_targets_min": 1367 | |
| }, | |
| { | |
| "epoch": 2.169014084507042, | |
| "grad_norm": 0.6065215803339213, | |
| "learning_rate": 3.48807461406562e-05, | |
| "loss": 0.2878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1366393268108368, | |
| "step": 2080, | |
| "valid_targets_mean": 3871.6, | |
| "valid_targets_min": 2279 | |
| }, | |
| { | |
| "epoch": 2.174230568596766, | |
| "grad_norm": 0.6159300091568073, | |
| "learning_rate": 3.484594967878007e-05, | |
| "loss": 0.2704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13009457290172577, | |
| "step": 2085, | |
| "valid_targets_mean": 3322.5, | |
| "valid_targets_min": 2419 | |
| }, | |
| { | |
| "epoch": 2.1794470526864895, | |
| "grad_norm": 0.6574102934800521, | |
| "learning_rate": 3.481105284091847e-05, | |
| "loss": 0.286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12333754450082779, | |
| "step": 2090, | |
| "valid_targets_mean": 2314.0, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 2.184663536776213, | |
| "grad_norm": 0.7014751804078182, | |
| "learning_rate": 3.4776055863014864e-05, | |
| "loss": 0.2851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17281542718410492, | |
| "step": 2095, | |
| "valid_targets_mean": 3027.4, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 2.189880020865936, | |
| "grad_norm": 0.602013541802384, | |
| "learning_rate": 3.474095898168975e-05, | |
| "loss": 0.2811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.161719411611557, | |
| "step": 2100, | |
| "valid_targets_mean": 4359.0, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 2.19509650495566, | |
| "grad_norm": 0.5970601246461388, | |
| "learning_rate": 3.470576243423911e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13313499093055725, | |
| "step": 2105, | |
| "valid_targets_mean": 3364.5, | |
| "valid_targets_min": 2080 | |
| }, | |
| { | |
| "epoch": 2.2003129890453836, | |
| "grad_norm": 0.61051494978652, | |
| "learning_rate": 3.467046645863276e-05, | |
| "loss": 0.2806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12296639382839203, | |
| "step": 2110, | |
| "valid_targets_mean": 3020.6, | |
| "valid_targets_min": 1145 | |
| }, | |
| { | |
| "epoch": 2.205529473135107, | |
| "grad_norm": 0.5838453907419531, | |
| "learning_rate": 3.463507129351279e-05, | |
| "loss": 0.272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15435731410980225, | |
| "step": 2115, | |
| "valid_targets_mean": 4282.6, | |
| "valid_targets_min": 1858 | |
| }, | |
| { | |
| "epoch": 2.2107459572248302, | |
| "grad_norm": 0.5620362772995038, | |
| "learning_rate": 3.459957717819191e-05, | |
| "loss": 0.29, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12687022984027863, | |
| "step": 2120, | |
| "valid_targets_mean": 3524.8, | |
| "valid_targets_min": 1639 | |
| }, | |
| { | |
| "epoch": 2.215962441314554, | |
| "grad_norm": 0.5934963204819119, | |
| "learning_rate": 3.4563984352651874e-05, | |
| "loss": 0.2797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1262321025133133, | |
| "step": 2125, | |
| "valid_targets_mean": 3797.8, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 2.2211789254042777, | |
| "grad_norm": 0.5972401991412557, | |
| "learning_rate": 3.45282930575418e-05, | |
| "loss": 0.279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1583259105682373, | |
| "step": 2130, | |
| "valid_targets_mean": 4671.1, | |
| "valid_targets_min": 1844 | |
| }, | |
| { | |
| "epoch": 2.226395409494001, | |
| "grad_norm": 0.6034758033565939, | |
| "learning_rate": 3.449250353417661e-05, | |
| "loss": 0.2756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11870335042476654, | |
| "step": 2135, | |
| "valid_targets_mean": 3266.2, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 2.2316118935837244, | |
| "grad_norm": 0.5883252683322544, | |
| "learning_rate": 3.445661602453533e-05, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12820032238960266, | |
| "step": 2140, | |
| "valid_targets_mean": 3436.2, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 2.236828377673448, | |
| "grad_norm": 0.6201487750058823, | |
| "learning_rate": 3.44206307712595e-05, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15465371310710907, | |
| "step": 2145, | |
| "valid_targets_mean": 3810.1, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 2.2420448617631714, | |
| "grad_norm": 0.5146605285730406, | |
| "learning_rate": 3.4384548017651544e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10008621215820312, | |
| "step": 2150, | |
| "valid_targets_mean": 3273.1, | |
| "valid_targets_min": 1607 | |
| }, | |
| { | |
| "epoch": 2.247261345852895, | |
| "grad_norm": 0.6163932161851912, | |
| "learning_rate": 3.4348368007673065e-05, | |
| "loss": 0.2864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1330852210521698, | |
| "step": 2155, | |
| "valid_targets_mean": 3412.0, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 2.2524778299426185, | |
| "grad_norm": 0.5703664343943993, | |
| "learning_rate": 3.4312090985943266e-05, | |
| "loss": 0.2852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12091439962387085, | |
| "step": 2160, | |
| "valid_targets_mean": 3766.6, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 2.257694314032342, | |
| "grad_norm": 0.5266030677617946, | |
| "learning_rate": 3.4275717197737234e-05, | |
| "loss": 0.2776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13142439723014832, | |
| "step": 2165, | |
| "valid_targets_mean": 4007.0, | |
| "valid_targets_min": 3218 | |
| }, | |
| { | |
| "epoch": 2.262910798122066, | |
| "grad_norm": 0.5665429867955046, | |
| "learning_rate": 3.423924688898433e-05, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17378059029579163, | |
| "step": 2170, | |
| "valid_targets_mean": 4801.1, | |
| "valid_targets_min": 2999 | |
| }, | |
| { | |
| "epoch": 2.2681272822117893, | |
| "grad_norm": 0.5990844487418092, | |
| "learning_rate": 3.420268030626651e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14651526510715485, | |
| "step": 2175, | |
| "valid_targets_mean": 3517.0, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 2.2733437663015126, | |
| "grad_norm": 0.5699902051403354, | |
| "learning_rate": 3.416601769681663e-05, | |
| "loss": 0.2747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14224934577941895, | |
| "step": 2180, | |
| "valid_targets_mean": 4233.4, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 2.2785602503912363, | |
| "grad_norm": 0.5700287677358951, | |
| "learning_rate": 3.412925930851683e-05, | |
| "loss": 0.2638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13151955604553223, | |
| "step": 2185, | |
| "valid_targets_mean": 3580.4, | |
| "valid_targets_min": 2272 | |
| }, | |
| { | |
| "epoch": 2.28377673448096, | |
| "grad_norm": 0.6275656348699551, | |
| "learning_rate": 3.40924053898968e-05, | |
| "loss": 0.2722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1569211333990097, | |
| "step": 2190, | |
| "valid_targets_mean": 3325.6, | |
| "valid_targets_min": 1183 | |
| }, | |
| { | |
| "epoch": 2.2889932185706834, | |
| "grad_norm": 0.5986781997947559, | |
| "learning_rate": 3.405545619013214e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18150749802589417, | |
| "step": 2195, | |
| "valid_targets_mean": 4843.9, | |
| "valid_targets_min": 3041 | |
| }, | |
| { | |
| "epoch": 2.2942097026604067, | |
| "grad_norm": 0.5928267090004791, | |
| "learning_rate": 3.401841195904267e-05, | |
| "loss": 0.2848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14741823077201843, | |
| "step": 2200, | |
| "valid_targets_mean": 3706.8, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 2.2994261867501304, | |
| "grad_norm": 0.4742838618877209, | |
| "learning_rate": 3.398127294709072e-05, | |
| "loss": 0.2711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1398695707321167, | |
| "step": 2205, | |
| "valid_targets_mean": 5485.0, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 2.3046426708398537, | |
| "grad_norm": 0.5503009026697306, | |
| "learning_rate": 3.3944039405379444e-05, | |
| "loss": 0.2779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15395748615264893, | |
| "step": 2210, | |
| "valid_targets_mean": 4971.1, | |
| "valid_targets_min": 2258 | |
| }, | |
| { | |
| "epoch": 2.3098591549295775, | |
| "grad_norm": 0.6366637148244674, | |
| "learning_rate": 3.390671158565115e-05, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12728239595890045, | |
| "step": 2215, | |
| "valid_targets_mean": 3890.1, | |
| "valid_targets_min": 1971 | |
| }, | |
| { | |
| "epoch": 2.315075639019301, | |
| "grad_norm": 0.5879383190689246, | |
| "learning_rate": 3.386928974028555e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15988336503505707, | |
| "step": 2220, | |
| "valid_targets_mean": 4203.0, | |
| "valid_targets_min": 1651 | |
| }, | |
| { | |
| "epoch": 2.3202921231090246, | |
| "grad_norm": 0.6291797041959577, | |
| "learning_rate": 3.3831774122298106e-05, | |
| "loss": 0.2737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1280360370874405, | |
| "step": 2225, | |
| "valid_targets_mean": 2842.0, | |
| "valid_targets_min": 1374 | |
| }, | |
| { | |
| "epoch": 2.325508607198748, | |
| "grad_norm": 0.6108545246898451, | |
| "learning_rate": 3.3794164985338266e-05, | |
| "loss": 0.2681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13130611181259155, | |
| "step": 2230, | |
| "valid_targets_mean": 3439.2, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 2.3307250912884716, | |
| "grad_norm": 0.5727219805451983, | |
| "learning_rate": 3.37564625836878e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12657243013381958, | |
| "step": 2235, | |
| "valid_targets_mean": 4143.6, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 2.335941575378195, | |
| "grad_norm": 0.6219791732922988, | |
| "learning_rate": 3.3718667172259026e-05, | |
| "loss": 0.2679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12359379231929779, | |
| "step": 2240, | |
| "valid_targets_mean": 3539.4, | |
| "valid_targets_min": 1179 | |
| }, | |
| { | |
| "epoch": 2.3411580594679187, | |
| "grad_norm": 0.6171600277133966, | |
| "learning_rate": 3.368077900659315e-05, | |
| "loss": 0.2651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1577104926109314, | |
| "step": 2245, | |
| "valid_targets_mean": 3322.1, | |
| "valid_targets_min": 1632 | |
| }, | |
| { | |
| "epoch": 2.346374543557642, | |
| "grad_norm": 0.5653157219860382, | |
| "learning_rate": 3.364279834285848e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12218950688838959, | |
| "step": 2250, | |
| "valid_targets_mean": 3812.1, | |
| "valid_targets_min": 1792 | |
| }, | |
| { | |
| "epoch": 2.3515910276473657, | |
| "grad_norm": 0.6099988175797327, | |
| "learning_rate": 3.360472543784875e-05, | |
| "loss": 0.2591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15166360139846802, | |
| "step": 2255, | |
| "valid_targets_mean": 4096.8, | |
| "valid_targets_min": 2148 | |
| }, | |
| { | |
| "epoch": 2.356807511737089, | |
| "grad_norm": 0.5862896231325757, | |
| "learning_rate": 3.356656054898132e-05, | |
| "loss": 0.2704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15353241562843323, | |
| "step": 2260, | |
| "valid_targets_mean": 3878.5, | |
| "valid_targets_min": 1818 | |
| }, | |
| { | |
| "epoch": 2.3620239958268128, | |
| "grad_norm": 0.5314025671686089, | |
| "learning_rate": 3.352830393429547e-05, | |
| "loss": 0.2871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12921863794326782, | |
| "step": 2265, | |
| "valid_targets_mean": 4388.8, | |
| "valid_targets_min": 1987 | |
| }, | |
| { | |
| "epoch": 2.367240479916536, | |
| "grad_norm": 0.694238702118014, | |
| "learning_rate": 3.3489955852450675e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14981994032859802, | |
| "step": 2270, | |
| "valid_targets_mean": 3343.8, | |
| "valid_targets_min": 2481 | |
| }, | |
| { | |
| "epoch": 2.37245696400626, | |
| "grad_norm": 0.6365866969539306, | |
| "learning_rate": 3.3451516562724834e-05, | |
| "loss": 0.2715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1292153000831604, | |
| "step": 2275, | |
| "valid_targets_mean": 3440.0, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 2.377673448095983, | |
| "grad_norm": 0.5565115806386123, | |
| "learning_rate": 3.341298632501249e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13988377153873444, | |
| "step": 2280, | |
| "valid_targets_mean": 4068.9, | |
| "valid_targets_min": 1860 | |
| }, | |
| { | |
| "epoch": 2.382889932185707, | |
| "grad_norm": 0.5576050416431582, | |
| "learning_rate": 3.3374365399823134e-05, | |
| "loss": 0.2729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13859406113624573, | |
| "step": 2285, | |
| "valid_targets_mean": 4278.2, | |
| "valid_targets_min": 1914 | |
| }, | |
| { | |
| "epoch": 2.38810641627543, | |
| "grad_norm": 0.6448682852539553, | |
| "learning_rate": 3.3335654048279395e-05, | |
| "loss": 0.263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14563745260238647, | |
| "step": 2290, | |
| "valid_targets_mean": 3908.8, | |
| "valid_targets_min": 3065 | |
| }, | |
| { | |
| "epoch": 2.393322900365154, | |
| "grad_norm": 0.564751501591018, | |
| "learning_rate": 3.329685253211528e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1404622495174408, | |
| "step": 2295, | |
| "valid_targets_mean": 3868.6, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 2.3985393844548772, | |
| "grad_norm": 0.6149190659906749, | |
| "learning_rate": 3.325796111367444e-05, | |
| "loss": 0.2725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14148470759391785, | |
| "step": 2300, | |
| "valid_targets_mean": 3548.1, | |
| "valid_targets_min": 2359 | |
| }, | |
| { | |
| "epoch": 2.403755868544601, | |
| "grad_norm": 0.6419027929043597, | |
| "learning_rate": 3.321898005590835e-05, | |
| "loss": 0.2639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13625851273536682, | |
| "step": 2305, | |
| "valid_targets_mean": 3140.6, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 2.4089723526343243, | |
| "grad_norm": 0.6668150730767709, | |
| "learning_rate": 3.317990962237454e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12895992398262024, | |
| "step": 2310, | |
| "valid_targets_mean": 3165.6, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 2.414188836724048, | |
| "grad_norm": 0.681543456226893, | |
| "learning_rate": 3.314075007723487e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12114736437797546, | |
| "step": 2315, | |
| "valid_targets_mean": 3397.1, | |
| "valid_targets_min": 1661 | |
| }, | |
| { | |
| "epoch": 2.4194053208137714, | |
| "grad_norm": 0.5598824225027148, | |
| "learning_rate": 3.3101501685253657e-05, | |
| "loss": 0.2703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14699427783489227, | |
| "step": 2320, | |
| "valid_targets_mean": 4858.0, | |
| "valid_targets_min": 1529 | |
| }, | |
| { | |
| "epoch": 2.424621804903495, | |
| "grad_norm": 0.6725496219818118, | |
| "learning_rate": 3.306216471179594e-05, | |
| "loss": 0.2912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16363482177257538, | |
| "step": 2325, | |
| "valid_targets_mean": 3518.6, | |
| "valid_targets_min": 1450 | |
| }, | |
| { | |
| "epoch": 2.4298382889932184, | |
| "grad_norm": 0.5961908168336248, | |
| "learning_rate": 3.3022739422825686e-05, | |
| "loss": 0.2681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.135379821062088, | |
| "step": 2330, | |
| "valid_targets_mean": 3491.0, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 2.435054773082942, | |
| "grad_norm": 0.5676978037295951, | |
| "learning_rate": 3.2983226084903944e-05, | |
| "loss": 0.2785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14006897807121277, | |
| "step": 2335, | |
| "valid_targets_mean": 4356.9, | |
| "valid_targets_min": 1174 | |
| }, | |
| { | |
| "epoch": 2.4402712571726655, | |
| "grad_norm": 0.8114836753803415, | |
| "learning_rate": 3.294362496518711e-05, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14607083797454834, | |
| "step": 2340, | |
| "valid_targets_mean": 3630.6, | |
| "valid_targets_min": 2882 | |
| }, | |
| { | |
| "epoch": 2.445487741262389, | |
| "grad_norm": 0.622867394688811, | |
| "learning_rate": 3.290393633142507e-05, | |
| "loss": 0.2777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14275982975959778, | |
| "step": 2345, | |
| "valid_targets_mean": 4041.8, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 2.4507042253521125, | |
| "grad_norm": 0.5874999766195824, | |
| "learning_rate": 3.286416045195943e-05, | |
| "loss": 0.27, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10459715873003006, | |
| "step": 2350, | |
| "valid_targets_mean": 2842.4, | |
| "valid_targets_min": 1852 | |
| }, | |
| { | |
| "epoch": 2.4559207094418363, | |
| "grad_norm": 0.6531423599312236, | |
| "learning_rate": 3.282429759572164e-05, | |
| "loss": 0.2765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16255587339401245, | |
| "step": 2355, | |
| "valid_targets_mean": 4498.4, | |
| "valid_targets_min": 1296 | |
| }, | |
| { | |
| "epoch": 2.4611371935315596, | |
| "grad_norm": 0.6164878602858105, | |
| "learning_rate": 3.2784348032231245e-05, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13083836436271667, | |
| "step": 2360, | |
| "valid_targets_mean": 3104.5, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 2.4663536776212833, | |
| "grad_norm": 0.5561329069441622, | |
| "learning_rate": 3.274431203159402e-05, | |
| "loss": 0.2728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13875705003738403, | |
| "step": 2365, | |
| "valid_targets_mean": 4356.2, | |
| "valid_targets_min": 3238 | |
| }, | |
| { | |
| "epoch": 2.4715701617110066, | |
| "grad_norm": 0.5852445394713213, | |
| "learning_rate": 3.270418986450017e-05, | |
| "loss": 0.257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11462438106536865, | |
| "step": 2370, | |
| "valid_targets_mean": 3871.8, | |
| "valid_targets_min": 1372 | |
| }, | |
| { | |
| "epoch": 2.4767866458007304, | |
| "grad_norm": 0.6453440740144214, | |
| "learning_rate": 3.266398180222247e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13220545649528503, | |
| "step": 2375, | |
| "valid_targets_mean": 3531.6, | |
| "valid_targets_min": 2370 | |
| }, | |
| { | |
| "epoch": 2.4820031298904537, | |
| "grad_norm": 0.5649229448593387, | |
| "learning_rate": 3.262368811661446e-05, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12727907299995422, | |
| "step": 2380, | |
| "valid_targets_mean": 4020.2, | |
| "valid_targets_min": 2453 | |
| }, | |
| { | |
| "epoch": 2.4872196139801774, | |
| "grad_norm": 0.6395417642192527, | |
| "learning_rate": 3.2583309080108576e-05, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13773152232170105, | |
| "step": 2385, | |
| "valid_targets_mean": 3526.0, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 2.4924360980699007, | |
| "grad_norm": 0.6421369647984996, | |
| "learning_rate": 3.254284496571434e-05, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12437285482883453, | |
| "step": 2390, | |
| "valid_targets_mean": 3186.2, | |
| "valid_targets_min": 1728 | |
| }, | |
| { | |
| "epoch": 2.4976525821596245, | |
| "grad_norm": 0.6926522814946685, | |
| "learning_rate": 3.25022960470165e-05, | |
| "loss": 0.2656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15149444341659546, | |
| "step": 2395, | |
| "valid_targets_mean": 3751.8, | |
| "valid_targets_min": 1941 | |
| }, | |
| { | |
| "epoch": 2.502869066249348, | |
| "grad_norm": 0.5441131285434808, | |
| "learning_rate": 3.246166259817318e-05, | |
| "loss": 0.2625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1081734150648117, | |
| "step": 2400, | |
| "valid_targets_mean": 3928.8, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 2.5080855503390715, | |
| "grad_norm": 0.6752301529328438, | |
| "learning_rate": 3.242094489391402e-05, | |
| "loss": 0.2797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16730421781539917, | |
| "step": 2405, | |
| "valid_targets_mean": 2807.4, | |
| "valid_targets_min": 1025 | |
| }, | |
| { | |
| "epoch": 2.513302034428795, | |
| "grad_norm": 0.5412061454613628, | |
| "learning_rate": 3.238014320953832e-05, | |
| "loss": 0.2619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14784061908721924, | |
| "step": 2410, | |
| "valid_targets_mean": 4664.0, | |
| "valid_targets_min": 1564 | |
| }, | |
| { | |
| "epoch": 2.5185185185185186, | |
| "grad_norm": 0.5431818993714034, | |
| "learning_rate": 3.233925782091322e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13156089186668396, | |
| "step": 2415, | |
| "valid_targets_mean": 4929.6, | |
| "valid_targets_min": 2762 | |
| }, | |
| { | |
| "epoch": 2.523735002608242, | |
| "grad_norm": 0.5534826639340052, | |
| "learning_rate": 3.229828900447174e-05, | |
| "loss": 0.248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12213470786809921, | |
| "step": 2420, | |
| "valid_targets_mean": 3609.4, | |
| "valid_targets_min": 2426 | |
| }, | |
| { | |
| "epoch": 2.5289514866979657, | |
| "grad_norm": 0.49414721139107365, | |
| "learning_rate": 3.2257237037211026e-05, | |
| "loss": 0.2539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11549164354801178, | |
| "step": 2425, | |
| "valid_targets_mean": 5444.6, | |
| "valid_targets_min": 2678 | |
| }, | |
| { | |
| "epoch": 2.534167970787689, | |
| "grad_norm": 0.5572143901270232, | |
| "learning_rate": 3.221610219669038e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11582082509994507, | |
| "step": 2430, | |
| "valid_targets_mean": 3776.9, | |
| "valid_targets_min": 2769 | |
| }, | |
| { | |
| "epoch": 2.5393844548774127, | |
| "grad_norm": 0.5961417593998901, | |
| "learning_rate": 3.2174884761029456e-05, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1326468288898468, | |
| "step": 2435, | |
| "valid_targets_mean": 3707.6, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 2.544600938967136, | |
| "grad_norm": 0.5838232175638725, | |
| "learning_rate": 3.2133585008906307e-05, | |
| "loss": 0.2515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12981246411800385, | |
| "step": 2440, | |
| "valid_targets_mean": 3851.9, | |
| "valid_targets_min": 1672 | |
| }, | |
| { | |
| "epoch": 2.5498174230568598, | |
| "grad_norm": 0.5757379607286798, | |
| "learning_rate": 3.209220321955559e-05, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10258600115776062, | |
| "step": 2445, | |
| "valid_targets_mean": 3157.9, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 2.555033907146583, | |
| "grad_norm": 0.5332145384348479, | |
| "learning_rate": 3.205073967276659e-05, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11794015765190125, | |
| "step": 2450, | |
| "valid_targets_mean": 4257.1, | |
| "valid_targets_min": 2637 | |
| }, | |
| { | |
| "epoch": 2.560250391236307, | |
| "grad_norm": 0.60106624258005, | |
| "learning_rate": 3.20091946488814e-05, | |
| "loss": 0.2736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12551817297935486, | |
| "step": 2455, | |
| "valid_targets_mean": 3795.2, | |
| "valid_targets_min": 1465 | |
| }, | |
| { | |
| "epoch": 2.56546687532603, | |
| "grad_norm": 0.6133597201953852, | |
| "learning_rate": 3.196756842879297e-05, | |
| "loss": 0.2675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12127897888422012, | |
| "step": 2460, | |
| "valid_targets_mean": 3117.1, | |
| "valid_targets_min": 1583 | |
| }, | |
| { | |
| "epoch": 2.570683359415754, | |
| "grad_norm": 0.6549840026040402, | |
| "learning_rate": 3.1925861293943234e-05, | |
| "loss": 0.2599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1293744146823883, | |
| "step": 2465, | |
| "valid_targets_mean": 2977.4, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 2.575899843505477, | |
| "grad_norm": 0.6300007375018762, | |
| "learning_rate": 3.1884073526321216e-05, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13807639479637146, | |
| "step": 2470, | |
| "valid_targets_mean": 3406.8, | |
| "valid_targets_min": 2156 | |
| }, | |
| { | |
| "epoch": 2.581116327595201, | |
| "grad_norm": 0.6003514952076949, | |
| "learning_rate": 3.18422054084611e-05, | |
| "loss": 0.2615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1338767111301422, | |
| "step": 2475, | |
| "valid_targets_mean": 4144.6, | |
| "valid_targets_min": 2756 | |
| }, | |
| { | |
| "epoch": 2.5863328116849242, | |
| "grad_norm": 0.585834610611976, | |
| "learning_rate": 3.180025722344034e-05, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.151723712682724, | |
| "step": 2480, | |
| "valid_targets_mean": 3816.6, | |
| "valid_targets_min": 1776 | |
| }, | |
| { | |
| "epoch": 2.591549295774648, | |
| "grad_norm": 0.6043346006652774, | |
| "learning_rate": 3.175822925487774e-05, | |
| "loss": 0.2717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19020023941993713, | |
| "step": 2485, | |
| "valid_targets_mean": 4737.0, | |
| "valid_targets_min": 2526 | |
| }, | |
| { | |
| "epoch": 2.5967657798643713, | |
| "grad_norm": 0.5689517725256622, | |
| "learning_rate": 3.171612178693151e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14061452448368073, | |
| "step": 2490, | |
| "valid_targets_mean": 3746.9, | |
| "valid_targets_min": 2409 | |
| }, | |
| { | |
| "epoch": 2.601982263954095, | |
| "grad_norm": 0.6140104409006371, | |
| "learning_rate": 3.1673935104297414e-05, | |
| "loss": 0.2612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11127382516860962, | |
| "step": 2495, | |
| "valid_targets_mean": 3389.6, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 2.6071987480438183, | |
| "grad_norm": 0.6212451566952797, | |
| "learning_rate": 3.163166949220675e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15133298933506012, | |
| "step": 2500, | |
| "valid_targets_mean": 4183.8, | |
| "valid_targets_min": 2179 | |
| }, | |
| { | |
| "epoch": 2.612415232133542, | |
| "grad_norm": 0.4664519229232764, | |
| "learning_rate": 3.158932523642451e-05, | |
| "loss": 0.2375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08863572031259537, | |
| "step": 2505, | |
| "valid_targets_mean": 6285.2, | |
| "valid_targets_min": 4772 | |
| }, | |
| { | |
| "epoch": 2.6176317162232654, | |
| "grad_norm": 0.40811345366876906, | |
| "learning_rate": 3.1546902623247385e-05, | |
| "loss": 0.1862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08338138461112976, | |
| "step": 2510, | |
| "valid_targets_mean": 6168.4, | |
| "valid_targets_min": 4703 | |
| }, | |
| { | |
| "epoch": 2.622848200312989, | |
| "grad_norm": 0.3990728183919375, | |
| "learning_rate": 3.1504401939501866e-05, | |
| "loss": 0.1786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07173222303390503, | |
| "step": 2515, | |
| "valid_targets_mean": 5867.9, | |
| "valid_targets_min": 4896 | |
| }, | |
| { | |
| "epoch": 2.6280646844027125, | |
| "grad_norm": 0.4509788125779184, | |
| "learning_rate": 3.146182347254228e-05, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08958551287651062, | |
| "step": 2520, | |
| "valid_targets_mean": 6087.8, | |
| "valid_targets_min": 3783 | |
| }, | |
| { | |
| "epoch": 2.633281168492436, | |
| "grad_norm": 0.43549411058119636, | |
| "learning_rate": 3.141916751024889e-05, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08282480388879776, | |
| "step": 2525, | |
| "valid_targets_mean": 6401.1, | |
| "valid_targets_min": 5374 | |
| }, | |
| { | |
| "epoch": 2.6384976525821595, | |
| "grad_norm": 0.3993495557623095, | |
| "learning_rate": 3.137643434102588e-05, | |
| "loss": 0.1912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08493853360414505, | |
| "step": 2530, | |
| "valid_targets_mean": 6622.6, | |
| "valid_targets_min": 4482 | |
| }, | |
| { | |
| "epoch": 2.6437141366718833, | |
| "grad_norm": 0.4773064661759171, | |
| "learning_rate": 3.1333624253799464e-05, | |
| "loss": 0.1848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07911388576030731, | |
| "step": 2535, | |
| "valid_targets_mean": 6003.2, | |
| "valid_targets_min": 2956 | |
| }, | |
| { | |
| "epoch": 2.6489306207616066, | |
| "grad_norm": 0.39707409115866044, | |
| "learning_rate": 3.1290737538015904e-05, | |
| "loss": 0.1757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08442457020282745, | |
| "step": 2540, | |
| "valid_targets_mean": 6501.6, | |
| "valid_targets_min": 5720 | |
| }, | |
| { | |
| "epoch": 2.6541471048513303, | |
| "grad_norm": 0.4036580972909262, | |
| "learning_rate": 3.1247774483639575e-05, | |
| "loss": 0.2141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08250235766172409, | |
| "step": 2545, | |
| "valid_targets_mean": 6840.0, | |
| "valid_targets_min": 4198 | |
| }, | |
| { | |
| "epoch": 2.6593635889410536, | |
| "grad_norm": 0.40105832871347663, | |
| "learning_rate": 3.120473538115096e-05, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08231037855148315, | |
| "step": 2550, | |
| "valid_targets_mean": 6326.4, | |
| "valid_targets_min": 4837 | |
| }, | |
| { | |
| "epoch": 2.6645800730307774, | |
| "grad_norm": 0.4763695289000698, | |
| "learning_rate": 3.116162052154476e-05, | |
| "loss": 0.1888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06817681342363358, | |
| "step": 2555, | |
| "valid_targets_mean": 3354.9, | |
| "valid_targets_min": 2790 | |
| }, | |
| { | |
| "epoch": 2.6697965571205007, | |
| "grad_norm": 0.4105910004176056, | |
| "learning_rate": 3.111843019632784e-05, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09132213890552521, | |
| "step": 2560, | |
| "valid_targets_mean": 5954.2, | |
| "valid_targets_min": 3635 | |
| }, | |
| { | |
| "epoch": 2.6750130412102244, | |
| "grad_norm": 0.41215729759912895, | |
| "learning_rate": 3.1075164697517326e-05, | |
| "loss": 0.1842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07849319279193878, | |
| "step": 2565, | |
| "valid_targets_mean": 6036.0, | |
| "valid_targets_min": 4393 | |
| }, | |
| { | |
| "epoch": 2.6802295252999477, | |
| "grad_norm": 0.36133332469753043, | |
| "learning_rate": 3.10318243176386e-05, | |
| "loss": 0.1675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09840060770511627, | |
| "step": 2570, | |
| "valid_targets_mean": 8602.2, | |
| "valid_targets_min": 6322 | |
| }, | |
| { | |
| "epoch": 2.6854460093896715, | |
| "grad_norm": 0.5252726508587914, | |
| "learning_rate": 3.0988409349723317e-05, | |
| "loss": 0.1649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07567226141691208, | |
| "step": 2575, | |
| "valid_targets_mean": 1596.1, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 2.690662493479395, | |
| "grad_norm": 0.46209832248640675, | |
| "learning_rate": 3.094492008730746e-05, | |
| "loss": 0.1689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09561724960803986, | |
| "step": 2580, | |
| "valid_targets_mean": 5933.8, | |
| "valid_targets_min": 3625 | |
| }, | |
| { | |
| "epoch": 2.6958789775691185, | |
| "grad_norm": 0.41568373308151746, | |
| "learning_rate": 3.09013568244293e-05, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09148001670837402, | |
| "step": 2585, | |
| "valid_targets_mean": 4973.1, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 2.701095461658842, | |
| "grad_norm": 0.4171019968227014, | |
| "learning_rate": 3.085771985562745e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08243361860513687, | |
| "step": 2590, | |
| "valid_targets_mean": 6099.6, | |
| "valid_targets_min": 4625 | |
| }, | |
| { | |
| "epoch": 2.7063119457485656, | |
| "grad_norm": 0.36908215868866356, | |
| "learning_rate": 3.081400947593887e-05, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07484658807516098, | |
| "step": 2595, | |
| "valid_targets_mean": 5994.6, | |
| "valid_targets_min": 4720 | |
| }, | |
| { | |
| "epoch": 2.711528429838289, | |
| "grad_norm": 0.38923769586537305, | |
| "learning_rate": 3.077022598089685e-05, | |
| "loss": 0.1651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09050647914409637, | |
| "step": 2600, | |
| "valid_targets_mean": 7425.9, | |
| "valid_targets_min": 5948 | |
| }, | |
| { | |
| "epoch": 2.7167449139280127, | |
| "grad_norm": 0.4025352486181996, | |
| "learning_rate": 3.072636966652904e-05, | |
| "loss": 0.1899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11153525114059448, | |
| "step": 2605, | |
| "valid_targets_mean": 5896.1, | |
| "valid_targets_min": 4690 | |
| }, | |
| { | |
| "epoch": 2.721961398017736, | |
| "grad_norm": 0.38438874298229836, | |
| "learning_rate": 3.0682440829355416e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07914496958255768, | |
| "step": 2610, | |
| "valid_targets_mean": 5856.2, | |
| "valid_targets_min": 4262 | |
| }, | |
| { | |
| "epoch": 2.7271778821074597, | |
| "grad_norm": 0.4494951047300392, | |
| "learning_rate": 3.06384397663863e-05, | |
| "loss": 0.1703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08625908941030502, | |
| "step": 2615, | |
| "valid_targets_mean": 5960.1, | |
| "valid_targets_min": 5149 | |
| }, | |
| { | |
| "epoch": 2.732394366197183, | |
| "grad_norm": 0.41874678203461924, | |
| "learning_rate": 3.059436677512035e-05, | |
| "loss": 0.1773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08661193400621414, | |
| "step": 2620, | |
| "valid_targets_mean": 5329.4, | |
| "valid_targets_min": 3450 | |
| }, | |
| { | |
| "epoch": 2.7376108502869068, | |
| "grad_norm": 0.43339480977004696, | |
| "learning_rate": 3.055022215354254e-05, | |
| "loss": 0.1931, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10664729028940201, | |
| "step": 2625, | |
| "valid_targets_mean": 6738.6, | |
| "valid_targets_min": 3531 | |
| }, | |
| { | |
| "epoch": 2.74282733437663, | |
| "grad_norm": 0.43527758000135186, | |
| "learning_rate": 3.0506006200122154e-05, | |
| "loss": 0.2076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11015541851520538, | |
| "step": 2630, | |
| "valid_targets_mean": 7328.4, | |
| "valid_targets_min": 4617 | |
| }, | |
| { | |
| "epoch": 2.748043818466354, | |
| "grad_norm": 0.44019508677444874, | |
| "learning_rate": 3.0461719213810756e-05, | |
| "loss": 0.1952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07948707044124603, | |
| "step": 2635, | |
| "valid_targets_mean": 5306.0, | |
| "valid_targets_min": 3685 | |
| }, | |
| { | |
| "epoch": 2.753260302556077, | |
| "grad_norm": 0.4324155091620953, | |
| "learning_rate": 3.0417361494040167e-05, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08587181568145752, | |
| "step": 2640, | |
| "valid_targets_mean": 5627.4, | |
| "valid_targets_min": 1902 | |
| }, | |
| { | |
| "epoch": 2.758476786645801, | |
| "grad_norm": 0.38141407569588004, | |
| "learning_rate": 3.037293334072047e-05, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0870828926563263, | |
| "step": 2645, | |
| "valid_targets_mean": 7195.9, | |
| "valid_targets_min": 4412 | |
| }, | |
| { | |
| "epoch": 2.763693270735524, | |
| "grad_norm": 0.46117019179354823, | |
| "learning_rate": 3.0328435054237944e-05, | |
| "loss": 0.185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08185586333274841, | |
| "step": 2650, | |
| "valid_targets_mean": 5172.0, | |
| "valid_targets_min": 3161 | |
| }, | |
| { | |
| "epoch": 2.768909754825248, | |
| "grad_norm": 0.407121208613779, | |
| "learning_rate": 3.0283866935453057e-05, | |
| "loss": 0.1804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09270358085632324, | |
| "step": 2655, | |
| "valid_targets_mean": 7083.6, | |
| "valid_targets_min": 5428 | |
| }, | |
| { | |
| "epoch": 2.7741262389149712, | |
| "grad_norm": 0.44698755149033326, | |
| "learning_rate": 3.023922928569843e-05, | |
| "loss": 0.1836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10404074192047119, | |
| "step": 2660, | |
| "valid_targets_mean": 6086.4, | |
| "valid_targets_min": 2364 | |
| }, | |
| { | |
| "epoch": 2.779342723004695, | |
| "grad_norm": 0.43414066777042026, | |
| "learning_rate": 3.019452240677678e-05, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08894358575344086, | |
| "step": 2665, | |
| "valid_targets_mean": 6284.8, | |
| "valid_targets_min": 4793 | |
| }, | |
| { | |
| "epoch": 2.7845592070944183, | |
| "grad_norm": 0.4288131082919885, | |
| "learning_rate": 3.0149746600958908e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09316609054803848, | |
| "step": 2670, | |
| "valid_targets_mean": 6081.4, | |
| "valid_targets_min": 4588 | |
| }, | |
| { | |
| "epoch": 2.789775691184142, | |
| "grad_norm": 0.39020236026654825, | |
| "learning_rate": 3.0104902170981633e-05, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07775271683931351, | |
| "step": 2675, | |
| "valid_targets_mean": 7343.1, | |
| "valid_targets_min": 5585 | |
| }, | |
| { | |
| "epoch": 2.7949921752738653, | |
| "grad_norm": 0.4420042893874371, | |
| "learning_rate": 3.005998942004576e-05, | |
| "loss": 0.1659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10572090744972229, | |
| "step": 2680, | |
| "valid_targets_mean": 6391.6, | |
| "valid_targets_min": 3985 | |
| }, | |
| { | |
| "epoch": 2.800208659363589, | |
| "grad_norm": 0.43128579876489953, | |
| "learning_rate": 3.0015008651814023e-05, | |
| "loss": 0.1858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10359874367713928, | |
| "step": 2685, | |
| "valid_targets_mean": 7515.0, | |
| "valid_targets_min": 5325 | |
| }, | |
| { | |
| "epoch": 2.8054251434533124, | |
| "grad_norm": 0.43143207384472954, | |
| "learning_rate": 2.9969960170409033e-05, | |
| "loss": 0.1856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10446514189243317, | |
| "step": 2690, | |
| "valid_targets_mean": 6863.9, | |
| "valid_targets_min": 4423 | |
| }, | |
| { | |
| "epoch": 2.810641627543036, | |
| "grad_norm": 0.5067531651704211, | |
| "learning_rate": 2.9924844280411208e-05, | |
| "loss": 0.1767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09066420048475266, | |
| "step": 2695, | |
| "valid_targets_mean": 5935.5, | |
| "valid_targets_min": 4064 | |
| }, | |
| { | |
| "epoch": 2.8158581116327595, | |
| "grad_norm": 0.34828969739559845, | |
| "learning_rate": 2.987966128685674e-05, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08554694056510925, | |
| "step": 2700, | |
| "valid_targets_mean": 7679.9, | |
| "valid_targets_min": 5036 | |
| }, | |
| { | |
| "epoch": 2.821074595722483, | |
| "grad_norm": 0.4327723691662246, | |
| "learning_rate": 2.9834411495235526e-05, | |
| "loss": 0.1701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10033709555864334, | |
| "step": 2705, | |
| "valid_targets_mean": 6041.1, | |
| "valid_targets_min": 5000 | |
| }, | |
| { | |
| "epoch": 2.8262910798122065, | |
| "grad_norm": 0.4385764686542864, | |
| "learning_rate": 2.9789095211489082e-05, | |
| "loss": 0.1757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09196151047945023, | |
| "step": 2710, | |
| "valid_targets_mean": 6254.0, | |
| "valid_targets_min": 4602 | |
| }, | |
| { | |
| "epoch": 2.8315075639019303, | |
| "grad_norm": 0.4222129423103403, | |
| "learning_rate": 2.9743712742008486e-05, | |
| "loss": 0.182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07351937890052795, | |
| "step": 2715, | |
| "valid_targets_mean": 5628.6, | |
| "valid_targets_min": 4475 | |
| }, | |
| { | |
| "epoch": 2.8367240479916536, | |
| "grad_norm": 0.45031611976077035, | |
| "learning_rate": 2.9698264393632326e-05, | |
| "loss": 0.17, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07873383909463882, | |
| "step": 2720, | |
| "valid_targets_mean": 6075.8, | |
| "valid_targets_min": 4824 | |
| }, | |
| { | |
| "epoch": 2.8419405320813773, | |
| "grad_norm": 0.4145744603664555, | |
| "learning_rate": 2.9652750473644597e-05, | |
| "loss": 0.1896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09094424545764923, | |
| "step": 2725, | |
| "valid_targets_mean": 5807.0, | |
| "valid_targets_min": 4982 | |
| }, | |
| { | |
| "epoch": 2.8471570161711006, | |
| "grad_norm": 0.3910038072631834, | |
| "learning_rate": 2.9607171289772635e-05, | |
| "loss": 0.1631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07469527423381805, | |
| "step": 2730, | |
| "valid_targets_mean": 5453.1, | |
| "valid_targets_min": 4443 | |
| }, | |
| { | |
| "epoch": 2.8523735002608244, | |
| "grad_norm": 0.39427103424377047, | |
| "learning_rate": 2.9561527150185035e-05, | |
| "loss": 0.1526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07363346964120865, | |
| "step": 2735, | |
| "valid_targets_mean": 6058.6, | |
| "valid_targets_min": 4802 | |
| }, | |
| { | |
| "epoch": 2.8575899843505477, | |
| "grad_norm": 0.3810322728268205, | |
| "learning_rate": 2.9515818363489582e-05, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06846986711025238, | |
| "step": 2740, | |
| "valid_targets_mean": 5375.4, | |
| "valid_targets_min": 4559 | |
| }, | |
| { | |
| "epoch": 2.8628064684402714, | |
| "grad_norm": 0.39137379687742896, | |
| "learning_rate": 2.9470045238731127e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08692730963230133, | |
| "step": 2745, | |
| "valid_targets_mean": 6076.2, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 2.8680229525299947, | |
| "grad_norm": 0.4194724566452105, | |
| "learning_rate": 2.9424208085389544e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10398776084184647, | |
| "step": 2750, | |
| "valid_targets_mean": 6920.9, | |
| "valid_targets_min": 4491 | |
| }, | |
| { | |
| "epoch": 2.873239436619718, | |
| "grad_norm": 0.5664642530203564, | |
| "learning_rate": 2.9378307213377603e-05, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13758191466331482, | |
| "step": 2755, | |
| "valid_targets_mean": 4242.4, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 2.878455920709442, | |
| "grad_norm": 0.47380969995984196, | |
| "learning_rate": 2.93323429330389e-05, | |
| "loss": 0.1877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08468654006719589, | |
| "step": 2760, | |
| "valid_targets_mean": 5535.4, | |
| "valid_targets_min": 3893 | |
| }, | |
| { | |
| "epoch": 2.8836724047991655, | |
| "grad_norm": 0.42582712655905436, | |
| "learning_rate": 2.9286315555145718e-05, | |
| "loss": 0.1725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08283861726522446, | |
| "step": 2765, | |
| "valid_targets_mean": 5532.1, | |
| "valid_targets_min": 4622 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 0.3812328248201834, | |
| "learning_rate": 2.924022539089698e-05, | |
| "loss": 0.1762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0802740603685379, | |
| "step": 2770, | |
| "valid_targets_mean": 7928.9, | |
| "valid_targets_min": 5695 | |
| }, | |
| { | |
| "epoch": 2.894105372978612, | |
| "grad_norm": 0.40549228448649727, | |
| "learning_rate": 2.9194072751916106e-05, | |
| "loss": 0.1845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09553305804729462, | |
| "step": 2775, | |
| "valid_targets_mean": 6435.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 2.899321857068336, | |
| "grad_norm": 0.4155622518612218, | |
| "learning_rate": 2.914785795024893e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09575248509645462, | |
| "step": 2780, | |
| "valid_targets_mean": 6724.8, | |
| "valid_targets_min": 5299 | |
| }, | |
| { | |
| "epoch": 2.9045383411580596, | |
| "grad_norm": 0.4194130672916922, | |
| "learning_rate": 2.9101581298361563e-05, | |
| "loss": 0.1769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10020703077316284, | |
| "step": 2785, | |
| "valid_targets_mean": 6748.9, | |
| "valid_targets_min": 4245 | |
| }, | |
| { | |
| "epoch": 2.909754825247783, | |
| "grad_norm": 0.44592688900911953, | |
| "learning_rate": 2.90552431091383e-05, | |
| "loss": 0.1817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08873110264539719, | |
| "step": 2790, | |
| "valid_targets_mean": 5589.4, | |
| "valid_targets_min": 4409 | |
| }, | |
| { | |
| "epoch": 2.9149713093375063, | |
| "grad_norm": 0.40629174727584494, | |
| "learning_rate": 2.9008843695879508e-05, | |
| "loss": 0.1731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09077601134777069, | |
| "step": 2795, | |
| "valid_targets_mean": 6732.4, | |
| "valid_targets_min": 4797 | |
| }, | |
| { | |
| "epoch": 2.92018779342723, | |
| "grad_norm": 0.41916270698979097, | |
| "learning_rate": 2.896238337229949e-05, | |
| "loss": 0.1649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08564167469739914, | |
| "step": 2800, | |
| "valid_targets_mean": 6316.6, | |
| "valid_targets_min": 5240 | |
| }, | |
| { | |
| "epoch": 2.9254042775169538, | |
| "grad_norm": 0.5028835287678537, | |
| "learning_rate": 2.891586245252439e-05, | |
| "loss": 0.2237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21472665667533875, | |
| "step": 2805, | |
| "valid_targets_mean": 6448.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 2.930620761606677, | |
| "grad_norm": 0.3819218917771511, | |
| "learning_rate": 2.886928125109003e-05, | |
| "loss": 0.1701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06975749135017395, | |
| "step": 2810, | |
| "valid_targets_mean": 6228.1, | |
| "valid_targets_min": 4027 | |
| }, | |
| { | |
| "epoch": 2.9358372456964004, | |
| "grad_norm": 0.42885721905537494, | |
| "learning_rate": 2.882264008293982e-05, | |
| "loss": 0.1698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08788222074508667, | |
| "step": 2815, | |
| "valid_targets_mean": 6315.6, | |
| "valid_targets_min": 4547 | |
| }, | |
| { | |
| "epoch": 2.941053729786124, | |
| "grad_norm": 0.427708328389471, | |
| "learning_rate": 2.8775939263422615e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08929593116044998, | |
| "step": 2820, | |
| "valid_targets_mean": 5824.5, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 2.946270213875848, | |
| "grad_norm": 0.38357647001932016, | |
| "learning_rate": 2.872917910829059e-05, | |
| "loss": 0.1793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07487665116786957, | |
| "step": 2825, | |
| "valid_targets_mean": 6651.8, | |
| "valid_targets_min": 4329 | |
| }, | |
| { | |
| "epoch": 2.951486697965571, | |
| "grad_norm": 0.43476819622450774, | |
| "learning_rate": 2.8682359933697075e-05, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08243931829929352, | |
| "step": 2830, | |
| "valid_targets_mean": 6751.9, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 2.9567031820552945, | |
| "grad_norm": 0.4011272243469665, | |
| "learning_rate": 2.863548205619447e-05, | |
| "loss": 0.1736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09242729097604752, | |
| "step": 2835, | |
| "valid_targets_mean": 6895.8, | |
| "valid_targets_min": 4707 | |
| }, | |
| { | |
| "epoch": 2.9619196661450182, | |
| "grad_norm": 0.40563367271241674, | |
| "learning_rate": 2.8588545792732056e-05, | |
| "loss": 0.1629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09251940995454788, | |
| "step": 2840, | |
| "valid_targets_mean": 6377.2, | |
| "valid_targets_min": 5224 | |
| }, | |
| { | |
| "epoch": 2.967136150234742, | |
| "grad_norm": 0.389423553930026, | |
| "learning_rate": 2.8541551460653875e-05, | |
| "loss": 0.1662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08260069042444229, | |
| "step": 2845, | |
| "valid_targets_mean": 6458.5, | |
| "valid_targets_min": 4735 | |
| }, | |
| { | |
| "epoch": 2.9723526343244653, | |
| "grad_norm": 0.3822673988834252, | |
| "learning_rate": 2.8494499377696586e-05, | |
| "loss": 0.1674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09437039494514465, | |
| "step": 2850, | |
| "valid_targets_mean": 7325.6, | |
| "valid_targets_min": 6050 | |
| }, | |
| { | |
| "epoch": 2.9775691184141886, | |
| "grad_norm": 0.3772006968599561, | |
| "learning_rate": 2.8447389861987295e-05, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10092143714427948, | |
| "step": 2855, | |
| "valid_targets_mean": 7341.4, | |
| "valid_targets_min": 4982 | |
| }, | |
| { | |
| "epoch": 2.9827856025039123, | |
| "grad_norm": 0.36676496087033744, | |
| "learning_rate": 2.8400223232041456e-05, | |
| "loss": 0.16, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08028563857078552, | |
| "step": 2860, | |
| "valid_targets_mean": 7924.5, | |
| "valid_targets_min": 6578 | |
| }, | |
| { | |
| "epoch": 2.988002086593636, | |
| "grad_norm": 0.4192073418241293, | |
| "learning_rate": 2.835299980676064e-05, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09415009617805481, | |
| "step": 2865, | |
| "valid_targets_mean": 6218.9, | |
| "valid_targets_min": 4682 | |
| }, | |
| { | |
| "epoch": 2.9932185706833594, | |
| "grad_norm": 0.48708126672137875, | |
| "learning_rate": 2.8305719905430442e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09825067222118378, | |
| "step": 2870, | |
| "valid_targets_mean": 6293.5, | |
| "valid_targets_min": 5075 | |
| }, | |
| { | |
| "epoch": 2.9984350547730827, | |
| "grad_norm": 0.4569358003785821, | |
| "learning_rate": 2.8258383847718306e-05, | |
| "loss": 0.1846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0871298685669899, | |
| "step": 2875, | |
| "valid_targets_mean": 5323.8, | |
| "valid_targets_min": 2821 | |
| }, | |
| { | |
| "epoch": 3.003129890453834, | |
| "grad_norm": 0.5328178602799944, | |
| "learning_rate": 2.821099195367135e-05, | |
| "loss": 0.2314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12538772821426392, | |
| "step": 2880, | |
| "valid_targets_mean": 5440.5, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 3.0083463745435575, | |
| "grad_norm": 0.5241768871603599, | |
| "learning_rate": 2.8163544543714216e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11469738930463791, | |
| "step": 2885, | |
| "valid_targets_mean": 3727.0, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 3.013562858633281, | |
| "grad_norm": 0.4531335974769602, | |
| "learning_rate": 2.811604193864689e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09050817787647247, | |
| "step": 2890, | |
| "valid_targets_mean": 4704.1, | |
| "valid_targets_min": 2909 | |
| }, | |
| { | |
| "epoch": 3.0187793427230045, | |
| "grad_norm": 0.5670417861093184, | |
| "learning_rate": 2.806848445964255e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.129816472530365, | |
| "step": 2895, | |
| "valid_targets_mean": 4356.1, | |
| "valid_targets_min": 1561 | |
| }, | |
| { | |
| "epoch": 3.0239958268127283, | |
| "grad_norm": 0.636723880959804, | |
| "learning_rate": 2.8020872428245382e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11132115125656128, | |
| "step": 2900, | |
| "valid_targets_mean": 3706.4, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 3.0292123109024516, | |
| "grad_norm": 0.4739231295781894, | |
| "learning_rate": 2.797320616636841e-05, | |
| "loss": 0.2696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13508076965808868, | |
| "step": 2905, | |
| "valid_targets_mean": 5442.8, | |
| "valid_targets_min": 1651 | |
| }, | |
| { | |
| "epoch": 3.0344287949921753, | |
| "grad_norm": 0.45322885788088474, | |
| "learning_rate": 2.792548599629132e-05, | |
| "loss": 0.2488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12747766077518463, | |
| "step": 2910, | |
| "valid_targets_mean": 6709.0, | |
| "valid_targets_min": 3840 | |
| }, | |
| { | |
| "epoch": 3.0396452790818986, | |
| "grad_norm": 0.5582922657732436, | |
| "learning_rate": 2.787771224065829e-05, | |
| "loss": 0.2539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13735924661159515, | |
| "step": 2915, | |
| "valid_targets_mean": 5088.9, | |
| "valid_targets_min": 1511 | |
| }, | |
| { | |
| "epoch": 3.0448617631716224, | |
| "grad_norm": 0.5525688512801951, | |
| "learning_rate": 2.782988522247578e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15440835058689117, | |
| "step": 2920, | |
| "valid_targets_mean": 6514.0, | |
| "valid_targets_min": 1748 | |
| }, | |
| { | |
| "epoch": 3.0500782472613457, | |
| "grad_norm": 0.5512203373386613, | |
| "learning_rate": 2.7782005265110375e-05, | |
| "loss": 0.2637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13513679802417755, | |
| "step": 2925, | |
| "valid_targets_mean": 4561.9, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 3.0552947313510694, | |
| "grad_norm": 0.585311855423859, | |
| "learning_rate": 2.7734072692286604e-05, | |
| "loss": 0.2599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11871016025543213, | |
| "step": 2930, | |
| "valid_targets_mean": 3019.6, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 3.0605112154407927, | |
| "grad_norm": 0.5685972553900638, | |
| "learning_rate": 2.768608782808472e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1291067898273468, | |
| "step": 2935, | |
| "valid_targets_mean": 5904.4, | |
| "valid_targets_min": 1829 | |
| }, | |
| { | |
| "epoch": 3.0657276995305165, | |
| "grad_norm": 0.48794878994954966, | |
| "learning_rate": 2.763805099693854e-05, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11330127716064453, | |
| "step": 2940, | |
| "valid_targets_mean": 4564.4, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 3.07094418362024, | |
| "grad_norm": 0.5927642017342784, | |
| "learning_rate": 2.7589962523633218e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13140177726745605, | |
| "step": 2945, | |
| "valid_targets_mean": 3930.2, | |
| "valid_targets_min": 2041 | |
| }, | |
| { | |
| "epoch": 3.0761606677099635, | |
| "grad_norm": 0.5741298585318886, | |
| "learning_rate": 2.75418227333031e-05, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1320699006319046, | |
| "step": 2950, | |
| "valid_targets_mean": 4118.0, | |
| "valid_targets_min": 1353 | |
| }, | |
| { | |
| "epoch": 3.081377151799687, | |
| "grad_norm": 0.615092177335412, | |
| "learning_rate": 2.749363195142947e-05, | |
| "loss": 0.2572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11204314976930618, | |
| "step": 2955, | |
| "valid_targets_mean": 3082.5, | |
| "valid_targets_min": 1269 | |
| }, | |
| { | |
| "epoch": 3.0865936358894106, | |
| "grad_norm": 0.5944736938679396, | |
| "learning_rate": 2.744539050383838e-05, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13664039969444275, | |
| "step": 2960, | |
| "valid_targets_mean": 4006.6, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 3.091810119979134, | |
| "grad_norm": 0.5988448986301992, | |
| "learning_rate": 2.7397098716698463e-05, | |
| "loss": 0.2617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12584733963012695, | |
| "step": 2965, | |
| "valid_targets_mean": 3566.5, | |
| "valid_targets_min": 2704 | |
| }, | |
| { | |
| "epoch": 3.0970266040688577, | |
| "grad_norm": 0.6173714394698799, | |
| "learning_rate": 2.7348756916518663e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13249710202217102, | |
| "step": 2970, | |
| "valid_targets_mean": 3457.6, | |
| "valid_targets_min": 1399 | |
| }, | |
| { | |
| "epoch": 3.102243088158581, | |
| "grad_norm": 0.5771551987650408, | |
| "learning_rate": 2.7300365430146117e-05, | |
| "loss": 0.2598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1428576111793518, | |
| "step": 2975, | |
| "valid_targets_mean": 4442.8, | |
| "valid_targets_min": 2931 | |
| }, | |
| { | |
| "epoch": 3.1074595722483047, | |
| "grad_norm": 0.6049991359252717, | |
| "learning_rate": 2.7251924584763867e-05, | |
| "loss": 0.2597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09134162217378616, | |
| "step": 2980, | |
| "valid_targets_mean": 2659.6, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 3.112676056338028, | |
| "grad_norm": 0.6389438255687586, | |
| "learning_rate": 2.7203434707888684e-05, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11147916316986084, | |
| "step": 2985, | |
| "valid_targets_mean": 3049.0, | |
| "valid_targets_min": 1955 | |
| }, | |
| { | |
| "epoch": 3.1178925404277518, | |
| "grad_norm": 0.5588131409391646, | |
| "learning_rate": 2.7154896127368872e-05, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12615853548049927, | |
| "step": 2990, | |
| "valid_targets_mean": 5040.6, | |
| "valid_targets_min": 2222 | |
| }, | |
| { | |
| "epoch": 3.123109024517475, | |
| "grad_norm": 0.6128687545703669, | |
| "learning_rate": 2.7106309171381995e-05, | |
| "loss": 0.248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11452427506446838, | |
| "step": 2995, | |
| "valid_targets_mean": 3610.1, | |
| "valid_targets_min": 1165 | |
| }, | |
| { | |
| "epoch": 3.128325508607199, | |
| "grad_norm": 0.5892115664836025, | |
| "learning_rate": 2.705767416843272e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11303652077913284, | |
| "step": 3000, | |
| "valid_targets_mean": 3885.1, | |
| "valid_targets_min": 1951 | |
| }, | |
| { | |
| "epoch": 3.133541992696922, | |
| "grad_norm": 0.5671467826117348, | |
| "learning_rate": 2.700899144735055e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1562727391719818, | |
| "step": 3005, | |
| "valid_targets_mean": 4833.9, | |
| "valid_targets_min": 1765 | |
| }, | |
| { | |
| "epoch": 3.138758476786646, | |
| "grad_norm": 0.6098864807869582, | |
| "learning_rate": 2.696026133728763e-05, | |
| "loss": 0.2401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11387574672698975, | |
| "step": 3010, | |
| "valid_targets_mean": 3226.4, | |
| "valid_targets_min": 1948 | |
| }, | |
| { | |
| "epoch": 3.143974960876369, | |
| "grad_norm": 0.5340654675644365, | |
| "learning_rate": 2.69114841677165e-05, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10542303323745728, | |
| "step": 3015, | |
| "valid_targets_mean": 3881.2, | |
| "valid_targets_min": 1603 | |
| }, | |
| { | |
| "epoch": 3.149191444966093, | |
| "grad_norm": 0.6032482048553691, | |
| "learning_rate": 2.6862660268427885e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1262836754322052, | |
| "step": 3020, | |
| "valid_targets_mean": 3789.2, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 3.1544079290558162, | |
| "grad_norm": 0.5578026356642047, | |
| "learning_rate": 2.6813789969528454e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11230334639549255, | |
| "step": 3025, | |
| "valid_targets_mean": 3687.8, | |
| "valid_targets_min": 1410 | |
| }, | |
| { | |
| "epoch": 3.15962441314554, | |
| "grad_norm": 0.5205319239728282, | |
| "learning_rate": 2.6764873601438588e-05, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11862343549728394, | |
| "step": 3030, | |
| "valid_targets_mean": 4841.0, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 3.1648408972352633, | |
| "grad_norm": 0.6410203231247146, | |
| "learning_rate": 2.6715911494890163e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13596734404563904, | |
| "step": 3035, | |
| "valid_targets_mean": 3541.0, | |
| "valid_targets_min": 2420 | |
| }, | |
| { | |
| "epoch": 3.170057381324987, | |
| "grad_norm": 0.6099677724577756, | |
| "learning_rate": 2.6666903980924284e-05, | |
| "loss": 0.248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12231546640396118, | |
| "step": 3040, | |
| "valid_targets_mean": 4196.1, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 3.1752738654147104, | |
| "grad_norm": 0.662356882596706, | |
| "learning_rate": 2.6617851390889074e-05, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1463899314403534, | |
| "step": 3045, | |
| "valid_targets_mean": 3674.8, | |
| "valid_targets_min": 2306 | |
| }, | |
| { | |
| "epoch": 3.180490349504434, | |
| "grad_norm": 0.645708505456798, | |
| "learning_rate": 2.6568754056437412e-05, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1382240355014801, | |
| "step": 3050, | |
| "valid_targets_mean": 4006.8, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 3.1857068335941574, | |
| "grad_norm": 0.659288732642372, | |
| "learning_rate": 2.6519612309524727e-05, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.133757084608078, | |
| "step": 3055, | |
| "valid_targets_mean": 3767.1, | |
| "valid_targets_min": 1872 | |
| }, | |
| { | |
| "epoch": 3.190923317683881, | |
| "grad_norm": 0.5926084409842253, | |
| "learning_rate": 2.6470426482406688e-05, | |
| "loss": 0.2475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15349382162094116, | |
| "step": 3060, | |
| "valid_targets_mean": 5323.2, | |
| "valid_targets_min": 2730 | |
| }, | |
| { | |
| "epoch": 3.1961398017736045, | |
| "grad_norm": 0.6512761350950464, | |
| "learning_rate": 2.6421196907637036e-05, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10784897953271866, | |
| "step": 3065, | |
| "valid_targets_mean": 2929.2, | |
| "valid_targets_min": 1254 | |
| }, | |
| { | |
| "epoch": 3.201356285863328, | |
| "grad_norm": 0.6089313012548655, | |
| "learning_rate": 2.6371923918065273e-05, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12177689373493195, | |
| "step": 3070, | |
| "valid_targets_mean": 3540.4, | |
| "valid_targets_min": 2098 | |
| }, | |
| { | |
| "epoch": 3.2065727699530515, | |
| "grad_norm": 0.6675024039921, | |
| "learning_rate": 2.6322607846834444e-05, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13874541223049164, | |
| "step": 3075, | |
| "valid_targets_mean": 3339.2, | |
| "valid_targets_min": 1363 | |
| }, | |
| { | |
| "epoch": 3.2117892540427753, | |
| "grad_norm": 0.605607610953424, | |
| "learning_rate": 2.6273249027378878e-05, | |
| "loss": 0.2473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13103488087654114, | |
| "step": 3080, | |
| "valid_targets_mean": 3820.1, | |
| "valid_targets_min": 1308 | |
| }, | |
| { | |
| "epoch": 3.2170057381324986, | |
| "grad_norm": 0.6060281116778171, | |
| "learning_rate": 2.6223847793421938e-05, | |
| "loss": 0.2494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15593184530735016, | |
| "step": 3085, | |
| "valid_targets_mean": 4714.2, | |
| "valid_targets_min": 2620 | |
| }, | |
| { | |
| "epoch": 3.2222222222222223, | |
| "grad_norm": 0.638198221525077, | |
| "learning_rate": 2.6174404478973746e-05, | |
| "loss": 0.2497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12382198870182037, | |
| "step": 3090, | |
| "valid_targets_mean": 3283.2, | |
| "valid_targets_min": 2316 | |
| }, | |
| { | |
| "epoch": 3.2274387063119456, | |
| "grad_norm": 0.7121770302602156, | |
| "learning_rate": 2.612491941832894e-05, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1322920173406601, | |
| "step": 3095, | |
| "valid_targets_mean": 4171.6, | |
| "valid_targets_min": 2680 | |
| }, | |
| { | |
| "epoch": 3.2326551904016694, | |
| "grad_norm": 0.5952537869653255, | |
| "learning_rate": 2.6075392946064417e-05, | |
| "loss": 0.2413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09708808362483978, | |
| "step": 3100, | |
| "valid_targets_mean": 3157.8, | |
| "valid_targets_min": 1294 | |
| }, | |
| { | |
| "epoch": 3.2378716744913927, | |
| "grad_norm": 0.8133199266032085, | |
| "learning_rate": 2.6025825397037057e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1084396243095398, | |
| "step": 3105, | |
| "valid_targets_mean": 3239.6, | |
| "valid_targets_min": 1700 | |
| }, | |
| { | |
| "epoch": 3.2430881585811164, | |
| "grad_norm": 0.5985728916046679, | |
| "learning_rate": 2.5976217106381477e-05, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11410784721374512, | |
| "step": 3110, | |
| "valid_targets_mean": 2965.0, | |
| "valid_targets_min": 1586 | |
| }, | |
| { | |
| "epoch": 3.2483046426708397, | |
| "grad_norm": 0.7122019427392492, | |
| "learning_rate": 2.5926568409507754e-05, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1259598284959793, | |
| "step": 3115, | |
| "valid_targets_mean": 2947.2, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 3.2535211267605635, | |
| "grad_norm": 0.6748880385066424, | |
| "learning_rate": 2.587687964209914e-05, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12200219184160233, | |
| "step": 3120, | |
| "valid_targets_mean": 3410.6, | |
| "valid_targets_min": 2290 | |
| }, | |
| { | |
| "epoch": 3.258737610850287, | |
| "grad_norm": 0.6731474552720095, | |
| "learning_rate": 2.5827151140109836e-05, | |
| "loss": 0.2503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1480051875114441, | |
| "step": 3125, | |
| "valid_targets_mean": 3763.6, | |
| "valid_targets_min": 1678 | |
| }, | |
| { | |
| "epoch": 3.2639540949400105, | |
| "grad_norm": 0.620498032561492, | |
| "learning_rate": 2.5777383239762676e-05, | |
| "loss": 0.2431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12685906887054443, | |
| "step": 3130, | |
| "valid_targets_mean": 3434.9, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 3.269170579029734, | |
| "grad_norm": 0.5800541272669621, | |
| "learning_rate": 2.5727576277546888e-05, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11399306356906891, | |
| "step": 3135, | |
| "valid_targets_mean": 3503.4, | |
| "valid_targets_min": 2011 | |
| }, | |
| { | |
| "epoch": 3.2743870631194576, | |
| "grad_norm": 0.5934306639774838, | |
| "learning_rate": 2.5677730590215792e-05, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14816632866859436, | |
| "step": 3140, | |
| "valid_targets_mean": 5301.6, | |
| "valid_targets_min": 2674 | |
| }, | |
| { | |
| "epoch": 3.279603547209181, | |
| "grad_norm": 0.5974209408596866, | |
| "learning_rate": 2.5627846514784553e-05, | |
| "loss": 0.2295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10846276581287384, | |
| "step": 3145, | |
| "valid_targets_mean": 2980.1, | |
| "valid_targets_min": 1482 | |
| }, | |
| { | |
| "epoch": 3.2848200312989047, | |
| "grad_norm": 0.5306825937139009, | |
| "learning_rate": 2.5577924388527847e-05, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10336439311504364, | |
| "step": 3150, | |
| "valid_targets_mean": 3738.9, | |
| "valid_targets_min": 904 | |
| }, | |
| { | |
| "epoch": 3.290036515388628, | |
| "grad_norm": 0.6225620231982436, | |
| "learning_rate": 2.552796454897766e-05, | |
| "loss": 0.2352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12323644757270813, | |
| "step": 3155, | |
| "valid_targets_mean": 3801.8, | |
| "valid_targets_min": 1194 | |
| }, | |
| { | |
| "epoch": 3.2952529994783517, | |
| "grad_norm": 0.6147982907325058, | |
| "learning_rate": 2.5477967333920942e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12212170660495758, | |
| "step": 3160, | |
| "valid_targets_mean": 4188.1, | |
| "valid_targets_min": 1658 | |
| }, | |
| { | |
| "epoch": 3.300469483568075, | |
| "grad_norm": 0.6309488217192896, | |
| "learning_rate": 2.5427933081397357e-05, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1190255880355835, | |
| "step": 3165, | |
| "valid_targets_mean": 3323.8, | |
| "valid_targets_min": 1884 | |
| }, | |
| { | |
| "epoch": 3.3056859676577988, | |
| "grad_norm": 0.5975980678539443, | |
| "learning_rate": 2.5377862129696966e-05, | |
| "loss": 0.2455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14298589527606964, | |
| "step": 3170, | |
| "valid_targets_mean": 4616.2, | |
| "valid_targets_min": 1185 | |
| }, | |
| { | |
| "epoch": 3.310902451747522, | |
| "grad_norm": 0.5890378180335619, | |
| "learning_rate": 2.5327754817357974e-05, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11331599205732346, | |
| "step": 3175, | |
| "valid_targets_mean": 3698.1, | |
| "valid_targets_min": 1862 | |
| }, | |
| { | |
| "epoch": 3.316118935837246, | |
| "grad_norm": 0.5418236740409021, | |
| "learning_rate": 2.527761148316443e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12370961159467697, | |
| "step": 3180, | |
| "valid_targets_mean": 4769.2, | |
| "valid_targets_min": 2734 | |
| }, | |
| { | |
| "epoch": 3.321335419926969, | |
| "grad_norm": 0.6340075634469478, | |
| "learning_rate": 2.522743246614392e-05, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11277404427528381, | |
| "step": 3185, | |
| "valid_targets_mean": 3229.9, | |
| "valid_targets_min": 644 | |
| }, | |
| { | |
| "epoch": 3.326551904016693, | |
| "grad_norm": 0.6242647574928978, | |
| "learning_rate": 2.5177218105565306e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11351889371871948, | |
| "step": 3190, | |
| "valid_targets_mean": 3360.5, | |
| "valid_targets_min": 1397 | |
| }, | |
| { | |
| "epoch": 3.331768388106416, | |
| "grad_norm": 0.7005521267123596, | |
| "learning_rate": 2.5126968740936386e-05, | |
| "loss": 0.2283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10776380449533463, | |
| "step": 3195, | |
| "valid_targets_mean": 3579.5, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 3.33698487219614, | |
| "grad_norm": 0.5877341187968578, | |
| "learning_rate": 2.5076684712001655e-05, | |
| "loss": 0.2329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12471753358840942, | |
| "step": 3200, | |
| "valid_targets_mean": 4061.2, | |
| "valid_targets_min": 2456 | |
| }, | |
| { | |
| "epoch": 3.3422013562858632, | |
| "grad_norm": 0.5940732961392972, | |
| "learning_rate": 2.5026366358739963e-05, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09739533066749573, | |
| "step": 3205, | |
| "valid_targets_mean": 3381.2, | |
| "valid_targets_min": 1920 | |
| }, | |
| { | |
| "epoch": 3.347417840375587, | |
| "grad_norm": 0.6480998746879608, | |
| "learning_rate": 2.4976014021362236e-05, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11704832315444946, | |
| "step": 3210, | |
| "valid_targets_mean": 3049.6, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 3.3526343244653103, | |
| "grad_norm": 0.7082763026234205, | |
| "learning_rate": 2.492562804030918e-05, | |
| "loss": 0.2287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12999625504016876, | |
| "step": 3215, | |
| "valid_targets_mean": 2781.9, | |
| "valid_targets_min": 720 | |
| }, | |
| { | |
| "epoch": 3.357850808555034, | |
| "grad_norm": 0.6431178094290348, | |
| "learning_rate": 2.487520875624895e-05, | |
| "loss": 0.2371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12872865796089172, | |
| "step": 3220, | |
| "valid_targets_mean": 3739.0, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 3.3630672926447573, | |
| "grad_norm": 0.6439399943836848, | |
| "learning_rate": 2.48247565100749e-05, | |
| "loss": 0.2576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13747379183769226, | |
| "step": 3225, | |
| "valid_targets_mean": 3661.0, | |
| "valid_targets_min": 1510 | |
| }, | |
| { | |
| "epoch": 3.368283776734481, | |
| "grad_norm": 0.6405788216776347, | |
| "learning_rate": 2.4774271642903213e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11055783927440643, | |
| "step": 3230, | |
| "valid_targets_mean": 2943.8, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 3.3735002608242044, | |
| "grad_norm": 0.5659231088879737, | |
| "learning_rate": 2.4723754496070652e-05, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10495679080486298, | |
| "step": 3235, | |
| "valid_targets_mean": 4025.0, | |
| "valid_targets_min": 996 | |
| }, | |
| { | |
| "epoch": 3.378716744913928, | |
| "grad_norm": 0.6204010998347846, | |
| "learning_rate": 2.4673205411132218e-05, | |
| "loss": 0.2338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11923807114362717, | |
| "step": 3240, | |
| "valid_targets_mean": 3508.8, | |
| "valid_targets_min": 2001 | |
| }, | |
| { | |
| "epoch": 3.3839332290036515, | |
| "grad_norm": 0.49881753205546003, | |
| "learning_rate": 2.462262472985886e-05, | |
| "loss": 0.2334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10014287382364273, | |
| "step": 3245, | |
| "valid_targets_mean": 5109.4, | |
| "valid_targets_min": 1805 | |
| }, | |
| { | |
| "epoch": 3.389149713093375, | |
| "grad_norm": 0.5346731991783655, | |
| "learning_rate": 2.457201279423514e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10690750926733017, | |
| "step": 3250, | |
| "valid_targets_mean": 4733.8, | |
| "valid_targets_min": 1511 | |
| }, | |
| { | |
| "epoch": 3.3943661971830985, | |
| "grad_norm": 0.6290502597121308, | |
| "learning_rate": 2.4521369946456954e-05, | |
| "loss": 0.2302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1293402910232544, | |
| "step": 3255, | |
| "valid_targets_mean": 3592.4, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 3.3995826812728223, | |
| "grad_norm": 0.5933854888123136, | |
| "learning_rate": 2.447069652892918e-05, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11077006161212921, | |
| "step": 3260, | |
| "valid_targets_mean": 4055.8, | |
| "valid_targets_min": 2339 | |
| }, | |
| { | |
| "epoch": 3.4047991653625456, | |
| "grad_norm": 0.6331904289180895, | |
| "learning_rate": 2.4419992884263404e-05, | |
| "loss": 0.2345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13219593465328217, | |
| "step": 3265, | |
| "valid_targets_mean": 3922.5, | |
| "valid_targets_min": 1249 | |
| }, | |
| { | |
| "epoch": 3.4100156494522693, | |
| "grad_norm": 0.6246606137263171, | |
| "learning_rate": 2.436925935527557e-05, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11937537789344788, | |
| "step": 3270, | |
| "valid_targets_mean": 3697.8, | |
| "valid_targets_min": 1223 | |
| }, | |
| { | |
| "epoch": 3.4152321335419926, | |
| "grad_norm": 0.6872903713754286, | |
| "learning_rate": 2.4318496284983676e-05, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12470114231109619, | |
| "step": 3275, | |
| "valid_targets_mean": 3235.8, | |
| "valid_targets_min": 1318 | |
| }, | |
| { | |
| "epoch": 3.4204486176317164, | |
| "grad_norm": 0.6569196678880491, | |
| "learning_rate": 2.4267704016605453e-05, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12077215313911438, | |
| "step": 3280, | |
| "valid_targets_mean": 2734.0, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 3.4256651017214397, | |
| "grad_norm": 0.6375855008441624, | |
| "learning_rate": 2.421688289355606e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1316242665052414, | |
| "step": 3285, | |
| "valid_targets_mean": 3840.6, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 3.4308815858111634, | |
| "grad_norm": 0.5787766193876774, | |
| "learning_rate": 2.4166033259445723e-05, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12017252296209335, | |
| "step": 3290, | |
| "valid_targets_mean": 4119.0, | |
| "valid_targets_min": 1431 | |
| }, | |
| { | |
| "epoch": 3.4360980699008867, | |
| "grad_norm": 0.5958786685843589, | |
| "learning_rate": 2.4115155458077453e-05, | |
| "loss": 0.2524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1369277834892273, | |
| "step": 3295, | |
| "valid_targets_mean": 4231.4, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 3.4413145539906105, | |
| "grad_norm": 0.629488920492327, | |
| "learning_rate": 2.4064249833444702e-05, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11386667191982269, | |
| "step": 3300, | |
| "valid_targets_mean": 2995.9, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 3.446531038080334, | |
| "grad_norm": 0.5906647614148998, | |
| "learning_rate": 2.401331672972904e-05, | |
| "loss": 0.2427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10709070414304733, | |
| "step": 3305, | |
| "valid_targets_mean": 3318.8, | |
| "valid_targets_min": 1211 | |
| }, | |
| { | |
| "epoch": 3.4517475221700575, | |
| "grad_norm": 0.66902878097557, | |
| "learning_rate": 2.3962356491297814e-05, | |
| "loss": 0.2409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10157613456249237, | |
| "step": 3310, | |
| "valid_targets_mean": 2684.8, | |
| "valid_targets_min": 1889 | |
| }, | |
| { | |
| "epoch": 3.456964006259781, | |
| "grad_norm": 0.6438469070347622, | |
| "learning_rate": 2.391136946270186e-05, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11882095038890839, | |
| "step": 3315, | |
| "valid_targets_mean": 3471.5, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 3.4621804903495046, | |
| "grad_norm": 0.5665296038850571, | |
| "learning_rate": 2.3860355988673122e-05, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12186641246080399, | |
| "step": 3320, | |
| "valid_targets_mean": 4212.0, | |
| "valid_targets_min": 2763 | |
| }, | |
| { | |
| "epoch": 3.467396974439228, | |
| "grad_norm": 0.8261952436595091, | |
| "learning_rate": 2.380931641412236e-05, | |
| "loss": 0.2431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10220103710889816, | |
| "step": 3325, | |
| "valid_targets_mean": 2885.9, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 3.4726134585289516, | |
| "grad_norm": 0.5778027982554235, | |
| "learning_rate": 2.3758251084136794e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11044412851333618, | |
| "step": 3330, | |
| "valid_targets_mean": 4309.1, | |
| "valid_targets_min": 1437 | |
| }, | |
| { | |
| "epoch": 3.477829942618675, | |
| "grad_norm": 0.604357169536002, | |
| "learning_rate": 2.3707160343977792e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10773244500160217, | |
| "step": 3335, | |
| "valid_targets_mean": 3503.0, | |
| "valid_targets_min": 1844 | |
| }, | |
| { | |
| "epoch": 3.4830464267083987, | |
| "grad_norm": 0.6144265236752997, | |
| "learning_rate": 2.3656044539078523e-05, | |
| "loss": 0.2335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12560561299324036, | |
| "step": 3340, | |
| "valid_targets_mean": 4120.4, | |
| "valid_targets_min": 2330 | |
| }, | |
| { | |
| "epoch": 3.488262910798122, | |
| "grad_norm": 0.712714166393161, | |
| "learning_rate": 2.3604904015041617e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11547691375017166, | |
| "step": 3345, | |
| "valid_targets_mean": 3213.6, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 3.4934793948878458, | |
| "grad_norm": 0.5601589701996351, | |
| "learning_rate": 2.3553739117636835e-05, | |
| "loss": 0.2341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12281596660614014, | |
| "step": 3350, | |
| "valid_targets_mean": 5137.2, | |
| "valid_targets_min": 1713 | |
| }, | |
| { | |
| "epoch": 3.498695878977569, | |
| "grad_norm": 0.6300286635673807, | |
| "learning_rate": 2.3502550192798726e-05, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14864148199558258, | |
| "step": 3355, | |
| "valid_targets_mean": 4359.1, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 3.5039123630672924, | |
| "grad_norm": 0.6346112208793668, | |
| "learning_rate": 2.345133758662431e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12066750973463058, | |
| "step": 3360, | |
| "valid_targets_mean": 3986.2, | |
| "valid_targets_min": 2098 | |
| }, | |
| { | |
| "epoch": 3.509128847157016, | |
| "grad_norm": 0.6628385816854059, | |
| "learning_rate": 2.3400101645370702e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1334524154663086, | |
| "step": 3365, | |
| "valid_targets_mean": 3481.6, | |
| "valid_targets_min": 1107 | |
| }, | |
| { | |
| "epoch": 3.51434533124674, | |
| "grad_norm": 0.5853903492194051, | |
| "learning_rate": 2.3348842715452803e-05, | |
| "loss": 0.235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13459880650043488, | |
| "step": 3370, | |
| "valid_targets_mean": 4699.1, | |
| "valid_targets_min": 2273 | |
| }, | |
| { | |
| "epoch": 3.519561815336463, | |
| "grad_norm": 0.6074809865962525, | |
| "learning_rate": 2.3297561143440932e-05, | |
| "loss": 0.2286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12002347409725189, | |
| "step": 3375, | |
| "valid_targets_mean": 4053.8, | |
| "valid_targets_min": 1920 | |
| }, | |
| { | |
| "epoch": 3.5247782994261865, | |
| "grad_norm": 0.5392911772954323, | |
| "learning_rate": 2.3246257276058507e-05, | |
| "loss": 0.2186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11810530722141266, | |
| "step": 3380, | |
| "valid_targets_mean": 5109.0, | |
| "valid_targets_min": 3856 | |
| }, | |
| { | |
| "epoch": 3.5299947835159102, | |
| "grad_norm": 0.7041882354651804, | |
| "learning_rate": 2.3194931460179677e-05, | |
| "loss": 0.2274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09251343458890915, | |
| "step": 3385, | |
| "valid_targets_mean": 3035.6, | |
| "valid_targets_min": 1375 | |
| }, | |
| { | |
| "epoch": 3.535211267605634, | |
| "grad_norm": 0.6225653040256555, | |
| "learning_rate": 2.314358404282699e-05, | |
| "loss": 0.2299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10462531447410583, | |
| "step": 3390, | |
| "valid_targets_mean": 3503.1, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 3.5404277516953573, | |
| "grad_norm": 0.6537295993273218, | |
| "learning_rate": 2.309221537116906e-05, | |
| "loss": 0.2203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1109628677368164, | |
| "step": 3395, | |
| "valid_targets_mean": 3281.4, | |
| "valid_targets_min": 1874 | |
| }, | |
| { | |
| "epoch": 3.5456442357850806, | |
| "grad_norm": 0.5918322744142531, | |
| "learning_rate": 2.3040825792518188e-05, | |
| "loss": 0.219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10810720920562744, | |
| "step": 3400, | |
| "valid_targets_mean": 3377.8, | |
| "valid_targets_min": 1877 | |
| }, | |
| { | |
| "epoch": 3.5508607198748043, | |
| "grad_norm": 0.63578297833589, | |
| "learning_rate": 2.298941565432804e-05, | |
| "loss": 0.2283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12137743830680847, | |
| "step": 3405, | |
| "valid_targets_mean": 4080.1, | |
| "valid_targets_min": 1697 | |
| }, | |
| { | |
| "epoch": 3.556077203964528, | |
| "grad_norm": 0.7154437646967018, | |
| "learning_rate": 2.2937985304191285e-05, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12459730356931686, | |
| "step": 3410, | |
| "valid_targets_mean": 3291.1, | |
| "valid_targets_min": 1804 | |
| }, | |
| { | |
| "epoch": 3.5612936880542514, | |
| "grad_norm": 0.639551638250752, | |
| "learning_rate": 2.2886535089837247e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1219329908490181, | |
| "step": 3415, | |
| "valid_targets_mean": 3880.0, | |
| "valid_targets_min": 1922 | |
| }, | |
| { | |
| "epoch": 3.5665101721439747, | |
| "grad_norm": 0.636970261021399, | |
| "learning_rate": 2.2835065359129564e-05, | |
| "loss": 0.2322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11701183021068573, | |
| "step": 3420, | |
| "valid_targets_mean": 3474.0, | |
| "valid_targets_min": 2114 | |
| }, | |
| { | |
| "epoch": 3.5717266562336984, | |
| "grad_norm": 0.6520892474397556, | |
| "learning_rate": 2.278357646006381e-05, | |
| "loss": 0.2279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10156352818012238, | |
| "step": 3425, | |
| "valid_targets_mean": 3308.4, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 3.576943140323422, | |
| "grad_norm": 0.7097854318418587, | |
| "learning_rate": 2.273206874076518e-05, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10659400373697281, | |
| "step": 3430, | |
| "valid_targets_mean": 3287.5, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 3.5821596244131455, | |
| "grad_norm": 0.6344493048433608, | |
| "learning_rate": 2.26805425494861e-05, | |
| "loss": 0.2261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1291656196117401, | |
| "step": 3435, | |
| "valid_targets_mean": 4199.4, | |
| "valid_targets_min": 2250 | |
| }, | |
| { | |
| "epoch": 3.587376108502869, | |
| "grad_norm": 0.6524045797339227, | |
| "learning_rate": 2.26289982346039e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1383363902568817, | |
| "step": 3440, | |
| "valid_targets_mean": 4590.0, | |
| "valid_targets_min": 2470 | |
| }, | |
| { | |
| "epoch": 3.5925925925925926, | |
| "grad_norm": 0.5432066934989859, | |
| "learning_rate": 2.2577436144618438e-05, | |
| "loss": 0.2364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11627845466136932, | |
| "step": 3445, | |
| "valid_targets_mean": 4423.4, | |
| "valid_targets_min": 2935 | |
| }, | |
| { | |
| "epoch": 3.5978090766823163, | |
| "grad_norm": 0.5762676333906214, | |
| "learning_rate": 2.2525856628149754e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11467200517654419, | |
| "step": 3450, | |
| "valid_targets_mean": 4339.2, | |
| "valid_targets_min": 1817 | |
| }, | |
| { | |
| "epoch": 3.6030255607720396, | |
| "grad_norm": 0.6224994746933468, | |
| "learning_rate": 2.2474260033935718e-05, | |
| "loss": 0.2324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12807044386863708, | |
| "step": 3455, | |
| "valid_targets_mean": 3971.9, | |
| "valid_targets_min": 1727 | |
| }, | |
| { | |
| "epoch": 3.608242044861763, | |
| "grad_norm": 0.5837933001877118, | |
| "learning_rate": 2.2422646710829653e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08826511353254318, | |
| "step": 3460, | |
| "valid_targets_mean": 3147.2, | |
| "valid_targets_min": 1622 | |
| }, | |
| { | |
| "epoch": 3.6134585289514867, | |
| "grad_norm": 0.46446182450022666, | |
| "learning_rate": 2.2371017007798005e-05, | |
| "loss": 0.2045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07843726873397827, | |
| "step": 3465, | |
| "valid_targets_mean": 6618.4, | |
| "valid_targets_min": 3896 | |
| }, | |
| { | |
| "epoch": 3.6186750130412104, | |
| "grad_norm": 0.37359976470798095, | |
| "learning_rate": 2.231937127391795e-05, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06784458458423615, | |
| "step": 3470, | |
| "valid_targets_mean": 6955.8, | |
| "valid_targets_min": 5230 | |
| }, | |
| { | |
| "epoch": 3.6238914971309337, | |
| "grad_norm": 0.5065041373031558, | |
| "learning_rate": 2.226770985837507e-05, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06437147408723831, | |
| "step": 3475, | |
| "valid_targets_mean": 5736.5, | |
| "valid_targets_min": 4342 | |
| }, | |
| { | |
| "epoch": 3.629107981220657, | |
| "grad_norm": 0.4493552660249773, | |
| "learning_rate": 2.2216033110460952e-05, | |
| "loss": 0.1629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07926934957504272, | |
| "step": 3480, | |
| "valid_targets_mean": 5118.6, | |
| "valid_targets_min": 4321 | |
| }, | |
| { | |
| "epoch": 3.634324465310381, | |
| "grad_norm": 0.4662571711014913, | |
| "learning_rate": 2.216434137957087e-05, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0991881936788559, | |
| "step": 3485, | |
| "valid_targets_mean": 5821.6, | |
| "valid_targets_min": 4088 | |
| }, | |
| { | |
| "epoch": 3.6395409494001045, | |
| "grad_norm": 0.437945815481701, | |
| "learning_rate": 2.2112635015201383e-05, | |
| "loss": 0.1695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07521703839302063, | |
| "step": 3490, | |
| "valid_targets_mean": 6549.0, | |
| "valid_targets_min": 4413 | |
| }, | |
| { | |
| "epoch": 3.644757433489828, | |
| "grad_norm": 0.4203676431407671, | |
| "learning_rate": 2.2060914366948004e-05, | |
| "loss": 0.173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10256016254425049, | |
| "step": 3495, | |
| "valid_targets_mean": 7627.8, | |
| "valid_targets_min": 5355 | |
| }, | |
| { | |
| "epoch": 3.649973917579551, | |
| "grad_norm": 0.4407626169642318, | |
| "learning_rate": 2.2009179784502802e-05, | |
| "loss": 0.1606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07370106875896454, | |
| "step": 3500, | |
| "valid_targets_mean": 5261.0, | |
| "valid_targets_min": 2766 | |
| }, | |
| { | |
| "epoch": 3.655190401669275, | |
| "grad_norm": 0.39784096786707623, | |
| "learning_rate": 2.1957431617652074e-05, | |
| "loss": 0.1936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08131895959377289, | |
| "step": 3505, | |
| "valid_targets_mean": 6799.0, | |
| "valid_targets_min": 4926 | |
| }, | |
| { | |
| "epoch": 3.6604068857589986, | |
| "grad_norm": 0.4414140680253097, | |
| "learning_rate": 2.1905670216273967e-05, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07633579522371292, | |
| "step": 3510, | |
| "valid_targets_mean": 6284.2, | |
| "valid_targets_min": 4406 | |
| }, | |
| { | |
| "epoch": 3.665623369848722, | |
| "grad_norm": 0.4826282530601143, | |
| "learning_rate": 2.185389593033609e-05, | |
| "loss": 0.1742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13399258255958557, | |
| "step": 3515, | |
| "valid_targets_mean": 6786.8, | |
| "valid_targets_min": 4715 | |
| }, | |
| { | |
| "epoch": 3.6708398539384453, | |
| "grad_norm": 0.42283392904970313, | |
| "learning_rate": 2.1802109109893184e-05, | |
| "loss": 0.157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07545018196105957, | |
| "step": 3520, | |
| "valid_targets_mean": 5691.8, | |
| "valid_targets_min": 4971 | |
| }, | |
| { | |
| "epoch": 3.676056338028169, | |
| "grad_norm": 0.42231874149001375, | |
| "learning_rate": 2.1750310105084725e-05, | |
| "loss": 0.1681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06555106490850449, | |
| "step": 3525, | |
| "valid_targets_mean": 5637.9, | |
| "valid_targets_min": 4673 | |
| }, | |
| { | |
| "epoch": 3.6812728221178928, | |
| "grad_norm": 0.3820602063554853, | |
| "learning_rate": 2.1698499266132585e-05, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06583434343338013, | |
| "step": 3530, | |
| "valid_targets_mean": 5416.5, | |
| "valid_targets_min": 4507 | |
| }, | |
| { | |
| "epoch": 3.686489306207616, | |
| "grad_norm": 0.4324904969593689, | |
| "learning_rate": 2.164667694333863e-05, | |
| "loss": 0.1488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07300804555416107, | |
| "step": 3535, | |
| "valid_targets_mean": 6103.2, | |
| "valid_targets_min": 4560 | |
| }, | |
| { | |
| "epoch": 3.6917057902973394, | |
| "grad_norm": 0.4629649263259508, | |
| "learning_rate": 2.159484348708239e-05, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08109663426876068, | |
| "step": 3540, | |
| "valid_targets_mean": 5827.8, | |
| "valid_targets_min": 4663 | |
| }, | |
| { | |
| "epoch": 3.696922274387063, | |
| "grad_norm": 0.5966962543678572, | |
| "learning_rate": 2.1542999247818653e-05, | |
| "loss": 0.1761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12644332647323608, | |
| "step": 3545, | |
| "valid_targets_mean": 2712.2, | |
| "valid_targets_min": 182 | |
| }, | |
| { | |
| "epoch": 3.702138758476787, | |
| "grad_norm": 0.4046776410114913, | |
| "learning_rate": 2.149114457607512e-05, | |
| "loss": 0.1411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06464780122041702, | |
| "step": 3550, | |
| "valid_targets_mean": 5468.2, | |
| "valid_targets_min": 3245 | |
| }, | |
| { | |
| "epoch": 3.70735524256651, | |
| "grad_norm": 0.44765878423351213, | |
| "learning_rate": 2.1439279822450034e-05, | |
| "loss": 0.1494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07346566766500473, | |
| "step": 3555, | |
| "valid_targets_mean": 6488.1, | |
| "valid_targets_min": 4896 | |
| }, | |
| { | |
| "epoch": 3.7125717266562335, | |
| "grad_norm": 0.4029478622651516, | |
| "learning_rate": 2.1387405337609787e-05, | |
| "loss": 0.15, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07606856524944305, | |
| "step": 3560, | |
| "valid_targets_mean": 7451.6, | |
| "valid_targets_min": 4704 | |
| }, | |
| { | |
| "epoch": 3.7177882107459572, | |
| "grad_norm": 0.40518971706836593, | |
| "learning_rate": 2.1335521472286578e-05, | |
| "loss": 0.1811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07901161909103394, | |
| "step": 3565, | |
| "valid_targets_mean": 6701.9, | |
| "valid_targets_min": 4659 | |
| }, | |
| { | |
| "epoch": 3.723004694835681, | |
| "grad_norm": 0.36733160748581695, | |
| "learning_rate": 2.1283628577276034e-05, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06584405899047852, | |
| "step": 3570, | |
| "valid_targets_mean": 7120.9, | |
| "valid_targets_min": 4442 | |
| }, | |
| { | |
| "epoch": 3.7282211789254043, | |
| "grad_norm": 0.4137364200036521, | |
| "learning_rate": 2.1231727003434816e-05, | |
| "loss": 0.159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07803145051002502, | |
| "step": 3575, | |
| "valid_targets_mean": 6407.0, | |
| "valid_targets_min": 4670 | |
| }, | |
| { | |
| "epoch": 3.7334376630151276, | |
| "grad_norm": 0.39285656487365483, | |
| "learning_rate": 2.1179817101678272e-05, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07262278348207474, | |
| "step": 3580, | |
| "valid_targets_mean": 7604.9, | |
| "valid_targets_min": 3545 | |
| }, | |
| { | |
| "epoch": 3.7386541471048513, | |
| "grad_norm": 0.4109575631243786, | |
| "learning_rate": 2.1127899222978064e-05, | |
| "loss": 0.1831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09729693830013275, | |
| "step": 3585, | |
| "valid_targets_mean": 7567.9, | |
| "valid_targets_min": 5634 | |
| }, | |
| { | |
| "epoch": 3.743870631194575, | |
| "grad_norm": 0.423079220667748, | |
| "learning_rate": 2.107597371835979e-05, | |
| "loss": 0.1937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09883062541484833, | |
| "step": 3590, | |
| "valid_targets_mean": 6300.1, | |
| "valid_targets_min": 4449 | |
| }, | |
| { | |
| "epoch": 3.7490871152842984, | |
| "grad_norm": 0.3945299565054771, | |
| "learning_rate": 2.1024040938900586e-05, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07824397087097168, | |
| "step": 3595, | |
| "valid_targets_mean": 6269.6, | |
| "valid_targets_min": 4800 | |
| }, | |
| { | |
| "epoch": 3.7543035993740217, | |
| "grad_norm": 0.45245320476298123, | |
| "learning_rate": 2.097210123572679e-05, | |
| "loss": 0.1689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08230909705162048, | |
| "step": 3600, | |
| "valid_targets_mean": 5825.2, | |
| "valid_targets_min": 4455 | |
| }, | |
| { | |
| "epoch": 3.7595200834637454, | |
| "grad_norm": 0.43543634916658713, | |
| "learning_rate": 2.092015496001155e-05, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08002972602844238, | |
| "step": 3605, | |
| "valid_targets_mean": 6547.0, | |
| "valid_targets_min": 4393 | |
| }, | |
| { | |
| "epoch": 3.764736567553469, | |
| "grad_norm": 0.43174540759732544, | |
| "learning_rate": 2.0868202462972458e-05, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08252950012683868, | |
| "step": 3610, | |
| "valid_targets_mean": 5936.0, | |
| "valid_targets_min": 4710 | |
| }, | |
| { | |
| "epoch": 3.7699530516431925, | |
| "grad_norm": 0.410505844208886, | |
| "learning_rate": 2.0816244095869157e-05, | |
| "loss": 0.1631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07169058918952942, | |
| "step": 3615, | |
| "valid_targets_mean": 6396.4, | |
| "valid_targets_min": 5001 | |
| }, | |
| { | |
| "epoch": 3.775169535732916, | |
| "grad_norm": 0.4383659835076105, | |
| "learning_rate": 2.0764280210001e-05, | |
| "loss": 0.1719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08996845036745071, | |
| "step": 3620, | |
| "valid_targets_mean": 6635.6, | |
| "valid_targets_min": 4909 | |
| }, | |
| { | |
| "epoch": 3.7803860198226396, | |
| "grad_norm": 0.45933494092313853, | |
| "learning_rate": 2.0712311156704624e-05, | |
| "loss": 0.1684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07435374706983566, | |
| "step": 3625, | |
| "valid_targets_mean": 5196.2, | |
| "valid_targets_min": 2527 | |
| }, | |
| { | |
| "epoch": 3.7856025039123633, | |
| "grad_norm": 0.4056053332538491, | |
| "learning_rate": 2.066033728735162e-05, | |
| "loss": 0.1685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07838048040866852, | |
| "step": 3630, | |
| "valid_targets_mean": 6386.4, | |
| "valid_targets_min": 4595 | |
| }, | |
| { | |
| "epoch": 3.7908189880020866, | |
| "grad_norm": 0.369324465695597, | |
| "learning_rate": 2.0608358953346146e-05, | |
| "loss": 0.1447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07389973104000092, | |
| "step": 3635, | |
| "valid_targets_mean": 6966.0, | |
| "valid_targets_min": 4894 | |
| }, | |
| { | |
| "epoch": 3.79603547209181, | |
| "grad_norm": 0.3936185429559105, | |
| "learning_rate": 2.0556376506122535e-05, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07261225581169128, | |
| "step": 3640, | |
| "valid_targets_mean": 6822.6, | |
| "valid_targets_min": 4829 | |
| }, | |
| { | |
| "epoch": 3.8012519561815337, | |
| "grad_norm": 0.48256139613445764, | |
| "learning_rate": 2.0504390297142935e-05, | |
| "loss": 0.1758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08893297612667084, | |
| "step": 3645, | |
| "valid_targets_mean": 7109.1, | |
| "valid_targets_min": 5223 | |
| }, | |
| { | |
| "epoch": 3.8064684402712574, | |
| "grad_norm": 0.4393335630123843, | |
| "learning_rate": 2.0452400677894936e-05, | |
| "loss": 0.17, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0912787914276123, | |
| "step": 3650, | |
| "valid_targets_mean": 6694.9, | |
| "valid_targets_min": 4966 | |
| }, | |
| { | |
| "epoch": 3.8116849243609807, | |
| "grad_norm": 0.4360276030112334, | |
| "learning_rate": 2.0400407999889165e-05, | |
| "loss": 0.1631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07746833562850952, | |
| "step": 3655, | |
| "valid_targets_mean": 5583.5, | |
| "valid_targets_min": 3782 | |
| }, | |
| { | |
| "epoch": 3.816901408450704, | |
| "grad_norm": 0.4212868269457988, | |
| "learning_rate": 2.0348412614656952e-05, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06774541735649109, | |
| "step": 3660, | |
| "valid_targets_mean": 5549.4, | |
| "valid_targets_min": 3551 | |
| }, | |
| { | |
| "epoch": 3.8221178925404278, | |
| "grad_norm": 0.4518618024924554, | |
| "learning_rate": 2.0296414873747915e-05, | |
| "loss": 0.1637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08855678141117096, | |
| "step": 3665, | |
| "valid_targets_mean": 5986.4, | |
| "valid_targets_min": 3667 | |
| }, | |
| { | |
| "epoch": 3.8273343766301515, | |
| "grad_norm": 0.42039749080278666, | |
| "learning_rate": 2.0244415128727612e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09838884323835373, | |
| "step": 3670, | |
| "valid_targets_mean": 5938.0, | |
| "valid_targets_min": 4854 | |
| }, | |
| { | |
| "epoch": 3.832550860719875, | |
| "grad_norm": 0.4345508107843753, | |
| "learning_rate": 2.0192413731175146e-05, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06815984845161438, | |
| "step": 3675, | |
| "valid_targets_mean": 5221.1, | |
| "valid_targets_min": 4067 | |
| }, | |
| { | |
| "epoch": 3.837767344809598, | |
| "grad_norm": 0.3908167334207756, | |
| "learning_rate": 2.0140411032680775e-05, | |
| "loss": 0.1568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08416864275932312, | |
| "step": 3680, | |
| "valid_targets_mean": 7217.8, | |
| "valid_targets_min": 4705 | |
| }, | |
| { | |
| "epoch": 3.842983828899322, | |
| "grad_norm": 0.4347299598878379, | |
| "learning_rate": 2.008840738484359e-05, | |
| "loss": 0.1779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07899241894483566, | |
| "step": 3685, | |
| "valid_targets_mean": 5841.9, | |
| "valid_targets_min": 4510 | |
| }, | |
| { | |
| "epoch": 3.8482003129890456, | |
| "grad_norm": 0.45740585767043196, | |
| "learning_rate": 2.0036403139269067e-05, | |
| "loss": 0.1441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07235970348119736, | |
| "step": 3690, | |
| "valid_targets_mean": 6076.8, | |
| "valid_targets_min": 4696 | |
| }, | |
| { | |
| "epoch": 3.853416797078769, | |
| "grad_norm": 0.3918658818567728, | |
| "learning_rate": 1.998439864756674e-05, | |
| "loss": 0.1402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06755511462688446, | |
| "step": 3695, | |
| "valid_targets_mean": 5841.4, | |
| "valid_targets_min": 4465 | |
| }, | |
| { | |
| "epoch": 3.8586332811684922, | |
| "grad_norm": 0.42055620367550356, | |
| "learning_rate": 1.993239426134781e-05, | |
| "loss": 0.1458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07950583100318909, | |
| "step": 3700, | |
| "valid_targets_mean": 6146.8, | |
| "valid_targets_min": 4883 | |
| }, | |
| { | |
| "epoch": 3.863849765258216, | |
| "grad_norm": 0.3958093278409998, | |
| "learning_rate": 1.988039033222275e-05, | |
| "loss": 0.151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07928058505058289, | |
| "step": 3705, | |
| "valid_targets_mean": 6933.9, | |
| "valid_targets_min": 5350 | |
| }, | |
| { | |
| "epoch": 3.8690662493479397, | |
| "grad_norm": 0.42034423184373537, | |
| "learning_rate": 1.982838721179896e-05, | |
| "loss": 0.1697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08753135055303574, | |
| "step": 3710, | |
| "valid_targets_mean": 6186.8, | |
| "valid_targets_min": 4815 | |
| }, | |
| { | |
| "epoch": 3.874282733437663, | |
| "grad_norm": 0.4956328070108992, | |
| "learning_rate": 1.9776385251678352e-05, | |
| "loss": 0.1917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09320716559886932, | |
| "step": 3715, | |
| "valid_targets_mean": 6471.8, | |
| "valid_targets_min": 4435 | |
| }, | |
| { | |
| "epoch": 3.8794992175273864, | |
| "grad_norm": 0.47411182758987847, | |
| "learning_rate": 1.9724384803455013e-05, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09804536402225494, | |
| "step": 3720, | |
| "valid_targets_mean": 6043.5, | |
| "valid_targets_min": 4841 | |
| }, | |
| { | |
| "epoch": 3.88471570161711, | |
| "grad_norm": 0.46361484934313935, | |
| "learning_rate": 1.9672386218712794e-05, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0813588947057724, | |
| "step": 3725, | |
| "valid_targets_mean": 5626.6, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 3.889932185706834, | |
| "grad_norm": 0.3981296378214338, | |
| "learning_rate": 1.962038984902295e-05, | |
| "loss": 0.1629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08089756965637207, | |
| "step": 3730, | |
| "valid_targets_mean": 5946.9, | |
| "valid_targets_min": 4733 | |
| }, | |
| { | |
| "epoch": 3.895148669796557, | |
| "grad_norm": 0.44702845608585673, | |
| "learning_rate": 1.9568396045941766e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08252634108066559, | |
| "step": 3735, | |
| "valid_targets_mean": 6208.2, | |
| "valid_targets_min": 4384 | |
| }, | |
| { | |
| "epoch": 3.9003651538862805, | |
| "grad_norm": 0.40389732639994486, | |
| "learning_rate": 1.951640516100817e-05, | |
| "loss": 0.1786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0899917483329773, | |
| "step": 3740, | |
| "valid_targets_mean": 7067.5, | |
| "valid_targets_min": 5490 | |
| }, | |
| { | |
| "epoch": 3.905581637976004, | |
| "grad_norm": 0.39317045684307433, | |
| "learning_rate": 1.9464417545741352e-05, | |
| "loss": 0.1651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0825595110654831, | |
| "step": 3745, | |
| "valid_targets_mean": 6640.4, | |
| "valid_targets_min": 3599 | |
| }, | |
| { | |
| "epoch": 3.910798122065728, | |
| "grad_norm": 0.4034498400462444, | |
| "learning_rate": 1.941243355163841e-05, | |
| "loss": 0.1696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10053454339504242, | |
| "step": 3750, | |
| "valid_targets_mean": 7752.0, | |
| "valid_targets_min": 5147 | |
| }, | |
| { | |
| "epoch": 3.9160146061554513, | |
| "grad_norm": 0.36066532901536635, | |
| "learning_rate": 1.936045353017195e-05, | |
| "loss": 0.1541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0794573724269867, | |
| "step": 3755, | |
| "valid_targets_mean": 8426.9, | |
| "valid_targets_min": 4903 | |
| }, | |
| { | |
| "epoch": 3.9212310902451746, | |
| "grad_norm": 0.4801452019232369, | |
| "learning_rate": 1.9308477832787724e-05, | |
| "loss": 0.1581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09317049384117126, | |
| "step": 3760, | |
| "valid_targets_mean": 6670.1, | |
| "valid_targets_min": 4444 | |
| }, | |
| { | |
| "epoch": 3.9264475743348983, | |
| "grad_norm": 0.3973077531301437, | |
| "learning_rate": 1.9256506810902226e-05, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.082591213285923, | |
| "step": 3765, | |
| "valid_targets_mean": 6387.2, | |
| "valid_targets_min": 4474 | |
| }, | |
| { | |
| "epoch": 3.931664058424622, | |
| "grad_norm": 0.3707647026060573, | |
| "learning_rate": 1.9204540815900368e-05, | |
| "loss": 0.1545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07417616248130798, | |
| "step": 3770, | |
| "valid_targets_mean": 6365.8, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 3.9368805425143454, | |
| "grad_norm": 0.4135371756444951, | |
| "learning_rate": 1.915258019913306e-05, | |
| "loss": 0.1588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08456431329250336, | |
| "step": 3775, | |
| "valid_targets_mean": 6586.4, | |
| "valid_targets_min": 3596 | |
| }, | |
| { | |
| "epoch": 3.9420970266040687, | |
| "grad_norm": 0.40489175605771827, | |
| "learning_rate": 1.9100625311914855e-05, | |
| "loss": 0.166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08205069601535797, | |
| "step": 3780, | |
| "valid_targets_mean": 6363.9, | |
| "valid_targets_min": 4714 | |
| }, | |
| { | |
| "epoch": 3.9473135106937924, | |
| "grad_norm": 0.4389236386269901, | |
| "learning_rate": 1.9048676505521552e-05, | |
| "loss": 0.1644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08890068531036377, | |
| "step": 3785, | |
| "valid_targets_mean": 6411.8, | |
| "valid_targets_min": 4657 | |
| }, | |
| { | |
| "epoch": 3.952529994783516, | |
| "grad_norm": 0.40549697000358137, | |
| "learning_rate": 1.8996734131187867e-05, | |
| "loss": 0.1448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07563371956348419, | |
| "step": 3790, | |
| "valid_targets_mean": 5596.1, | |
| "valid_targets_min": 4750 | |
| }, | |
| { | |
| "epoch": 3.9577464788732395, | |
| "grad_norm": 0.45123113242804963, | |
| "learning_rate": 1.894479854010499e-05, | |
| "loss": 0.1582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0627874881029129, | |
| "step": 3795, | |
| "valid_targets_mean": 6262.6, | |
| "valid_targets_min": 4620 | |
| }, | |
| { | |
| "epoch": 3.962962962962963, | |
| "grad_norm": 0.4192659749137676, | |
| "learning_rate": 1.8892870083418272e-05, | |
| "loss": 0.1553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06344600021839142, | |
| "step": 3800, | |
| "valid_targets_mean": 5552.9, | |
| "valid_targets_min": 4998 | |
| }, | |
| { | |
| "epoch": 3.9681794470526865, | |
| "grad_norm": 0.3819000814684171, | |
| "learning_rate": 1.8840949112224823e-05, | |
| "loss": 0.152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07665708661079407, | |
| "step": 3805, | |
| "valid_targets_mean": 6240.2, | |
| "valid_targets_min": 4357 | |
| }, | |
| { | |
| "epoch": 3.97339593114241, | |
| "grad_norm": 0.42687911150560537, | |
| "learning_rate": 1.8789035977571145e-05, | |
| "loss": 0.1564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08195020258426666, | |
| "step": 3810, | |
| "valid_targets_mean": 7040.5, | |
| "valid_targets_min": 4815 | |
| }, | |
| { | |
| "epoch": 3.9786124152321336, | |
| "grad_norm": 0.4112057616977073, | |
| "learning_rate": 1.873713103045076e-05, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07531554996967316, | |
| "step": 3815, | |
| "valid_targets_mean": 7012.8, | |
| "valid_targets_min": 5006 | |
| }, | |
| { | |
| "epoch": 3.983828899321857, | |
| "grad_norm": 0.4182233033993366, | |
| "learning_rate": 1.8685234621801803e-05, | |
| "loss": 0.1502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07511749863624573, | |
| "step": 3820, | |
| "valid_targets_mean": 6534.4, | |
| "valid_targets_min": 5343 | |
| }, | |
| { | |
| "epoch": 3.9890453834115807, | |
| "grad_norm": 0.39425552720498763, | |
| "learning_rate": 1.863334710250471e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07950687408447266, | |
| "step": 3825, | |
| "valid_targets_mean": 6909.8, | |
| "valid_targets_min": 5444 | |
| }, | |
| { | |
| "epoch": 3.994261867501304, | |
| "grad_norm": 0.4706697620689, | |
| "learning_rate": 1.858146882337981e-05, | |
| "loss": 0.1685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07978710532188416, | |
| "step": 3830, | |
| "valid_targets_mean": 4885.5, | |
| "valid_targets_min": 2057 | |
| }, | |
| { | |
| "epoch": 3.9994783515910277, | |
| "grad_norm": 0.49683984248668944, | |
| "learning_rate": 1.852960013518495e-05, | |
| "loss": 0.169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08415490388870239, | |
| "step": 3835, | |
| "valid_targets_mean": 5273.2, | |
| "valid_targets_min": 4371 | |
| }, | |
| { | |
| "epoch": 4.005216484089724, | |
| "grad_norm": 0.5642926074321785, | |
| "learning_rate": 1.8477741388613132e-05, | |
| "loss": 0.2701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1377515196800232, | |
| "step": 3840, | |
| "valid_targets_mean": 5540.2, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 4.010432968179447, | |
| "grad_norm": 0.48908908675313234, | |
| "learning_rate": 1.8425892934290152e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11653114855289459, | |
| "step": 3845, | |
| "valid_targets_mean": 6751.5, | |
| "valid_targets_min": 2859 | |
| }, | |
| { | |
| "epoch": 4.01564945226917, | |
| "grad_norm": 0.48077486636589595, | |
| "learning_rate": 1.8374055122772195e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0999593585729599, | |
| "step": 3850, | |
| "valid_targets_mean": 6099.8, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 4.020865936358894, | |
| "grad_norm": 0.5235033314278852, | |
| "learning_rate": 1.8322228304543515e-05, | |
| "loss": 0.2293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13101504743099213, | |
| "step": 3855, | |
| "valid_targets_mean": 6526.8, | |
| "valid_targets_min": 2373 | |
| }, | |
| { | |
| "epoch": 4.026082420448618, | |
| "grad_norm": 0.5956778258202827, | |
| "learning_rate": 1.827041283001403e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11201220750808716, | |
| "step": 3860, | |
| "valid_targets_mean": 4286.5, | |
| "valid_targets_min": 2745 | |
| }, | |
| { | |
| "epoch": 4.031298904538341, | |
| "grad_norm": 0.6385761123328692, | |
| "learning_rate": 1.8218609049516943e-05, | |
| "loss": 0.2351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11561360210180283, | |
| "step": 3865, | |
| "valid_targets_mean": 4962.8, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 4.0365153886280645, | |
| "grad_norm": 0.5947368414728974, | |
| "learning_rate": 1.816681731330641e-05, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12422865629196167, | |
| "step": 3870, | |
| "valid_targets_mean": 4189.5, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 4.041731872717788, | |
| "grad_norm": 0.5448305559306198, | |
| "learning_rate": 1.811503797155516e-05, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12050231546163559, | |
| "step": 3875, | |
| "valid_targets_mean": 4491.2, | |
| "valid_targets_min": 2605 | |
| }, | |
| { | |
| "epoch": 4.046948356807512, | |
| "grad_norm": 0.5623755878200662, | |
| "learning_rate": 1.8063271374352097e-05, | |
| "loss": 0.2372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14401790499687195, | |
| "step": 3880, | |
| "valid_targets_mean": 5947.6, | |
| "valid_targets_min": 1899 | |
| }, | |
| { | |
| "epoch": 4.052164840897235, | |
| "grad_norm": 0.5545604425553117, | |
| "learning_rate": 1.8011517871699978e-05, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11851419508457184, | |
| "step": 3885, | |
| "valid_targets_mean": 4468.6, | |
| "valid_targets_min": 2916 | |
| }, | |
| { | |
| "epoch": 4.057381324986959, | |
| "grad_norm": 0.5599485173640235, | |
| "learning_rate": 1.7959777813513015e-05, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12138886004686356, | |
| "step": 3890, | |
| "valid_targets_mean": 5149.9, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 4.062597809076682, | |
| "grad_norm": 0.5904889448222663, | |
| "learning_rate": 1.7908051549614527e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12189013510942459, | |
| "step": 3895, | |
| "valid_targets_mean": 4151.2, | |
| "valid_targets_min": 1461 | |
| }, | |
| { | |
| "epoch": 4.067814293166406, | |
| "grad_norm": 0.5445904722882943, | |
| "learning_rate": 1.7856339429734557e-05, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11320637166500092, | |
| "step": 3900, | |
| "valid_targets_mean": 5063.0, | |
| "valid_targets_min": 1366 | |
| }, | |
| { | |
| "epoch": 4.073030777256129, | |
| "grad_norm": 0.605639791344134, | |
| "learning_rate": 1.780464180350753e-05, | |
| "loss": 0.2281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09552490711212158, | |
| "step": 3905, | |
| "valid_targets_mean": 3026.4, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 4.078247261345853, | |
| "grad_norm": 0.650596957742453, | |
| "learning_rate": 1.7752959020469868e-05, | |
| "loss": 0.2363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11308236420154572, | |
| "step": 3910, | |
| "valid_targets_mean": 2961.8, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 4.083463745435576, | |
| "grad_norm": 0.5855223369090674, | |
| "learning_rate": 1.7701291430057653e-05, | |
| "loss": 0.2185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11510932445526123, | |
| "step": 3915, | |
| "valid_targets_mean": 4270.5, | |
| "valid_targets_min": 1710 | |
| }, | |
| { | |
| "epoch": 4.0886802295253, | |
| "grad_norm": 0.5743971302902094, | |
| "learning_rate": 1.7649639381604207e-05, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1282425969839096, | |
| "step": 3920, | |
| "valid_targets_mean": 4471.6, | |
| "valid_targets_min": 2006 | |
| }, | |
| { | |
| "epoch": 4.093896713615023, | |
| "grad_norm": 0.6687767812528663, | |
| "learning_rate": 1.759800322433781e-05, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11971822381019592, | |
| "step": 3925, | |
| "valid_targets_mean": 3130.4, | |
| "valid_targets_min": 2227 | |
| }, | |
| { | |
| "epoch": 4.099113197704747, | |
| "grad_norm": 0.6188130993695292, | |
| "learning_rate": 1.7546383307379284e-05, | |
| "loss": 0.2364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11791865527629852, | |
| "step": 3930, | |
| "valid_targets_mean": 3935.1, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 4.1043296817944706, | |
| "grad_norm": 0.629204596884135, | |
| "learning_rate": 1.7494779979739645e-05, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13178078830242157, | |
| "step": 3935, | |
| "valid_targets_mean": 4362.8, | |
| "valid_targets_min": 2809 | |
| }, | |
| { | |
| "epoch": 4.109546165884194, | |
| "grad_norm": 0.5731678331060561, | |
| "learning_rate": 1.7443193590317756e-05, | |
| "loss": 0.2234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08953022956848145, | |
| "step": 3940, | |
| "valid_targets_mean": 3333.0, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 4.114762649973917, | |
| "grad_norm": 0.616694650557625, | |
| "learning_rate": 1.7391624487897947e-05, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12539394199848175, | |
| "step": 3945, | |
| "valid_targets_mean": 4949.2, | |
| "valid_targets_min": 2567 | |
| }, | |
| { | |
| "epoch": 4.119979134063641, | |
| "grad_norm": 0.7059971053674392, | |
| "learning_rate": 1.734007302114768e-05, | |
| "loss": 0.207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09253738075494766, | |
| "step": 3950, | |
| "valid_targets_mean": 2560.8, | |
| "valid_targets_min": 1505 | |
| }, | |
| { | |
| "epoch": 4.125195618153365, | |
| "grad_norm": 0.7036326792507426, | |
| "learning_rate": 1.728853953861516e-05, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10895891487598419, | |
| "step": 3955, | |
| "valid_targets_mean": 3180.1, | |
| "valid_targets_min": 1978 | |
| }, | |
| { | |
| "epoch": 4.130412102243088, | |
| "grad_norm": 0.6052738188995984, | |
| "learning_rate": 1.723702438872701e-05, | |
| "loss": 0.2249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11007113009691238, | |
| "step": 3960, | |
| "valid_targets_mean": 4338.9, | |
| "valid_targets_min": 699 | |
| }, | |
| { | |
| "epoch": 4.135628586332811, | |
| "grad_norm": 0.5654114100584382, | |
| "learning_rate": 1.718552791978591e-05, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10845179855823517, | |
| "step": 3965, | |
| "valid_targets_mean": 3750.2, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 4.140845070422535, | |
| "grad_norm": 0.5856435789283881, | |
| "learning_rate": 1.713405047996822e-05, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11388012766838074, | |
| "step": 3970, | |
| "valid_targets_mean": 4000.8, | |
| "valid_targets_min": 1786 | |
| }, | |
| { | |
| "epoch": 4.146061554512259, | |
| "grad_norm": 0.6414206140012945, | |
| "learning_rate": 1.7082592417321647e-05, | |
| "loss": 0.2156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11015298962593079, | |
| "step": 3975, | |
| "valid_targets_mean": 3459.1, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 4.1512780386019825, | |
| "grad_norm": 0.6298045333113581, | |
| "learning_rate": 1.7031154079762885e-05, | |
| "loss": 0.2255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1005692183971405, | |
| "step": 3980, | |
| "valid_targets_mean": 3505.4, | |
| "valid_targets_min": 1825 | |
| }, | |
| { | |
| "epoch": 4.156494522691705, | |
| "grad_norm": 0.636769225757317, | |
| "learning_rate": 1.6979735815075263e-05, | |
| "loss": 0.2194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10404954850673676, | |
| "step": 3985, | |
| "valid_targets_mean": 3360.8, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 4.161711006781429, | |
| "grad_norm": 0.6358473059714908, | |
| "learning_rate": 1.6928337970906406e-05, | |
| "loss": 0.2224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09258357435464859, | |
| "step": 3990, | |
| "valid_targets_mean": 3075.5, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 4.166927490871153, | |
| "grad_norm": 0.7084093909347079, | |
| "learning_rate": 1.687696089476585e-05, | |
| "loss": 0.2207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13329192996025085, | |
| "step": 3995, | |
| "valid_targets_mean": 2868.9, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 4.172143974960877, | |
| "grad_norm": 0.6419208206234245, | |
| "learning_rate": 1.6825604934022734e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1333308219909668, | |
| "step": 4000, | |
| "valid_targets_mean": 4565.0, | |
| "valid_targets_min": 1941 | |
| }, | |
| { | |
| "epoch": 4.1773604590505995, | |
| "grad_norm": 0.6407170095846483, | |
| "learning_rate": 1.6774270435903423e-05, | |
| "loss": 0.215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09747381508350372, | |
| "step": 4005, | |
| "valid_targets_mean": 3095.5, | |
| "valid_targets_min": 1281 | |
| }, | |
| { | |
| "epoch": 4.182576943140323, | |
| "grad_norm": 0.6583542529710326, | |
| "learning_rate": 1.6722957747489172e-05, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10393165796995163, | |
| "step": 4010, | |
| "valid_targets_mean": 3554.9, | |
| "valid_targets_min": 1697 | |
| }, | |
| { | |
| "epoch": 4.187793427230047, | |
| "grad_norm": 0.6226345096868418, | |
| "learning_rate": 1.6671667215713784e-05, | |
| "loss": 0.2241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09727461636066437, | |
| "step": 4015, | |
| "valid_targets_mean": 3937.0, | |
| "valid_targets_min": 2306 | |
| }, | |
| { | |
| "epoch": 4.193009911319771, | |
| "grad_norm": 0.611304036673067, | |
| "learning_rate": 1.6620399187361226e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1254786252975464, | |
| "step": 4020, | |
| "valid_targets_mean": 4594.0, | |
| "valid_targets_min": 2534 | |
| }, | |
| { | |
| "epoch": 4.198226395409494, | |
| "grad_norm": 0.6380251643291203, | |
| "learning_rate": 1.656915400906336e-05, | |
| "loss": 0.2163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0909174308180809, | |
| "step": 4025, | |
| "valid_targets_mean": 2910.1, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 4.203442879499217, | |
| "grad_norm": 0.6656173815054769, | |
| "learning_rate": 1.6517932027297525e-05, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09456546604633331, | |
| "step": 4030, | |
| "valid_targets_mean": 2873.5, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 4.208659363588941, | |
| "grad_norm": 0.6210898954065078, | |
| "learning_rate": 1.6466733588384233e-05, | |
| "loss": 0.2228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11676488816738129, | |
| "step": 4035, | |
| "valid_targets_mean": 3721.0, | |
| "valid_targets_min": 2109 | |
| }, | |
| { | |
| "epoch": 4.213875847678665, | |
| "grad_norm": 0.7260760139916839, | |
| "learning_rate": 1.6415559038484827e-05, | |
| "loss": 0.2167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09377293288707733, | |
| "step": 4040, | |
| "valid_targets_mean": 2451.2, | |
| "valid_targets_min": 1855 | |
| }, | |
| { | |
| "epoch": 4.219092331768388, | |
| "grad_norm": 0.6479025254717592, | |
| "learning_rate": 1.6364408723599116e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1260455846786499, | |
| "step": 4045, | |
| "valid_targets_mean": 4160.6, | |
| "valid_targets_min": 1580 | |
| }, | |
| { | |
| "epoch": 4.2243088158581115, | |
| "grad_norm": 0.6616200797083767, | |
| "learning_rate": 1.6313282989563062e-05, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10317033529281616, | |
| "step": 4050, | |
| "valid_targets_mean": 3270.6, | |
| "valid_targets_min": 1577 | |
| }, | |
| { | |
| "epoch": 4.229525299947835, | |
| "grad_norm": 0.5445276463670887, | |
| "learning_rate": 1.626218218204643e-05, | |
| "loss": 0.2165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08595336973667145, | |
| "step": 4055, | |
| "valid_targets_mean": 3585.1, | |
| "valid_targets_min": 1856 | |
| }, | |
| { | |
| "epoch": 4.234741784037559, | |
| "grad_norm": 0.5926703698220305, | |
| "learning_rate": 1.6211106646550455e-05, | |
| "loss": 0.2146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13162322342395782, | |
| "step": 4060, | |
| "valid_targets_mean": 4419.6, | |
| "valid_targets_min": 2508 | |
| }, | |
| { | |
| "epoch": 4.239958268127282, | |
| "grad_norm": 0.5852880746491417, | |
| "learning_rate": 1.616005672840551e-05, | |
| "loss": 0.2249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12295317649841309, | |
| "step": 4065, | |
| "valid_targets_mean": 4644.9, | |
| "valid_targets_min": 2964 | |
| }, | |
| { | |
| "epoch": 4.245174752217006, | |
| "grad_norm": 0.5991355165546476, | |
| "learning_rate": 1.6109032772768736e-05, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09156499058008194, | |
| "step": 4070, | |
| "valid_targets_mean": 3736.2, | |
| "valid_targets_min": 1817 | |
| }, | |
| { | |
| "epoch": 4.250391236306729, | |
| "grad_norm": 0.6258026751072319, | |
| "learning_rate": 1.6058035124621766e-05, | |
| "loss": 0.2257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10584190487861633, | |
| "step": 4075, | |
| "valid_targets_mean": 3567.5, | |
| "valid_targets_min": 1913 | |
| }, | |
| { | |
| "epoch": 4.255607720396453, | |
| "grad_norm": 0.6062395335736582, | |
| "learning_rate": 1.6007064128768354e-05, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10947876423597336, | |
| "step": 4080, | |
| "valid_targets_mean": 4227.0, | |
| "valid_targets_min": 1757 | |
| }, | |
| { | |
| "epoch": 4.260824204486176, | |
| "grad_norm": 0.6311769425999669, | |
| "learning_rate": 1.5956120129832054e-05, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11744509637355804, | |
| "step": 4085, | |
| "valid_targets_mean": 3979.1, | |
| "valid_targets_min": 2424 | |
| }, | |
| { | |
| "epoch": 4.2660406885759, | |
| "grad_norm": 0.6876476610264438, | |
| "learning_rate": 1.5905203472253892e-05, | |
| "loss": 0.2124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1170625239610672, | |
| "step": 4090, | |
| "valid_targets_mean": 3148.6, | |
| "valid_targets_min": 1955 | |
| }, | |
| { | |
| "epoch": 4.271257172665623, | |
| "grad_norm": 0.6022635550338309, | |
| "learning_rate": 1.5854314500290025e-05, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09701558947563171, | |
| "step": 4095, | |
| "valid_targets_mean": 3444.0, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 4.276473656755347, | |
| "grad_norm": 0.6668154017634088, | |
| "learning_rate": 1.580345355800944e-05, | |
| "loss": 0.2129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09597952663898468, | |
| "step": 4100, | |
| "valid_targets_mean": 4665.4, | |
| "valid_targets_min": 2506 | |
| }, | |
| { | |
| "epoch": 4.28169014084507, | |
| "grad_norm": 0.5835483357530894, | |
| "learning_rate": 1.5752620989291592e-05, | |
| "loss": 0.2062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09537338465452194, | |
| "step": 4105, | |
| "valid_targets_mean": 4472.1, | |
| "valid_targets_min": 2579 | |
| }, | |
| { | |
| "epoch": 4.286906624934794, | |
| "grad_norm": 0.6445387143214929, | |
| "learning_rate": 1.5701817137824108e-05, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12605199217796326, | |
| "step": 4110, | |
| "valid_targets_mean": 3854.4, | |
| "valid_targets_min": 1679 | |
| }, | |
| { | |
| "epoch": 4.2921231090245175, | |
| "grad_norm": 0.6232033375703452, | |
| "learning_rate": 1.5651042347100452e-05, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10799071192741394, | |
| "step": 4115, | |
| "valid_targets_mean": 3779.6, | |
| "valid_targets_min": 1059 | |
| }, | |
| { | |
| "epoch": 4.297339593114241, | |
| "grad_norm": 0.6064163224186345, | |
| "learning_rate": 1.5600296960417597e-05, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09386972337961197, | |
| "step": 4120, | |
| "valid_targets_mean": 3221.6, | |
| "valid_targets_min": 964 | |
| }, | |
| { | |
| "epoch": 4.302556077203964, | |
| "grad_norm": 0.5665584236414938, | |
| "learning_rate": 1.5549581320873715e-05, | |
| "loss": 0.2234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09377917647361755, | |
| "step": 4125, | |
| "valid_targets_mean": 4071.9, | |
| "valid_targets_min": 1105 | |
| }, | |
| { | |
| "epoch": 4.307772561293688, | |
| "grad_norm": 0.6106447446267973, | |
| "learning_rate": 1.549889577136586e-05, | |
| "loss": 0.2153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10893549025058746, | |
| "step": 4130, | |
| "valid_targets_mean": 4799.2, | |
| "valid_targets_min": 1460 | |
| }, | |
| { | |
| "epoch": 4.312989045383412, | |
| "grad_norm": 0.5882030153344908, | |
| "learning_rate": 1.5448240654587622e-05, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10527902841567993, | |
| "step": 4135, | |
| "valid_targets_mean": 3648.5, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 4.318205529473135, | |
| "grad_norm": 0.5712865171559052, | |
| "learning_rate": 1.5397616313026853e-05, | |
| "loss": 0.2224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12021110206842422, | |
| "step": 4140, | |
| "valid_targets_mean": 5118.2, | |
| "valid_targets_min": 3004 | |
| }, | |
| { | |
| "epoch": 4.323422013562858, | |
| "grad_norm": 0.5356865221346134, | |
| "learning_rate": 1.5347023088963315e-05, | |
| "loss": 0.2119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09559895098209381, | |
| "step": 4145, | |
| "valid_targets_mean": 4048.8, | |
| "valid_targets_min": 1535 | |
| }, | |
| { | |
| "epoch": 4.328638497652582, | |
| "grad_norm": 0.6012825074255658, | |
| "learning_rate": 1.5296461324466384e-05, | |
| "loss": 0.2121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10620524734258652, | |
| "step": 4150, | |
| "valid_targets_mean": 4052.0, | |
| "valid_targets_min": 2075 | |
| }, | |
| { | |
| "epoch": 4.333854981742306, | |
| "grad_norm": 0.6519219263593533, | |
| "learning_rate": 1.5245931361392727e-05, | |
| "loss": 0.2061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09374214708805084, | |
| "step": 4155, | |
| "valid_targets_mean": 3067.6, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 4.3390714658320295, | |
| "grad_norm": 0.6365413078139583, | |
| "learning_rate": 1.5195433541384009e-05, | |
| "loss": 0.2069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11092965304851532, | |
| "step": 4160, | |
| "valid_targets_mean": 4654.1, | |
| "valid_targets_min": 3131 | |
| }, | |
| { | |
| "epoch": 4.344287949921752, | |
| "grad_norm": 0.6445821383508478, | |
| "learning_rate": 1.514496820586456e-05, | |
| "loss": 0.2023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09095177799463272, | |
| "step": 4165, | |
| "valid_targets_mean": 3172.0, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 4.349504434011476, | |
| "grad_norm": 0.6280528720801285, | |
| "learning_rate": 1.5094535696039067e-05, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10127344727516174, | |
| "step": 4170, | |
| "valid_targets_mean": 4661.1, | |
| "valid_targets_min": 2667 | |
| }, | |
| { | |
| "epoch": 4.3547209181012, | |
| "grad_norm": 0.6347161447723051, | |
| "learning_rate": 1.50441363528903e-05, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0983014702796936, | |
| "step": 4175, | |
| "valid_targets_mean": 3484.2, | |
| "valid_targets_min": 1981 | |
| }, | |
| { | |
| "epoch": 4.359937402190924, | |
| "grad_norm": 0.8099899056842533, | |
| "learning_rate": 1.4993770517176764e-05, | |
| "loss": 0.2296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1119382306933403, | |
| "step": 4180, | |
| "valid_targets_mean": 3380.0, | |
| "valid_targets_min": 1774 | |
| }, | |
| { | |
| "epoch": 4.3651538862806465, | |
| "grad_norm": 0.6347269196077265, | |
| "learning_rate": 1.4943438529430428e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09649521112442017, | |
| "step": 4185, | |
| "valid_targets_mean": 3107.4, | |
| "valid_targets_min": 1691 | |
| }, | |
| { | |
| "epoch": 4.37037037037037, | |
| "grad_norm": 0.632411453777992, | |
| "learning_rate": 1.4893140729954402e-05, | |
| "loss": 0.2164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11470407247543335, | |
| "step": 4190, | |
| "valid_targets_mean": 4045.0, | |
| "valid_targets_min": 2318 | |
| }, | |
| { | |
| "epoch": 4.375586854460094, | |
| "grad_norm": 0.5867948799948602, | |
| "learning_rate": 1.4842877458820644e-05, | |
| "loss": 0.2088, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08849073946475983, | |
| "step": 4195, | |
| "valid_targets_mean": 3686.9, | |
| "valid_targets_min": 1509 | |
| }, | |
| { | |
| "epoch": 4.380803338549818, | |
| "grad_norm": 0.6501452694543437, | |
| "learning_rate": 1.4792649055867668e-05, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09674352407455444, | |
| "step": 4200, | |
| "valid_targets_mean": 3588.1, | |
| "valid_targets_min": 1601 | |
| }, | |
| { | |
| "epoch": 4.386019822639541, | |
| "grad_norm": 0.6489287171477346, | |
| "learning_rate": 1.474245586069822e-05, | |
| "loss": 0.2075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11378400027751923, | |
| "step": 4205, | |
| "valid_targets_mean": 4142.2, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 4.391236306729264, | |
| "grad_norm": 0.6355505214574326, | |
| "learning_rate": 1.4692298212677018e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09085379540920258, | |
| "step": 4210, | |
| "valid_targets_mean": 2910.9, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 4.396452790818988, | |
| "grad_norm": 0.6565014799685682, | |
| "learning_rate": 1.464217645092843e-05, | |
| "loss": 0.2093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11628004163503647, | |
| "step": 4215, | |
| "valid_targets_mean": 3551.8, | |
| "valid_targets_min": 1722 | |
| }, | |
| { | |
| "epoch": 4.401669274908712, | |
| "grad_norm": 0.6308115897720618, | |
| "learning_rate": 1.4592090914334206e-05, | |
| "loss": 0.21, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11344485729932785, | |
| "step": 4220, | |
| "valid_targets_mean": 4151.2, | |
| "valid_targets_min": 2121 | |
| }, | |
| { | |
| "epoch": 4.406885758998435, | |
| "grad_norm": 0.6000648140330713, | |
| "learning_rate": 1.4542041941531136e-05, | |
| "loss": 0.2033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10597352683544159, | |
| "step": 4225, | |
| "valid_targets_mean": 3703.8, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 4.4121022430881585, | |
| "grad_norm": 0.6448007941906359, | |
| "learning_rate": 1.4492029870908835e-05, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11381104588508606, | |
| "step": 4230, | |
| "valid_targets_mean": 4232.8, | |
| "valid_targets_min": 3287 | |
| }, | |
| { | |
| "epoch": 4.417318727177882, | |
| "grad_norm": 0.6219201188139125, | |
| "learning_rate": 1.4442055040607402e-05, | |
| "loss": 0.2199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08483625948429108, | |
| "step": 4235, | |
| "valid_targets_mean": 3408.4, | |
| "valid_targets_min": 1849 | |
| }, | |
| { | |
| "epoch": 4.422535211267606, | |
| "grad_norm": 0.7141003264949369, | |
| "learning_rate": 1.4392117788515138e-05, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16411037743091583, | |
| "step": 4240, | |
| "valid_targets_mean": 4216.5, | |
| "valid_targets_min": 1857 | |
| }, | |
| { | |
| "epoch": 4.427751695357329, | |
| "grad_norm": 0.6296864724849339, | |
| "learning_rate": 1.4342218452266284e-05, | |
| "loss": 0.2159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10889279842376709, | |
| "step": 4245, | |
| "valid_targets_mean": 3993.6, | |
| "valid_targets_min": 2129 | |
| }, | |
| { | |
| "epoch": 4.432968179447053, | |
| "grad_norm": 0.7056705351955537, | |
| "learning_rate": 1.4292357369238726e-05, | |
| "loss": 0.219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12574821710586548, | |
| "step": 4250, | |
| "valid_targets_mean": 4451.1, | |
| "valid_targets_min": 2650 | |
| }, | |
| { | |
| "epoch": 4.438184663536776, | |
| "grad_norm": 0.6161593353808862, | |
| "learning_rate": 1.42425348765517e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09880797564983368, | |
| "step": 4255, | |
| "valid_targets_mean": 3303.0, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 4.4434011476265, | |
| "grad_norm": 0.6330994095347192, | |
| "learning_rate": 1.4192751311063541e-05, | |
| "loss": 0.2282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12411338090896606, | |
| "step": 4260, | |
| "valid_targets_mean": 4286.2, | |
| "valid_targets_min": 2325 | |
| }, | |
| { | |
| "epoch": 4.448617631716223, | |
| "grad_norm": 0.6418717943024234, | |
| "learning_rate": 1.4143007009369388e-05, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12486034631729126, | |
| "step": 4265, | |
| "valid_targets_mean": 3906.0, | |
| "valid_targets_min": 2023 | |
| }, | |
| { | |
| "epoch": 4.453834115805947, | |
| "grad_norm": 0.6600435521803033, | |
| "learning_rate": 1.4093302307798906e-05, | |
| "loss": 0.2115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09783441573381424, | |
| "step": 4270, | |
| "valid_targets_mean": 3225.9, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 4.45905059989567, | |
| "grad_norm": 0.7423278169222988, | |
| "learning_rate": 1.4043637542414007e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12215706706047058, | |
| "step": 4275, | |
| "valid_targets_mean": 3023.4, | |
| "valid_targets_min": 2260 | |
| }, | |
| { | |
| "epoch": 4.464267083985394, | |
| "grad_norm": 0.7252284013117394, | |
| "learning_rate": 1.399401304900661e-05, | |
| "loss": 0.2186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12740066647529602, | |
| "step": 4280, | |
| "valid_targets_mean": 4483.0, | |
| "valid_targets_min": 1835 | |
| }, | |
| { | |
| "epoch": 4.469483568075117, | |
| "grad_norm": 0.609794177061817, | |
| "learning_rate": 1.3944429163096332e-05, | |
| "loss": 0.212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09345977008342743, | |
| "step": 4285, | |
| "valid_targets_mean": 4982.4, | |
| "valid_targets_min": 2343 | |
| }, | |
| { | |
| "epoch": 4.474700052164841, | |
| "grad_norm": 0.6804172767897737, | |
| "learning_rate": 1.3894886219928247e-05, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09366916120052338, | |
| "step": 4290, | |
| "valid_targets_mean": 3090.5, | |
| "valid_targets_min": 1951 | |
| }, | |
| { | |
| "epoch": 4.4799165362545645, | |
| "grad_norm": 0.6417346287161049, | |
| "learning_rate": 1.3845384554470596e-05, | |
| "loss": 0.2125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1100846379995346, | |
| "step": 4295, | |
| "valid_targets_mean": 3504.0, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 4.485133020344288, | |
| "grad_norm": 0.6501577642187959, | |
| "learning_rate": 1.3795924501412542e-05, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08919946849346161, | |
| "step": 4300, | |
| "valid_targets_mean": 3386.9, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 4.490349504434011, | |
| "grad_norm": 0.6359632700060287, | |
| "learning_rate": 1.3746506395161901e-05, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10065778344869614, | |
| "step": 4305, | |
| "valid_targets_mean": 3535.0, | |
| "valid_targets_min": 2185 | |
| }, | |
| { | |
| "epoch": 4.495565988523735, | |
| "grad_norm": 0.6019134934033642, | |
| "learning_rate": 1.3697130569842874e-05, | |
| "loss": 0.2139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10896902531385422, | |
| "step": 4310, | |
| "valid_targets_mean": 4123.2, | |
| "valid_targets_min": 2024 | |
| }, | |
| { | |
| "epoch": 4.500782472613459, | |
| "grad_norm": 0.6155736511852239, | |
| "learning_rate": 1.3647797359293797e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09079296886920929, | |
| "step": 4315, | |
| "valid_targets_mean": 3541.8, | |
| "valid_targets_min": 2282 | |
| }, | |
| { | |
| "epoch": 4.505998956703182, | |
| "grad_norm": 0.6515342677075426, | |
| "learning_rate": 1.359850709706489e-05, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07732906937599182, | |
| "step": 4320, | |
| "valid_targets_mean": 2962.8, | |
| "valid_targets_min": 1965 | |
| }, | |
| { | |
| "epoch": 4.511215440792905, | |
| "grad_norm": 0.7001463555541474, | |
| "learning_rate": 1.354926011641596e-05, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10789378732442856, | |
| "step": 4325, | |
| "valid_targets_mean": 3577.2, | |
| "valid_targets_min": 1307 | |
| }, | |
| { | |
| "epoch": 4.516431924882629, | |
| "grad_norm": 0.6539389251623247, | |
| "learning_rate": 1.3500056750314215e-05, | |
| "loss": 0.2144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13170264661312103, | |
| "step": 4330, | |
| "valid_targets_mean": 4636.2, | |
| "valid_targets_min": 1968 | |
| }, | |
| { | |
| "epoch": 4.521648408972353, | |
| "grad_norm": 0.6881442117648264, | |
| "learning_rate": 1.345089733143196e-05, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08748503029346466, | |
| "step": 4335, | |
| "valid_targets_mean": 2862.9, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 4.5268648930620765, | |
| "grad_norm": 0.6309703044130636, | |
| "learning_rate": 1.3401782192144372e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.117793507874012, | |
| "step": 4340, | |
| "valid_targets_mean": 4241.4, | |
| "valid_targets_min": 2513 | |
| }, | |
| { | |
| "epoch": 4.532081377151799, | |
| "grad_norm": 0.5867714468050478, | |
| "learning_rate": 1.3352711664527242e-05, | |
| "loss": 0.2051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09242960065603256, | |
| "step": 4345, | |
| "valid_targets_mean": 4271.6, | |
| "valid_targets_min": 2303 | |
| }, | |
| { | |
| "epoch": 4.537297861241523, | |
| "grad_norm": 0.6377407360149095, | |
| "learning_rate": 1.3303686080354739e-05, | |
| "loss": 0.2012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09161535650491714, | |
| "step": 4350, | |
| "valid_targets_mean": 3762.1, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 4.542514345331247, | |
| "grad_norm": 0.6225784501584094, | |
| "learning_rate": 1.3254705771097154e-05, | |
| "loss": 0.1974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11333035677671432, | |
| "step": 4355, | |
| "valid_targets_mean": 4333.0, | |
| "valid_targets_min": 2332 | |
| }, | |
| { | |
| "epoch": 4.547730829420971, | |
| "grad_norm": 0.6471450001720589, | |
| "learning_rate": 1.3205771067918675e-05, | |
| "loss": 0.1969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09911085665225983, | |
| "step": 4360, | |
| "valid_targets_mean": 2995.6, | |
| "valid_targets_min": 1518 | |
| }, | |
| { | |
| "epoch": 4.5529473135106935, | |
| "grad_norm": 0.6033449705589792, | |
| "learning_rate": 1.3156882301675133e-05, | |
| "loss": 0.2009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08495312929153442, | |
| "step": 4365, | |
| "valid_targets_mean": 4031.6, | |
| "valid_targets_min": 1152 | |
| }, | |
| { | |
| "epoch": 4.558163797600417, | |
| "grad_norm": 0.7322391659973415, | |
| "learning_rate": 1.3108039802911776e-05, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13618046045303345, | |
| "step": 4370, | |
| "valid_targets_mean": 4547.6, | |
| "valid_targets_min": 1598 | |
| }, | |
| { | |
| "epoch": 4.563380281690141, | |
| "grad_norm": 0.6700775167578584, | |
| "learning_rate": 1.3059243901861024e-05, | |
| "loss": 0.223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11785143613815308, | |
| "step": 4375, | |
| "valid_targets_mean": 4027.6, | |
| "valid_targets_min": 1611 | |
| }, | |
| { | |
| "epoch": 4.568596765779865, | |
| "grad_norm": 0.6223438710929402, | |
| "learning_rate": 1.3010494928440248e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08668733388185501, | |
| "step": 4380, | |
| "valid_targets_mean": 3423.5, | |
| "valid_targets_min": 1490 | |
| }, | |
| { | |
| "epoch": 4.573813249869588, | |
| "grad_norm": 0.8002295348987879, | |
| "learning_rate": 1.2961793212249527e-05, | |
| "loss": 0.2027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09039891511201859, | |
| "step": 4385, | |
| "valid_targets_mean": 3238.2, | |
| "valid_targets_min": 2535 | |
| }, | |
| { | |
| "epoch": 4.579029733959311, | |
| "grad_norm": 0.6714048622182723, | |
| "learning_rate": 1.2913139082569434e-05, | |
| "loss": 0.2022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09013304114341736, | |
| "step": 4390, | |
| "valid_targets_mean": 3477.9, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 4.584246218049035, | |
| "grad_norm": 0.6288758420694553, | |
| "learning_rate": 1.2864532868358799e-05, | |
| "loss": 0.2034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10492882132530212, | |
| "step": 4395, | |
| "valid_targets_mean": 5560.0, | |
| "valid_targets_min": 2933 | |
| }, | |
| { | |
| "epoch": 4.589462702138759, | |
| "grad_norm": 0.6653001380194464, | |
| "learning_rate": 1.2815974898252482e-05, | |
| "loss": 0.2104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12476067990064621, | |
| "step": 4400, | |
| "valid_targets_mean": 3995.4, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 4.594679186228482, | |
| "grad_norm": 0.7071723586353563, | |
| "learning_rate": 1.2767465500559162e-05, | |
| "loss": 0.2147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12354932725429535, | |
| "step": 4405, | |
| "valid_targets_mean": 3831.2, | |
| "valid_targets_min": 2184 | |
| }, | |
| { | |
| "epoch": 4.5998956703182055, | |
| "grad_norm": 0.6265775624785376, | |
| "learning_rate": 1.271900500325911e-05, | |
| "loss": 0.2054, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09005740284919739, | |
| "step": 4410, | |
| "valid_targets_mean": 3591.1, | |
| "valid_targets_min": 1503 | |
| }, | |
| { | |
| "epoch": 4.605112154407929, | |
| "grad_norm": 0.6231877899996424, | |
| "learning_rate": 1.2670593734001972e-05, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11392821371555328, | |
| "step": 4415, | |
| "valid_targets_mean": 4813.1, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 4.610328638497653, | |
| "grad_norm": 0.6411040938795858, | |
| "learning_rate": 1.2622232020104568e-05, | |
| "loss": 0.2094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13739052414894104, | |
| "step": 4420, | |
| "valid_targets_mean": 5186.4, | |
| "valid_targets_min": 2743 | |
| }, | |
| { | |
| "epoch": 4.615545122587376, | |
| "grad_norm": 0.5158840170277111, | |
| "learning_rate": 1.2573920188548634e-05, | |
| "loss": 0.17, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0762033611536026, | |
| "step": 4425, | |
| "valid_targets_mean": 5314.2, | |
| "valid_targets_min": 4389 | |
| }, | |
| { | |
| "epoch": 4.6207616066771, | |
| "grad_norm": 0.41553037992111713, | |
| "learning_rate": 1.2525658565978668e-05, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07500723749399185, | |
| "step": 4430, | |
| "valid_targets_mean": 6811.5, | |
| "valid_targets_min": 4598 | |
| }, | |
| { | |
| "epoch": 4.625978090766823, | |
| "grad_norm": 0.4564923115172026, | |
| "learning_rate": 1.2477447478699696e-05, | |
| "loss": 0.1531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09284155070781708, | |
| "step": 4435, | |
| "valid_targets_mean": 6251.1, | |
| "valid_targets_min": 5341 | |
| }, | |
| { | |
| "epoch": 4.631194574856547, | |
| "grad_norm": 0.4289790927092238, | |
| "learning_rate": 1.2429287252675062e-05, | |
| "loss": 0.1483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07401865720748901, | |
| "step": 4440, | |
| "valid_targets_mean": 6485.6, | |
| "valid_targets_min": 4586 | |
| }, | |
| { | |
| "epoch": 4.63641105894627, | |
| "grad_norm": 0.41870397544228133, | |
| "learning_rate": 1.2381178213524223e-05, | |
| "loss": 0.1602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07338064908981323, | |
| "step": 4445, | |
| "valid_targets_mean": 5752.4, | |
| "valid_targets_min": 4211 | |
| }, | |
| { | |
| "epoch": 4.641627543035994, | |
| "grad_norm": 0.45735998804437394, | |
| "learning_rate": 1.2333120686520568e-05, | |
| "loss": 0.1601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0789732038974762, | |
| "step": 4450, | |
| "valid_targets_mean": 5376.8, | |
| "valid_targets_min": 4169 | |
| }, | |
| { | |
| "epoch": 4.646844027125717, | |
| "grad_norm": 0.44745103416829096, | |
| "learning_rate": 1.2285114996589181e-05, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05815988406538963, | |
| "step": 4455, | |
| "valid_targets_mean": 6118.5, | |
| "valid_targets_min": 4589 | |
| }, | |
| { | |
| "epoch": 4.652060511215441, | |
| "grad_norm": 0.45446505133359344, | |
| "learning_rate": 1.2237161468304681e-05, | |
| "loss": 0.1619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08166366815567017, | |
| "step": 4460, | |
| "valid_targets_mean": 5471.8, | |
| "valid_targets_min": 4525 | |
| }, | |
| { | |
| "epoch": 4.657276995305164, | |
| "grad_norm": 0.4105461341791599, | |
| "learning_rate": 1.218926042588902e-05, | |
| "loss": 0.1689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06613098084926605, | |
| "step": 4465, | |
| "valid_targets_mean": 6151.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 4.662493479394888, | |
| "grad_norm": 0.4473861379835718, | |
| "learning_rate": 1.2141412193209274e-05, | |
| "loss": 0.1494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07261233031749725, | |
| "step": 4470, | |
| "valid_targets_mean": 6537.2, | |
| "valid_targets_min": 5289 | |
| }, | |
| { | |
| "epoch": 4.6677099634846115, | |
| "grad_norm": 0.40739175082740664, | |
| "learning_rate": 1.2093617093775458e-05, | |
| "loss": 0.1649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07324215024709702, | |
| "step": 4475, | |
| "valid_targets_mean": 6323.9, | |
| "valid_targets_min": 5398 | |
| }, | |
| { | |
| "epoch": 4.672926447574335, | |
| "grad_norm": 0.3878667314697942, | |
| "learning_rate": 1.2045875450738352e-05, | |
| "loss": 0.149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09113568067550659, | |
| "step": 4480, | |
| "valid_targets_mean": 8419.1, | |
| "valid_targets_min": 5304 | |
| }, | |
| { | |
| "epoch": 4.678142931664058, | |
| "grad_norm": 0.4168762541588953, | |
| "learning_rate": 1.199818758688731e-05, | |
| "loss": 0.1519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06608033180236816, | |
| "step": 4485, | |
| "valid_targets_mean": 5858.1, | |
| "valid_targets_min": 3815 | |
| }, | |
| { | |
| "epoch": 4.683359415753782, | |
| "grad_norm": 0.3929887512788324, | |
| "learning_rate": 1.1950553824648077e-05, | |
| "loss": 0.1406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07456553727388382, | |
| "step": 4490, | |
| "valid_targets_mean": 6356.6, | |
| "valid_targets_min": 4629 | |
| }, | |
| { | |
| "epoch": 4.688575899843506, | |
| "grad_norm": 0.415908431778273, | |
| "learning_rate": 1.1902974486080599e-05, | |
| "loss": 0.1399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06937122344970703, | |
| "step": 4495, | |
| "valid_targets_mean": 7511.5, | |
| "valid_targets_min": 4842 | |
| }, | |
| { | |
| "epoch": 4.6937923839332285, | |
| "grad_norm": 0.41159391392408107, | |
| "learning_rate": 1.1855449892876858e-05, | |
| "loss": 0.1565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06251582503318787, | |
| "step": 4500, | |
| "valid_targets_mean": 6017.4, | |
| "valid_targets_min": 4618 | |
| }, | |
| { | |
| "epoch": 4.699008868022952, | |
| "grad_norm": 0.39174455752756904, | |
| "learning_rate": 1.1807980366358699e-05, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06267444044351578, | |
| "step": 4505, | |
| "valid_targets_mean": 6133.8, | |
| "valid_targets_min": 4618 | |
| }, | |
| { | |
| "epoch": 4.704225352112676, | |
| "grad_norm": 0.3944402243406655, | |
| "learning_rate": 1.1760566227475642e-05, | |
| "loss": 0.1374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07039774954319, | |
| "step": 4510, | |
| "valid_targets_mean": 6665.2, | |
| "valid_targets_min": 4772 | |
| }, | |
| { | |
| "epoch": 4.7094418362024, | |
| "grad_norm": 0.4417358503918768, | |
| "learning_rate": 1.1713207796802739e-05, | |
| "loss": 0.1365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06284479796886444, | |
| "step": 4515, | |
| "valid_targets_mean": 6613.4, | |
| "valid_targets_min": 5243 | |
| }, | |
| { | |
| "epoch": 4.7146583202921235, | |
| "grad_norm": 0.4440447599924135, | |
| "learning_rate": 1.166590539453837e-05, | |
| "loss": 0.1473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07085361331701279, | |
| "step": 4520, | |
| "valid_targets_mean": 7256.1, | |
| "valid_targets_min": 5574 | |
| }, | |
| { | |
| "epoch": 4.719874804381846, | |
| "grad_norm": 0.43640560706979126, | |
| "learning_rate": 1.1618659340502104e-05, | |
| "loss": 0.1689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0730242133140564, | |
| "step": 4525, | |
| "valid_targets_mean": 6051.2, | |
| "valid_targets_min": 647 | |
| }, | |
| { | |
| "epoch": 4.72509128847157, | |
| "grad_norm": 0.4586464142491029, | |
| "learning_rate": 1.157146995413252e-05, | |
| "loss": 0.1462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08194848150014877, | |
| "step": 4530, | |
| "valid_targets_mean": 6290.5, | |
| "valid_targets_min": 4499 | |
| }, | |
| { | |
| "epoch": 4.730307772561294, | |
| "grad_norm": 0.44989998251659796, | |
| "learning_rate": 1.152433755448509e-05, | |
| "loss": 0.1513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08564908057451248, | |
| "step": 4535, | |
| "valid_targets_mean": 6375.5, | |
| "valid_targets_min": 5795 | |
| }, | |
| { | |
| "epoch": 4.735524256651017, | |
| "grad_norm": 0.4456528114004034, | |
| "learning_rate": 1.1477262460229945e-05, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08031286299228668, | |
| "step": 4540, | |
| "valid_targets_mean": 6107.5, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 4.7407407407407405, | |
| "grad_norm": 0.44968479629407576, | |
| "learning_rate": 1.143024498964981e-05, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0790557935833931, | |
| "step": 4545, | |
| "valid_targets_mean": 6077.5, | |
| "valid_targets_min": 4596 | |
| }, | |
| { | |
| "epoch": 4.745957224830464, | |
| "grad_norm": 0.43801436790028536, | |
| "learning_rate": 1.1383285460637766e-05, | |
| "loss": 0.1796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06983804702758789, | |
| "step": 4550, | |
| "valid_targets_mean": 5823.2, | |
| "valid_targets_min": 4581 | |
| }, | |
| { | |
| "epoch": 4.751173708920188, | |
| "grad_norm": 0.4339561285448081, | |
| "learning_rate": 1.1336384190695172e-05, | |
| "loss": 0.1542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08872683346271515, | |
| "step": 4555, | |
| "valid_targets_mean": 6410.8, | |
| "valid_targets_min": 5086 | |
| }, | |
| { | |
| "epoch": 4.756390193009912, | |
| "grad_norm": 0.4402884932864944, | |
| "learning_rate": 1.1289541496929466e-05, | |
| "loss": 0.1561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07995378971099854, | |
| "step": 4560, | |
| "valid_targets_mean": 7107.5, | |
| "valid_targets_min": 5827 | |
| }, | |
| { | |
| "epoch": 4.761606677099635, | |
| "grad_norm": 0.4577075388398883, | |
| "learning_rate": 1.1242757696052044e-05, | |
| "loss": 0.1544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0784759372472763, | |
| "step": 4565, | |
| "valid_targets_mean": 6158.5, | |
| "valid_targets_min": 4810 | |
| }, | |
| { | |
| "epoch": 4.766823161189358, | |
| "grad_norm": 0.4562369009913848, | |
| "learning_rate": 1.1196033104376141e-05, | |
| "loss": 0.1595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06998102366924286, | |
| "step": 4570, | |
| "valid_targets_mean": 5886.2, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 4.772039645279082, | |
| "grad_norm": 0.409744282289116, | |
| "learning_rate": 1.1149368037814644e-05, | |
| "loss": 0.1508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.080677330493927, | |
| "step": 4575, | |
| "valid_targets_mean": 8312.5, | |
| "valid_targets_min": 5125 | |
| }, | |
| { | |
| "epoch": 4.777256129368805, | |
| "grad_norm": 0.4546608779171481, | |
| "learning_rate": 1.1102762811877974e-05, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08435115218162537, | |
| "step": 4580, | |
| "valid_targets_mean": 7113.1, | |
| "valid_targets_min": 5089 | |
| }, | |
| { | |
| "epoch": 4.782472613458529, | |
| "grad_norm": 0.4365274958133037, | |
| "learning_rate": 1.1056217741672e-05, | |
| "loss": 0.1557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07127689570188522, | |
| "step": 4585, | |
| "valid_targets_mean": 6532.9, | |
| "valid_targets_min": 4219 | |
| }, | |
| { | |
| "epoch": 4.7876890975482524, | |
| "grad_norm": 0.44484298928457333, | |
| "learning_rate": 1.1009733141895823e-05, | |
| "loss": 0.148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07346318662166595, | |
| "step": 4590, | |
| "valid_targets_mean": 5441.4, | |
| "valid_targets_min": 3556 | |
| }, | |
| { | |
| "epoch": 4.792905581637976, | |
| "grad_norm": 0.40840875554472666, | |
| "learning_rate": 1.0963309326839708e-05, | |
| "loss": 0.1361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07146067917346954, | |
| "step": 4595, | |
| "valid_targets_mean": 6978.2, | |
| "valid_targets_min": 3281 | |
| }, | |
| { | |
| "epoch": 4.7981220657277, | |
| "grad_norm": 0.4472343476696256, | |
| "learning_rate": 1.0916946610382966e-05, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1032462939620018, | |
| "step": 4600, | |
| "valid_targets_mean": 8108.8, | |
| "valid_targets_min": 6178 | |
| }, | |
| { | |
| "epoch": 4.803338549817423, | |
| "grad_norm": 0.4875549116662931, | |
| "learning_rate": 1.0870645305991772e-05, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07532303035259247, | |
| "step": 4605, | |
| "valid_targets_mean": 6134.0, | |
| "valid_targets_min": 5308 | |
| }, | |
| { | |
| "epoch": 4.808555033907147, | |
| "grad_norm": 0.4584139854510215, | |
| "learning_rate": 1.0824405726717119e-05, | |
| "loss": 0.1587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07672050595283508, | |
| "step": 4610, | |
| "valid_targets_mean": 6336.0, | |
| "valid_targets_min": 4404 | |
| }, | |
| { | |
| "epoch": 4.81377151799687, | |
| "grad_norm": 0.4267556876547731, | |
| "learning_rate": 1.0778228185192639e-05, | |
| "loss": 0.1464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06810172647237778, | |
| "step": 4615, | |
| "valid_targets_mean": 6028.5, | |
| "valid_targets_min": 4123 | |
| }, | |
| { | |
| "epoch": 4.818988002086593, | |
| "grad_norm": 0.49920288391673734, | |
| "learning_rate": 1.0732112993632539e-05, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06928777694702148, | |
| "step": 4620, | |
| "valid_targets_mean": 5918.5, | |
| "valid_targets_min": 4110 | |
| }, | |
| { | |
| "epoch": 4.824204486176317, | |
| "grad_norm": 0.822938187519566, | |
| "learning_rate": 1.0686060463829451e-05, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06522960215806961, | |
| "step": 4625, | |
| "valid_targets_mean": 5972.8, | |
| "valid_targets_min": 3722 | |
| }, | |
| { | |
| "epoch": 4.829420970266041, | |
| "grad_norm": 0.42326923858405024, | |
| "learning_rate": 1.0640070907152342e-05, | |
| "loss": 0.1594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06937051564455032, | |
| "step": 4630, | |
| "valid_targets_mean": 5983.5, | |
| "valid_targets_min": 5322 | |
| }, | |
| { | |
| "epoch": 4.834637454355764, | |
| "grad_norm": 0.44361002733034566, | |
| "learning_rate": 1.0594144634544405e-05, | |
| "loss": 0.1468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07195259630680084, | |
| "step": 4635, | |
| "valid_targets_mean": 5483.4, | |
| "valid_targets_min": 4716 | |
| }, | |
| { | |
| "epoch": 4.839853938445488, | |
| "grad_norm": 0.5122695640943079, | |
| "learning_rate": 1.0548281956520978e-05, | |
| "loss": 0.1544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.087204709649086, | |
| "step": 4640, | |
| "valid_targets_mean": 6165.9, | |
| "valid_targets_min": 4711 | |
| }, | |
| { | |
| "epoch": 4.845070422535211, | |
| "grad_norm": 0.4481850904494651, | |
| "learning_rate": 1.0502483183167395e-05, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06289017200469971, | |
| "step": 4645, | |
| "valid_targets_mean": 5928.9, | |
| "valid_targets_min": 4670 | |
| }, | |
| { | |
| "epoch": 4.850286906624935, | |
| "grad_norm": 0.4172783884554342, | |
| "learning_rate": 1.0456748624136951e-05, | |
| "loss": 0.1324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07310999929904938, | |
| "step": 4650, | |
| "valid_targets_mean": 6224.0, | |
| "valid_targets_min": 4852 | |
| }, | |
| { | |
| "epoch": 4.8555033907146585, | |
| "grad_norm": 0.4238198220409301, | |
| "learning_rate": 1.0411078588648756e-05, | |
| "loss": 0.1276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07028134912252426, | |
| "step": 4655, | |
| "valid_targets_mean": 5841.4, | |
| "valid_targets_min": 4162 | |
| }, | |
| { | |
| "epoch": 4.860719874804381, | |
| "grad_norm": 0.3975514559271428, | |
| "learning_rate": 1.0365473385485662e-05, | |
| "loss": 0.1395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07043386250734329, | |
| "step": 4660, | |
| "valid_targets_mean": 7206.6, | |
| "valid_targets_min": 4924 | |
| }, | |
| { | |
| "epoch": 4.865936358894105, | |
| "grad_norm": 0.4463947737511925, | |
| "learning_rate": 1.0319933322992206e-05, | |
| "loss": 0.1455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06930739432573318, | |
| "step": 4665, | |
| "valid_targets_mean": 5832.1, | |
| "valid_targets_min": 3853 | |
| }, | |
| { | |
| "epoch": 4.871152842983829, | |
| "grad_norm": 0.5046807073448807, | |
| "learning_rate": 1.0274458709072459e-05, | |
| "loss": 0.168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07562737911939621, | |
| "step": 4670, | |
| "valid_targets_mean": 5730.9, | |
| "valid_targets_min": 4650 | |
| }, | |
| { | |
| "epoch": 4.876369327073553, | |
| "grad_norm": 0.44543822960701385, | |
| "learning_rate": 1.022904985118803e-05, | |
| "loss": 0.1787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08253324776887894, | |
| "step": 4675, | |
| "valid_targets_mean": 6447.1, | |
| "valid_targets_min": 5258 | |
| }, | |
| { | |
| "epoch": 4.881585811163276, | |
| "grad_norm": 0.4768986536116351, | |
| "learning_rate": 1.0183707056355883e-05, | |
| "loss": 0.1467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07737173140048981, | |
| "step": 4680, | |
| "valid_targets_mean": 5665.6, | |
| "valid_targets_min": 4873 | |
| }, | |
| { | |
| "epoch": 4.886802295252999, | |
| "grad_norm": 0.45865311990037294, | |
| "learning_rate": 1.0138430631146372e-05, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08142006397247314, | |
| "step": 4685, | |
| "valid_targets_mean": 5372.9, | |
| "valid_targets_min": 4540 | |
| }, | |
| { | |
| "epoch": 4.892018779342723, | |
| "grad_norm": 0.46062456414985725, | |
| "learning_rate": 1.009322088168108e-05, | |
| "loss": 0.1496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08761345595121384, | |
| "step": 4690, | |
| "valid_targets_mean": 6109.0, | |
| "valid_targets_min": 4826 | |
| }, | |
| { | |
| "epoch": 4.897235263432447, | |
| "grad_norm": 0.4353587500407794, | |
| "learning_rate": 1.0048078113630806e-05, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08518470823764801, | |
| "step": 4695, | |
| "valid_targets_mean": 7653.9, | |
| "valid_targets_min": 5227 | |
| }, | |
| { | |
| "epoch": 4.90245174752217, | |
| "grad_norm": 0.42999083186403564, | |
| "learning_rate": 1.0003002632213455e-05, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07076172530651093, | |
| "step": 4700, | |
| "valid_targets_mean": 6947.5, | |
| "valid_targets_min": 4757 | |
| }, | |
| { | |
| "epoch": 4.907668231611893, | |
| "grad_norm": 0.4462430869480212, | |
| "learning_rate": 9.95799474219202e-06, | |
| "loss": 0.1571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0766562819480896, | |
| "step": 4705, | |
| "valid_targets_mean": 5976.1, | |
| "valid_targets_min": 4475 | |
| }, | |
| { | |
| "epoch": 4.912884715701617, | |
| "grad_norm": 0.4466783821385786, | |
| "learning_rate": 9.913054747872473e-06, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07428234815597534, | |
| "step": 4710, | |
| "valid_targets_mean": 6807.8, | |
| "valid_targets_min": 5634 | |
| }, | |
| { | |
| "epoch": 4.918101199791341, | |
| "grad_norm": 0.42665413600401636, | |
| "learning_rate": 9.868182953101754e-06, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07369183003902435, | |
| "step": 4715, | |
| "valid_targets_mean": 6153.1, | |
| "valid_targets_min": 3621 | |
| }, | |
| { | |
| "epoch": 4.923317683881065, | |
| "grad_norm": 0.39086367604381295, | |
| "learning_rate": 9.823379661265677e-06, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07054505497217178, | |
| "step": 4720, | |
| "valid_targets_mean": 6605.1, | |
| "valid_targets_min": 4758 | |
| }, | |
| { | |
| "epoch": 4.9285341679707875, | |
| "grad_norm": 0.40723486782651813, | |
| "learning_rate": 9.778645175286904e-06, | |
| "loss": 0.1954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07002904266119003, | |
| "step": 4725, | |
| "valid_targets_mean": 5819.9, | |
| "valid_targets_min": 4140 | |
| }, | |
| { | |
| "epoch": 4.933750652060511, | |
| "grad_norm": 0.4451297378754627, | |
| "learning_rate": 9.733979797622874e-06, | |
| "loss": 0.1427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06993333250284195, | |
| "step": 4730, | |
| "valid_targets_mean": 5897.2, | |
| "valid_targets_min": 5062 | |
| }, | |
| { | |
| "epoch": 4.938967136150235, | |
| "grad_norm": 0.46426532407228227, | |
| "learning_rate": 9.689383830263808e-06, | |
| "loss": 0.1559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07636301219463348, | |
| "step": 4735, | |
| "valid_targets_mean": 5454.0, | |
| "valid_targets_min": 4267 | |
| }, | |
| { | |
| "epoch": 4.944183620239958, | |
| "grad_norm": 0.46954032339010526, | |
| "learning_rate": 9.6448575747306e-06, | |
| "loss": 0.1599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08528496325016022, | |
| "step": 4740, | |
| "valid_targets_mean": 6005.0, | |
| "valid_targets_min": 4615 | |
| }, | |
| { | |
| "epoch": 4.949400104329682, | |
| "grad_norm": 0.4152092165140548, | |
| "learning_rate": 9.60040133207281e-06, | |
| "loss": 0.1429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06312207877635956, | |
| "step": 4745, | |
| "valid_targets_mean": 6020.8, | |
| "valid_targets_min": 5087 | |
| }, | |
| { | |
| "epoch": 4.954616588419405, | |
| "grad_norm": 0.437181609081802, | |
| "learning_rate": 9.556015402866661e-06, | |
| "loss": 0.1402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08271750807762146, | |
| "step": 4750, | |
| "valid_targets_mean": 6847.1, | |
| "valid_targets_min": 3685 | |
| }, | |
| { | |
| "epoch": 4.959833072509129, | |
| "grad_norm": 0.3971411009242725, | |
| "learning_rate": 9.511700087212934e-06, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05720261484384537, | |
| "step": 4755, | |
| "valid_targets_mean": 5624.6, | |
| "valid_targets_min": 3437 | |
| }, | |
| { | |
| "epoch": 4.965049556598853, | |
| "grad_norm": 0.433044273243943, | |
| "learning_rate": 9.467455684735015e-06, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07429549843072891, | |
| "step": 4760, | |
| "valid_targets_mean": 7199.6, | |
| "valid_targets_min": 5647 | |
| }, | |
| { | |
| "epoch": 4.970266040688576, | |
| "grad_norm": 0.40575815755964584, | |
| "learning_rate": 9.423282494576804e-06, | |
| "loss": 0.1439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07480494678020477, | |
| "step": 4765, | |
| "valid_targets_mean": 7090.4, | |
| "valid_targets_min": 4651 | |
| }, | |
| { | |
| "epoch": 4.975482524778299, | |
| "grad_norm": 0.3878880270451748, | |
| "learning_rate": 9.379180815400753e-06, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06863409280776978, | |
| "step": 4770, | |
| "valid_targets_mean": 6706.9, | |
| "valid_targets_min": 5050 | |
| }, | |
| { | |
| "epoch": 4.980699008868023, | |
| "grad_norm": 0.40087933009836835, | |
| "learning_rate": 9.33515094538579e-06, | |
| "loss": 0.1543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08148118853569031, | |
| "step": 4775, | |
| "valid_targets_mean": 7697.8, | |
| "valid_targets_min": 6388 | |
| }, | |
| { | |
| "epoch": 4.985915492957746, | |
| "grad_norm": 0.46626038518621354, | |
| "learning_rate": 9.291193182225341e-06, | |
| "loss": 0.1362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07387973368167877, | |
| "step": 4780, | |
| "valid_targets_mean": 5380.2, | |
| "valid_targets_min": 3662 | |
| }, | |
| { | |
| "epoch": 4.99113197704747, | |
| "grad_norm": 0.4116320350888051, | |
| "learning_rate": 9.247307823125296e-06, | |
| "loss": 0.1557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07387221604585648, | |
| "step": 4785, | |
| "valid_targets_mean": 6321.4, | |
| "valid_targets_min": 5334 | |
| }, | |
| { | |
| "epoch": 4.9963484611371936, | |
| "grad_norm": 0.4574827602329708, | |
| "learning_rate": 9.203495164802027e-06, | |
| "loss": 0.158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07320703566074371, | |
| "step": 4790, | |
| "valid_targets_mean": 5324.9, | |
| "valid_targets_min": 3545 | |
| }, | |
| { | |
| "epoch": 5.001043296817945, | |
| "grad_norm": 0.717721948880893, | |
| "learning_rate": 9.159755503480345e-06, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10447976738214493, | |
| "step": 4795, | |
| "valid_targets_mean": 2562.9, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 5.006259780907668, | |
| "grad_norm": 0.6113719627466943, | |
| "learning_rate": 9.116089134891532e-06, | |
| "loss": 0.2208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09623823314905167, | |
| "step": 4800, | |
| "valid_targets_mean": 3426.9, | |
| "valid_targets_min": 533 | |
| }, | |
| { | |
| "epoch": 5.011476264997392, | |
| "grad_norm": 0.6331028067618968, | |
| "learning_rate": 9.072496354271307e-06, | |
| "loss": 0.218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08945457637310028, | |
| "step": 4805, | |
| "valid_targets_mean": 3435.4, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 5.016692749087115, | |
| "grad_norm": 0.6251710648914875, | |
| "learning_rate": 9.028977456357872e-06, | |
| "loss": 0.2152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12705007195472717, | |
| "step": 4810, | |
| "valid_targets_mean": 3926.2, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 5.021909233176839, | |
| "grad_norm": 0.5787296244242508, | |
| "learning_rate": 8.985532735389873e-06, | |
| "loss": 0.209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09727857261896133, | |
| "step": 4815, | |
| "valid_targets_mean": 3980.1, | |
| "valid_targets_min": 3308 | |
| }, | |
| { | |
| "epoch": 5.027125717266562, | |
| "grad_norm": 0.5982947775694556, | |
| "learning_rate": 8.942162485104436e-06, | |
| "loss": 0.2242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10972978174686432, | |
| "step": 4820, | |
| "valid_targets_mean": 4755.1, | |
| "valid_targets_min": 1432 | |
| }, | |
| { | |
| "epoch": 5.032342201356286, | |
| "grad_norm": 0.5382905699165589, | |
| "learning_rate": 8.898866998735195e-06, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09513285011053085, | |
| "step": 4825, | |
| "valid_targets_mean": 4481.8, | |
| "valid_targets_min": 1541 | |
| }, | |
| { | |
| "epoch": 5.037558685446009, | |
| "grad_norm": 0.5555370263633436, | |
| "learning_rate": 8.85564656901028e-06, | |
| "loss": 0.2156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14512521028518677, | |
| "step": 4830, | |
| "valid_targets_mean": 6505.9, | |
| "valid_targets_min": 4293 | |
| }, | |
| { | |
| "epoch": 5.042775169535733, | |
| "grad_norm": 0.580637776024558, | |
| "learning_rate": 8.81250148815035e-06, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07656830549240112, | |
| "step": 4835, | |
| "valid_targets_mean": 3263.4, | |
| "valid_targets_min": 1732 | |
| }, | |
| { | |
| "epoch": 5.0479916536254565, | |
| "grad_norm": 0.5800100543225436, | |
| "learning_rate": 8.769432047866608e-06, | |
| "loss": 0.2248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09634354710578918, | |
| "step": 4840, | |
| "valid_targets_mean": 4176.4, | |
| "valid_targets_min": 2309 | |
| }, | |
| { | |
| "epoch": 5.05320813771518, | |
| "grad_norm": 0.6452112835714812, | |
| "learning_rate": 8.72643853935887e-06, | |
| "loss": 0.2215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11466588079929352, | |
| "step": 4845, | |
| "valid_targets_mean": 4178.6, | |
| "valid_targets_min": 2823 | |
| }, | |
| { | |
| "epoch": 5.058424621804903, | |
| "grad_norm": 0.558809072424868, | |
| "learning_rate": 8.683521253313527e-06, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1106732189655304, | |
| "step": 4850, | |
| "valid_targets_mean": 7147.0, | |
| "valid_targets_min": 2745 | |
| }, | |
| { | |
| "epoch": 5.063641105894627, | |
| "grad_norm": 0.5497172241007894, | |
| "learning_rate": 8.640680479901648e-06, | |
| "loss": 0.2189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10635042190551758, | |
| "step": 4855, | |
| "valid_targets_mean": 4228.4, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 5.068857589984351, | |
| "grad_norm": 0.6253489466918133, | |
| "learning_rate": 8.597916508776958e-06, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09799353033304214, | |
| "step": 4860, | |
| "valid_targets_mean": 3960.5, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 5.074074074074074, | |
| "grad_norm": 0.690438534288466, | |
| "learning_rate": 8.55522962907394e-06, | |
| "loss": 0.2093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0879107192158699, | |
| "step": 4865, | |
| "valid_targets_mean": 2968.0, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 5.079290558163797, | |
| "grad_norm": 0.5880117717989857, | |
| "learning_rate": 8.512620129405816e-06, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09974095970392227, | |
| "step": 4870, | |
| "valid_targets_mean": 4079.0, | |
| "valid_targets_min": 1762 | |
| }, | |
| { | |
| "epoch": 5.084507042253521, | |
| "grad_norm": 0.6461700357169372, | |
| "learning_rate": 8.470088297862669e-06, | |
| "loss": 0.2008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10750432312488556, | |
| "step": 4875, | |
| "valid_targets_mean": 4510.4, | |
| "valid_targets_min": 2458 | |
| }, | |
| { | |
| "epoch": 5.089723526343245, | |
| "grad_norm": 0.6289769194704092, | |
| "learning_rate": 8.427634422009399e-06, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13655385375022888, | |
| "step": 4880, | |
| "valid_targets_mean": 4449.2, | |
| "valid_targets_min": 1058 | |
| }, | |
| { | |
| "epoch": 5.0949400104329685, | |
| "grad_norm": 0.6389529360882062, | |
| "learning_rate": 8.385258788883889e-06, | |
| "loss": 0.2195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11452177166938782, | |
| "step": 4885, | |
| "valid_targets_mean": 3653.0, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 5.100156494522691, | |
| "grad_norm": 0.6231310597996599, | |
| "learning_rate": 8.342961684994975e-06, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10713696479797363, | |
| "step": 4890, | |
| "valid_targets_mean": 3991.8, | |
| "valid_targets_min": 671 | |
| }, | |
| { | |
| "epoch": 5.105372978612415, | |
| "grad_norm": 0.6821158960173235, | |
| "learning_rate": 8.300743396320566e-06, | |
| "loss": 0.2156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08912687003612518, | |
| "step": 4895, | |
| "valid_targets_mean": 2786.0, | |
| "valid_targets_min": 1291 | |
| }, | |
| { | |
| "epoch": 5.110589462702139, | |
| "grad_norm": 0.6436205650765777, | |
| "learning_rate": 8.25860420830567e-06, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07429733127355576, | |
| "step": 4900, | |
| "valid_targets_mean": 2281.9, | |
| "valid_targets_min": 1133 | |
| }, | |
| { | |
| "epoch": 5.115805946791863, | |
| "grad_norm": 0.6362175996781257, | |
| "learning_rate": 8.216544405860482e-06, | |
| "loss": 0.2069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11864350736141205, | |
| "step": 4905, | |
| "valid_targets_mean": 4559.8, | |
| "valid_targets_min": 2259 | |
| }, | |
| { | |
| "epoch": 5.1210224308815855, | |
| "grad_norm": 0.6177029609843017, | |
| "learning_rate": 8.17456427335848e-06, | |
| "loss": 0.1875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10711812973022461, | |
| "step": 4910, | |
| "valid_targets_mean": 4101.8, | |
| "valid_targets_min": 2223 | |
| }, | |
| { | |
| "epoch": 5.126238914971309, | |
| "grad_norm": 0.7079737093542273, | |
| "learning_rate": 8.132664094634452e-06, | |
| "loss": 0.2171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09318976104259491, | |
| "step": 4915, | |
| "valid_targets_mean": 4128.5, | |
| "valid_targets_min": 1957 | |
| }, | |
| { | |
| "epoch": 5.131455399061033, | |
| "grad_norm": 0.6922066079713052, | |
| "learning_rate": 8.090844152982628e-06, | |
| "loss": 0.2113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15240296721458435, | |
| "step": 4920, | |
| "valid_targets_mean": 4823.2, | |
| "valid_targets_min": 3425 | |
| }, | |
| { | |
| "epoch": 5.136671883150757, | |
| "grad_norm": 0.6694272210841654, | |
| "learning_rate": 8.049104731154722e-06, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09714214503765106, | |
| "step": 4925, | |
| "valid_targets_mean": 3564.6, | |
| "valid_targets_min": 2353 | |
| }, | |
| { | |
| "epoch": 5.14188836724048, | |
| "grad_norm": 0.6820829154087947, | |
| "learning_rate": 8.007446111358066e-06, | |
| "loss": 0.2051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09757177531719208, | |
| "step": 4930, | |
| "valid_targets_mean": 3194.1, | |
| "valid_targets_min": 1009 | |
| }, | |
| { | |
| "epoch": 5.147104851330203, | |
| "grad_norm": 0.7085397237695232, | |
| "learning_rate": 7.965868575253632e-06, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11103971302509308, | |
| "step": 4935, | |
| "valid_targets_mean": 3638.1, | |
| "valid_targets_min": 1717 | |
| }, | |
| { | |
| "epoch": 5.152321335419927, | |
| "grad_norm": 0.6027176977604475, | |
| "learning_rate": 7.92437240395422e-06, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09024336189031601, | |
| "step": 4940, | |
| "valid_targets_mean": 3436.4, | |
| "valid_targets_min": 1847 | |
| }, | |
| { | |
| "epoch": 5.157537819509651, | |
| "grad_norm": 0.7360145615209331, | |
| "learning_rate": 7.882957878022472e-06, | |
| "loss": 0.2079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11444054543972015, | |
| "step": 4945, | |
| "valid_targets_mean": 3374.8, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 5.162754303599374, | |
| "grad_norm": 0.7247843546491468, | |
| "learning_rate": 7.841625277469043e-06, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11292405426502228, | |
| "step": 4950, | |
| "valid_targets_mean": 3286.4, | |
| "valid_targets_min": 571 | |
| }, | |
| { | |
| "epoch": 5.1679707876890975, | |
| "grad_norm": 0.6867621302206789, | |
| "learning_rate": 7.800374881750644e-06, | |
| "loss": 0.2021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10789409279823303, | |
| "step": 4955, | |
| "valid_targets_mean": 3616.6, | |
| "valid_targets_min": 2175 | |
| }, | |
| { | |
| "epoch": 5.173187271778821, | |
| "grad_norm": 0.6150471513972822, | |
| "learning_rate": 7.759206969768216e-06, | |
| "loss": 0.2038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10389900207519531, | |
| "step": 4960, | |
| "valid_targets_mean": 4052.4, | |
| "valid_targets_min": 3176 | |
| }, | |
| { | |
| "epoch": 5.178403755868545, | |
| "grad_norm": 0.6567774876003658, | |
| "learning_rate": 7.718121819864983e-06, | |
| "loss": 0.1957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09662721306085587, | |
| "step": 4965, | |
| "valid_targets_mean": 3447.2, | |
| "valid_targets_min": 513 | |
| }, | |
| { | |
| "epoch": 5.183620239958268, | |
| "grad_norm": 0.6383848055012892, | |
| "learning_rate": 7.677119709824635e-06, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10547171533107758, | |
| "step": 4970, | |
| "valid_targets_mean": 4041.1, | |
| "valid_targets_min": 2558 | |
| }, | |
| { | |
| "epoch": 5.188836724047992, | |
| "grad_norm": 0.6722501717129034, | |
| "learning_rate": 7.636200916869387e-06, | |
| "loss": 0.2103, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09563149511814117, | |
| "step": 4975, | |
| "valid_targets_mean": 3804.2, | |
| "valid_targets_min": 2295 | |
| }, | |
| { | |
| "epoch": 5.194053208137715, | |
| "grad_norm": 0.6773514137214155, | |
| "learning_rate": 7.595365717658143e-06, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09137743711471558, | |
| "step": 4980, | |
| "valid_targets_mean": 3320.9, | |
| "valid_targets_min": 1860 | |
| }, | |
| { | |
| "epoch": 5.199269692227439, | |
| "grad_norm": 0.695252279487759, | |
| "learning_rate": 7.554614388284609e-06, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1351241022348404, | |
| "step": 4985, | |
| "valid_targets_mean": 5827.2, | |
| "valid_targets_min": 2495 | |
| }, | |
| { | |
| "epoch": 5.204486176317162, | |
| "grad_norm": 0.5944804418423317, | |
| "learning_rate": 7.513947204275453e-06, | |
| "loss": 0.2004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08687921613454819, | |
| "step": 4990, | |
| "valid_targets_mean": 3450.2, | |
| "valid_targets_min": 1292 | |
| }, | |
| { | |
| "epoch": 5.209702660406886, | |
| "grad_norm": 0.7018626472923897, | |
| "learning_rate": 7.473364440588404e-06, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10209470987319946, | |
| "step": 4995, | |
| "valid_targets_mean": 3142.2, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 5.214919144496609, | |
| "grad_norm": 0.6369459060455145, | |
| "learning_rate": 7.432866371610403e-06, | |
| "loss": 0.1956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10971283167600632, | |
| "step": 5000, | |
| "valid_targets_mean": 4196.9, | |
| "valid_targets_min": 1757 | |
| }, | |
| { | |
| "epoch": 5.220135628586333, | |
| "grad_norm": 0.6878890791059945, | |
| "learning_rate": 7.392453271155786e-06, | |
| "loss": 0.2085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10959449410438538, | |
| "step": 5005, | |
| "valid_targets_mean": 4234.8, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 5.225352112676056, | |
| "grad_norm": 0.6643401130448944, | |
| "learning_rate": 7.352125412464368e-06, | |
| "loss": 0.2027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08521958440542221, | |
| "step": 5010, | |
| "valid_targets_mean": 3103.6, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 5.23056859676578, | |
| "grad_norm": 0.6415675065935869, | |
| "learning_rate": 7.311883068199659e-06, | |
| "loss": 0.1984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10025811195373535, | |
| "step": 5015, | |
| "valid_targets_mean": 3483.9, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 5.2357850808555035, | |
| "grad_norm": 0.7068055592574838, | |
| "learning_rate": 7.271726510446968e-06, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10649476945400238, | |
| "step": 5020, | |
| "valid_targets_mean": 3176.0, | |
| "valid_targets_min": 1200 | |
| }, | |
| { | |
| "epoch": 5.241001564945227, | |
| "grad_norm": 0.6500644599129355, | |
| "learning_rate": 7.231656010711609e-06, | |
| "loss": 0.2049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09014496207237244, | |
| "step": 5025, | |
| "valid_targets_mean": 4101.2, | |
| "valid_targets_min": 2135 | |
| }, | |
| { | |
| "epoch": 5.24621804903495, | |
| "grad_norm": 0.7031295513095206, | |
| "learning_rate": 7.191671839917025e-06, | |
| "loss": 0.1978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10667258501052856, | |
| "step": 5030, | |
| "valid_targets_mean": 3469.1, | |
| "valid_targets_min": 2518 | |
| }, | |
| { | |
| "epoch": 5.251434533124674, | |
| "grad_norm": 0.6489606424873458, | |
| "learning_rate": 7.15177426840298e-06, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09300978481769562, | |
| "step": 5035, | |
| "valid_targets_mean": 3631.1, | |
| "valid_targets_min": 2182 | |
| }, | |
| { | |
| "epoch": 5.256651017214398, | |
| "grad_norm": 0.6801978162229362, | |
| "learning_rate": 7.111963565923723e-06, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10606791079044342, | |
| "step": 5040, | |
| "valid_targets_mean": 3587.1, | |
| "valid_targets_min": 1361 | |
| }, | |
| { | |
| "epoch": 5.261867501304121, | |
| "grad_norm": 0.6297712081904472, | |
| "learning_rate": 7.07224000164618e-06, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0922778993844986, | |
| "step": 5045, | |
| "valid_targets_mean": 3687.1, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 5.267083985393844, | |
| "grad_norm": 0.740405687798328, | |
| "learning_rate": 7.032603844148098e-06, | |
| "loss": 0.2036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0993020236492157, | |
| "step": 5050, | |
| "valid_targets_mean": 2796.0, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 5.272300469483568, | |
| "grad_norm": 0.6884709056067042, | |
| "learning_rate": 6.993055361416281e-06, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09852635860443115, | |
| "step": 5055, | |
| "valid_targets_mean": 3220.1, | |
| "valid_targets_min": 2200 | |
| }, | |
| { | |
| "epoch": 5.277516953573292, | |
| "grad_norm": 0.6236133621236264, | |
| "learning_rate": 6.953594820844725e-06, | |
| "loss": 0.1928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10870516300201416, | |
| "step": 5060, | |
| "valid_targets_mean": 4378.4, | |
| "valid_targets_min": 2241 | |
| }, | |
| { | |
| "epoch": 5.2827334376630155, | |
| "grad_norm": 0.6392810348348091, | |
| "learning_rate": 6.914222489232834e-06, | |
| "loss": 0.1948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12641771137714386, | |
| "step": 5065, | |
| "valid_targets_mean": 4750.9, | |
| "valid_targets_min": 2623 | |
| }, | |
| { | |
| "epoch": 5.287949921752738, | |
| "grad_norm": 0.5959046891326175, | |
| "learning_rate": 6.874938632783639e-06, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08762572705745697, | |
| "step": 5070, | |
| "valid_targets_mean": 4176.5, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 5.293166405842462, | |
| "grad_norm": 0.6569941497873603, | |
| "learning_rate": 6.835743517101947e-06, | |
| "loss": 0.2143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10848087817430496, | |
| "step": 5075, | |
| "valid_targets_mean": 3480.9, | |
| "valid_targets_min": 1220 | |
| }, | |
| { | |
| "epoch": 5.298382889932186, | |
| "grad_norm": 0.6454631626465558, | |
| "learning_rate": 6.796637407192608e-06, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10460580140352249, | |
| "step": 5080, | |
| "valid_targets_mean": 4020.6, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 5.30359937402191, | |
| "grad_norm": 0.6580002877249211, | |
| "learning_rate": 6.7576205674586405e-06, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10093130171298981, | |
| "step": 5085, | |
| "valid_targets_mean": 4179.1, | |
| "valid_targets_min": 2913 | |
| }, | |
| { | |
| "epoch": 5.3088158581116325, | |
| "grad_norm": 0.759324580704283, | |
| "learning_rate": 6.718693261699542e-06, | |
| "loss": 0.199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11457392573356628, | |
| "step": 5090, | |
| "valid_targets_mean": 2762.0, | |
| "valid_targets_min": 1608 | |
| }, | |
| { | |
| "epoch": 5.314032342201356, | |
| "grad_norm": 0.6842695438203094, | |
| "learning_rate": 6.679855753109419e-06, | |
| "loss": 0.2121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0984582006931305, | |
| "step": 5095, | |
| "valid_targets_mean": 3711.9, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 5.31924882629108, | |
| "grad_norm": 0.6646955473874829, | |
| "learning_rate": 6.64110830427527e-06, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1032104343175888, | |
| "step": 5100, | |
| "valid_targets_mean": 3489.2, | |
| "valid_targets_min": 1344 | |
| }, | |
| { | |
| "epoch": 5.324465310380804, | |
| "grad_norm": 0.6954391538493911, | |
| "learning_rate": 6.602451177175162e-06, | |
| "loss": 0.1941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1080797016620636, | |
| "step": 5105, | |
| "valid_targets_mean": 3501.9, | |
| "valid_targets_min": 2528 | |
| }, | |
| { | |
| "epoch": 5.329681794470527, | |
| "grad_norm": 0.663859011008518, | |
| "learning_rate": 6.563884633176505e-06, | |
| "loss": 0.1908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07488427311182022, | |
| "step": 5110, | |
| "valid_targets_mean": 2761.8, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 5.33489827856025, | |
| "grad_norm": 0.7094006431270186, | |
| "learning_rate": 6.5254089330342366e-06, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09925274550914764, | |
| "step": 5115, | |
| "valid_targets_mean": 3061.0, | |
| "valid_targets_min": 1369 | |
| }, | |
| { | |
| "epoch": 5.340114762649974, | |
| "grad_norm": 0.6445464660896773, | |
| "learning_rate": 6.487024336889107e-06, | |
| "loss": 0.184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08848898857831955, | |
| "step": 5120, | |
| "valid_targets_mean": 3701.0, | |
| "valid_targets_min": 1703 | |
| }, | |
| { | |
| "epoch": 5.345331246739698, | |
| "grad_norm": 0.6706659177848282, | |
| "learning_rate": 6.448731104265871e-06, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09668239951133728, | |
| "step": 5125, | |
| "valid_targets_mean": 3942.5, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 5.350547730829421, | |
| "grad_norm": 0.7009380008128605, | |
| "learning_rate": 6.410529494071596e-06, | |
| "loss": 0.1783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09818179905414581, | |
| "step": 5130, | |
| "valid_targets_mean": 3333.9, | |
| "valid_targets_min": 2054 | |
| }, | |
| { | |
| "epoch": 5.3557642149191445, | |
| "grad_norm": 0.6532330129511432, | |
| "learning_rate": 6.372419764593825e-06, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0966290533542633, | |
| "step": 5135, | |
| "valid_targets_mean": 3478.1, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 5.360980699008868, | |
| "grad_norm": 0.7204511552929059, | |
| "learning_rate": 6.334402173498926e-06, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11823828518390656, | |
| "step": 5140, | |
| "valid_targets_mean": 4058.5, | |
| "valid_targets_min": 2093 | |
| }, | |
| { | |
| "epoch": 5.366197183098592, | |
| "grad_norm": 0.7558805126100967, | |
| "learning_rate": 6.296476977830272e-06, | |
| "loss": 0.1881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10335324704647064, | |
| "step": 5145, | |
| "valid_targets_mean": 3423.1, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 5.371413667188315, | |
| "grad_norm": 0.6516664978442213, | |
| "learning_rate": 6.2586444340065625e-06, | |
| "loss": 0.1965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08896977454423904, | |
| "step": 5150, | |
| "valid_targets_mean": 4211.0, | |
| "valid_targets_min": 2947 | |
| }, | |
| { | |
| "epoch": 5.376630151278039, | |
| "grad_norm": 0.7038302074062533, | |
| "learning_rate": 6.22090479782004e-06, | |
| "loss": 0.193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08615154772996902, | |
| "step": 5155, | |
| "valid_targets_mean": 4113.5, | |
| "valid_targets_min": 1789 | |
| }, | |
| { | |
| "epoch": 5.381846635367762, | |
| "grad_norm": 0.8280512067294838, | |
| "learning_rate": 6.18325832443478e-06, | |
| "loss": 0.1956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11172784864902496, | |
| "step": 5160, | |
| "valid_targets_mean": 4228.5, | |
| "valid_targets_min": 1586 | |
| }, | |
| { | |
| "epoch": 5.387063119457486, | |
| "grad_norm": 0.7171675568501169, | |
| "learning_rate": 6.145705268384996e-06, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10773415863513947, | |
| "step": 5165, | |
| "valid_targets_mean": 3750.8, | |
| "valid_targets_min": 2542 | |
| }, | |
| { | |
| "epoch": 5.392279603547209, | |
| "grad_norm": 0.9927910504672022, | |
| "learning_rate": 6.108245883573258e-06, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09137014299631119, | |
| "step": 5170, | |
| "valid_targets_mean": 4209.8, | |
| "valid_targets_min": 2513 | |
| }, | |
| { | |
| "epoch": 5.397496087636933, | |
| "grad_norm": 0.6227528388792244, | |
| "learning_rate": 6.070880423268839e-06, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08537808805704117, | |
| "step": 5175, | |
| "valid_targets_mean": 4118.8, | |
| "valid_targets_min": 2156 | |
| }, | |
| { | |
| "epoch": 5.402712571726656, | |
| "grad_norm": 0.6901317852518757, | |
| "learning_rate": 6.033609140105949e-06, | |
| "loss": 0.1884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08215728402137756, | |
| "step": 5180, | |
| "valid_targets_mean": 3040.5, | |
| "valid_targets_min": 1423 | |
| }, | |
| { | |
| "epoch": 5.40792905581638, | |
| "grad_norm": 0.7727298594907129, | |
| "learning_rate": 5.996432286082061e-06, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10859899967908859, | |
| "step": 5185, | |
| "valid_targets_mean": 3219.5, | |
| "valid_targets_min": 2126 | |
| }, | |
| { | |
| "epoch": 5.413145539906103, | |
| "grad_norm": 0.723698451511629, | |
| "learning_rate": 5.9593501125561885e-06, | |
| "loss": 0.2092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10895063728094101, | |
| "step": 5190, | |
| "valid_targets_mean": 3743.2, | |
| "valid_targets_min": 1834 | |
| }, | |
| { | |
| "epoch": 5.418362023995827, | |
| "grad_norm": 0.6427752771966732, | |
| "learning_rate": 5.922362870247214e-06, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10220670700073242, | |
| "step": 5195, | |
| "valid_targets_mean": 4402.9, | |
| "valid_targets_min": 1571 | |
| }, | |
| { | |
| "epoch": 5.4235785080855505, | |
| "grad_norm": 0.6646852185375391, | |
| "learning_rate": 5.885470809232143e-06, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09629872441291809, | |
| "step": 5200, | |
| "valid_targets_mean": 3430.1, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 5.428794992175274, | |
| "grad_norm": 0.646672003268297, | |
| "learning_rate": 5.8486741789444804e-06, | |
| "loss": 0.1985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09758952260017395, | |
| "step": 5205, | |
| "valid_targets_mean": 4407.6, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 5.434011476264997, | |
| "grad_norm": 0.6682684352038685, | |
| "learning_rate": 5.8119732281724715e-06, | |
| "loss": 0.2083, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11466312408447266, | |
| "step": 5210, | |
| "valid_targets_mean": 4384.6, | |
| "valid_targets_min": 2492 | |
| }, | |
| { | |
| "epoch": 5.439227960354721, | |
| "grad_norm": 0.7078823755280258, | |
| "learning_rate": 5.775368205057488e-06, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10158593952655792, | |
| "step": 5215, | |
| "valid_targets_mean": 4149.1, | |
| "valid_targets_min": 2664 | |
| }, | |
| { | |
| "epoch": 5.444444444444445, | |
| "grad_norm": 0.6441187272502343, | |
| "learning_rate": 5.738859357092297e-06, | |
| "loss": 0.2042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08556175231933594, | |
| "step": 5220, | |
| "valid_targets_mean": 3506.6, | |
| "valid_targets_min": 1868 | |
| }, | |
| { | |
| "epoch": 5.449660928534168, | |
| "grad_norm": 0.6951671169669054, | |
| "learning_rate": 5.7024469311194095e-06, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1207038089632988, | |
| "step": 5225, | |
| "valid_targets_mean": 4142.5, | |
| "valid_targets_min": 2348 | |
| }, | |
| { | |
| "epoch": 5.454877412623891, | |
| "grad_norm": 0.7177289349892816, | |
| "learning_rate": 5.66613117332943e-06, | |
| "loss": 0.1963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12148942053318024, | |
| "step": 5230, | |
| "valid_targets_mean": 4413.2, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 5.460093896713615, | |
| "grad_norm": 0.7623408442848661, | |
| "learning_rate": 5.629912329259355e-06, | |
| "loss": 0.2053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09220965206623077, | |
| "step": 5235, | |
| "valid_targets_mean": 3051.0, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 5.465310380803339, | |
| "grad_norm": 0.6678802522911089, | |
| "learning_rate": 5.593790643790935e-06, | |
| "loss": 0.1983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09806862473487854, | |
| "step": 5240, | |
| "valid_targets_mean": 4212.6, | |
| "valid_targets_min": 2305 | |
| }, | |
| { | |
| "epoch": 5.470526864893062, | |
| "grad_norm": 0.6825715069529491, | |
| "learning_rate": 5.557766361149013e-06, | |
| "loss": 0.1934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08274856209754944, | |
| "step": 5245, | |
| "valid_targets_mean": 3063.2, | |
| "valid_targets_min": 953 | |
| }, | |
| { | |
| "epoch": 5.475743348982785, | |
| "grad_norm": 0.623151298652376, | |
| "learning_rate": 5.521839724899887e-06, | |
| "loss": 0.1879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0962090790271759, | |
| "step": 5250, | |
| "valid_targets_mean": 4380.8, | |
| "valid_targets_min": 1659 | |
| }, | |
| { | |
| "epoch": 5.480959833072509, | |
| "grad_norm": 0.6281386433235461, | |
| "learning_rate": 5.48601097794963e-06, | |
| "loss": 0.1962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07585996389389038, | |
| "step": 5255, | |
| "valid_targets_mean": 3453.9, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 5.486176317162233, | |
| "grad_norm": 0.6288082594164385, | |
| "learning_rate": 5.450280362542495e-06, | |
| "loss": 0.1864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09140726923942566, | |
| "step": 5260, | |
| "valid_targets_mean": 4094.1, | |
| "valid_targets_min": 2032 | |
| }, | |
| { | |
| "epoch": 5.491392801251957, | |
| "grad_norm": 0.6742010710103064, | |
| "learning_rate": 5.414648120259225e-06, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09272578358650208, | |
| "step": 5265, | |
| "valid_targets_mean": 3808.8, | |
| "valid_targets_min": 1464 | |
| }, | |
| { | |
| "epoch": 5.4966092853416795, | |
| "grad_norm": 0.7786129720045577, | |
| "learning_rate": 5.379114492015467e-06, | |
| "loss": 0.196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09917710721492767, | |
| "step": 5270, | |
| "valid_targets_mean": 3471.8, | |
| "valid_targets_min": 1826 | |
| }, | |
| { | |
| "epoch": 5.501825769431403, | |
| "grad_norm": 0.6363856153056031, | |
| "learning_rate": 5.343679718060104e-06, | |
| "loss": 0.1943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08492106199264526, | |
| "step": 5275, | |
| "valid_targets_mean": 3982.6, | |
| "valid_targets_min": 2448 | |
| }, | |
| { | |
| "epoch": 5.507042253521127, | |
| "grad_norm": 0.6898072849840929, | |
| "learning_rate": 5.308344037973672e-06, | |
| "loss": 0.1977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10971357673406601, | |
| "step": 5280, | |
| "valid_targets_mean": 3853.2, | |
| "valid_targets_min": 2194 | |
| }, | |
| { | |
| "epoch": 5.51225873761085, | |
| "grad_norm": 0.638618982352659, | |
| "learning_rate": 5.2731076906666786e-06, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10188505798578262, | |
| "step": 5285, | |
| "valid_targets_mean": 4452.6, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 5.517475221700574, | |
| "grad_norm": 0.6515783171624651, | |
| "learning_rate": 5.237970914378068e-06, | |
| "loss": 0.1959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10107311606407166, | |
| "step": 5290, | |
| "valid_targets_mean": 5409.0, | |
| "valid_targets_min": 2251 | |
| }, | |
| { | |
| "epoch": 5.522691705790297, | |
| "grad_norm": 0.6373328541182325, | |
| "learning_rate": 5.202933946673532e-06, | |
| "loss": 0.1838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10527370125055313, | |
| "step": 5295, | |
| "valid_targets_mean": 5085.1, | |
| "valid_targets_min": 3257 | |
| }, | |
| { | |
| "epoch": 5.527908189880021, | |
| "grad_norm": 0.7152250120708262, | |
| "learning_rate": 5.1679970244439695e-06, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08541359007358551, | |
| "step": 5300, | |
| "valid_targets_mean": 3407.0, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 5.533124673969745, | |
| "grad_norm": 0.6575032945462801, | |
| "learning_rate": 5.13316038390383e-06, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.088060662150383, | |
| "step": 5305, | |
| "valid_targets_mean": 3681.6, | |
| "valid_targets_min": 1614 | |
| }, | |
| { | |
| "epoch": 5.538341158059468, | |
| "grad_norm": 0.6063571552993661, | |
| "learning_rate": 5.098424260589565e-06, | |
| "loss": 0.1837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10392385721206665, | |
| "step": 5310, | |
| "valid_targets_mean": 5232.4, | |
| "valid_targets_min": 2532 | |
| }, | |
| { | |
| "epoch": 5.5435576421491914, | |
| "grad_norm": 0.6523385628569274, | |
| "learning_rate": 5.063788889357995e-06, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08824765682220459, | |
| "step": 5315, | |
| "valid_targets_mean": 3577.1, | |
| "valid_targets_min": 2054 | |
| }, | |
| { | |
| "epoch": 5.548774126238915, | |
| "grad_norm": 0.7201971592132397, | |
| "learning_rate": 5.029254504384733e-06, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09777437150478363, | |
| "step": 5320, | |
| "valid_targets_mean": 3497.2, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 5.553990610328638, | |
| "grad_norm": 0.6988915686907854, | |
| "learning_rate": 4.9948213391626325e-06, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07731719315052032, | |
| "step": 5325, | |
| "valid_targets_mean": 2877.4, | |
| "valid_targets_min": 1515 | |
| }, | |
| { | |
| "epoch": 5.559207094418362, | |
| "grad_norm": 0.7246581917112689, | |
| "learning_rate": 4.960489626500153e-06, | |
| "loss": 0.2009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10949697345495224, | |
| "step": 5330, | |
| "valid_targets_mean": 2994.8, | |
| "valid_targets_min": 1128 | |
| }, | |
| { | |
| "epoch": 5.5644235785080856, | |
| "grad_norm": 0.709025000059326, | |
| "learning_rate": 4.926259598519851e-06, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08539760857820511, | |
| "step": 5335, | |
| "valid_targets_mean": 2844.6, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 5.569640062597809, | |
| "grad_norm": 0.7332051491895499, | |
| "learning_rate": 4.892131486656733e-06, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10768003016710281, | |
| "step": 5340, | |
| "valid_targets_mean": 3376.0, | |
| "valid_targets_min": 1975 | |
| }, | |
| { | |
| "epoch": 5.574856546687533, | |
| "grad_norm": 0.6394050586873373, | |
| "learning_rate": 4.858105521656768e-06, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09275992214679718, | |
| "step": 5345, | |
| "valid_targets_mean": 4036.9, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 5.580073030777256, | |
| "grad_norm": 0.7062084748383708, | |
| "learning_rate": 4.824181933575272e-06, | |
| "loss": 0.1923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1020720824599266, | |
| "step": 5350, | |
| "valid_targets_mean": 3313.1, | |
| "valid_targets_min": 1882 | |
| }, | |
| { | |
| "epoch": 5.58528951486698, | |
| "grad_norm": 0.6694129291457096, | |
| "learning_rate": 4.790360951775392e-06, | |
| "loss": 0.1853, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09757111966609955, | |
| "step": 5355, | |
| "valid_targets_mean": 3570.2, | |
| "valid_targets_min": 1605 | |
| }, | |
| { | |
| "epoch": 5.590505998956703, | |
| "grad_norm": 0.7502691073253468, | |
| "learning_rate": 4.756642804926517e-06, | |
| "loss": 0.1972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07921934127807617, | |
| "step": 5360, | |
| "valid_targets_mean": 2624.1, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 5.595722483046426, | |
| "grad_norm": 0.6763839272827359, | |
| "learning_rate": 4.7230277210027685e-06, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09793854504823685, | |
| "step": 5365, | |
| "valid_targets_mean": 3692.8, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 5.60093896713615, | |
| "grad_norm": 0.6534377098219409, | |
| "learning_rate": 4.689515927281427e-06, | |
| "loss": 0.1899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09733390063047409, | |
| "step": 5370, | |
| "valid_targets_mean": 4223.1, | |
| "valid_targets_min": 929 | |
| }, | |
| { | |
| "epoch": 5.606155451225874, | |
| "grad_norm": 0.6598928238848826, | |
| "learning_rate": 4.6561076503414235e-06, | |
| "loss": 0.2003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11475412547588348, | |
| "step": 5375, | |
| "valid_targets_mean": 4445.0, | |
| "valid_targets_min": 2695 | |
| }, | |
| { | |
| "epoch": 5.6113719353155975, | |
| "grad_norm": 0.6262577611448652, | |
| "learning_rate": 4.622803116061789e-06, | |
| "loss": 0.1887, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10232369601726532, | |
| "step": 5380, | |
| "valid_targets_mean": 7016.8, | |
| "valid_targets_min": 5239 | |
| }, | |
| { | |
| "epoch": 5.616588419405321, | |
| "grad_norm": 0.5336658158405487, | |
| "learning_rate": 4.589602549620127e-06, | |
| "loss": 0.1551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06712451577186584, | |
| "step": 5385, | |
| "valid_targets_mean": 5891.2, | |
| "valid_targets_min": 3108 | |
| }, | |
| { | |
| "epoch": 5.621804903495044, | |
| "grad_norm": 0.48213975341203585, | |
| "learning_rate": 4.556506175491097e-06, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06671963632106781, | |
| "step": 5390, | |
| "valid_targets_mean": 6355.6, | |
| "valid_targets_min": 4660 | |
| }, | |
| { | |
| "epoch": 5.627021387584768, | |
| "grad_norm": 0.4401148301444298, | |
| "learning_rate": 4.523514217444918e-06, | |
| "loss": 0.1453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06751792132854462, | |
| "step": 5395, | |
| "valid_targets_mean": 5890.5, | |
| "valid_targets_min": 3138 | |
| }, | |
| { | |
| "epoch": 5.632237871674492, | |
| "grad_norm": 0.42601127196588723, | |
| "learning_rate": 4.490626898545805e-06, | |
| "loss": 0.1418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08127633482217789, | |
| "step": 5400, | |
| "valid_targets_mean": 7187.2, | |
| "valid_targets_min": 4893 | |
| }, | |
| { | |
| "epoch": 5.6374543557642145, | |
| "grad_norm": 0.49606006793832375, | |
| "learning_rate": 4.4578444411505005e-06, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07878682017326355, | |
| "step": 5405, | |
| "valid_targets_mean": 5556.0, | |
| "valid_targets_min": 4370 | |
| }, | |
| { | |
| "epoch": 5.642670839853938, | |
| "grad_norm": 0.495011995734162, | |
| "learning_rate": 4.425167066906777e-06, | |
| "loss": 0.1525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08289466053247452, | |
| "step": 5410, | |
| "valid_targets_mean": 5570.5, | |
| "valid_targets_min": 4576 | |
| }, | |
| { | |
| "epoch": 5.647887323943662, | |
| "grad_norm": 0.4384676262674698, | |
| "learning_rate": 4.392594996751891e-06, | |
| "loss": 0.1434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07158589363098145, | |
| "step": 5415, | |
| "valid_targets_mean": 5741.8, | |
| "valid_targets_min": 4542 | |
| }, | |
| { | |
| "epoch": 5.653103808033386, | |
| "grad_norm": 0.6783238064529955, | |
| "learning_rate": 4.360128450911154e-06, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10231122374534607, | |
| "step": 5420, | |
| "valid_targets_mean": 1503.9, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 5.6583202921231095, | |
| "grad_norm": 0.4385297274214473, | |
| "learning_rate": 4.3277676488963775e-06, | |
| "loss": 0.1438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0828162282705307, | |
| "step": 5425, | |
| "valid_targets_mean": 6904.1, | |
| "valid_targets_min": 5520 | |
| }, | |
| { | |
| "epoch": 5.663536776212832, | |
| "grad_norm": 0.5140202585174065, | |
| "learning_rate": 4.295512809504447e-06, | |
| "loss": 0.1455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07422012835741043, | |
| "step": 5430, | |
| "valid_targets_mean": 6056.1, | |
| "valid_targets_min": 4291 | |
| }, | |
| { | |
| "epoch": 5.668753260302556, | |
| "grad_norm": 0.4403055906663912, | |
| "learning_rate": 4.263364150815803e-06, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07317037880420685, | |
| "step": 5435, | |
| "valid_targets_mean": 6861.9, | |
| "valid_targets_min": 4993 | |
| }, | |
| { | |
| "epoch": 5.67396974439228, | |
| "grad_norm": 0.4765820975893386, | |
| "learning_rate": 4.231321890192981e-06, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08205021172761917, | |
| "step": 5440, | |
| "valid_targets_mean": 5696.2, | |
| "valid_targets_min": 4268 | |
| }, | |
| { | |
| "epoch": 5.679186228482003, | |
| "grad_norm": 0.4404530717596097, | |
| "learning_rate": 4.19938624427914e-06, | |
| "loss": 0.1383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0685369223356247, | |
| "step": 5445, | |
| "valid_targets_mean": 6006.2, | |
| "valid_targets_min": 4627 | |
| }, | |
| { | |
| "epoch": 5.6844027125717265, | |
| "grad_norm": 0.48698584459652244, | |
| "learning_rate": 4.167557428996611e-06, | |
| "loss": 0.1388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0856742411851883, | |
| "step": 5450, | |
| "valid_targets_mean": 5225.6, | |
| "valid_targets_min": 4617 | |
| }, | |
| { | |
| "epoch": 5.68961919666145, | |
| "grad_norm": 0.5195839820372045, | |
| "learning_rate": 4.135835659545406e-06, | |
| "loss": 0.1299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07734028995037079, | |
| "step": 5455, | |
| "valid_targets_mean": 7617.8, | |
| "valid_targets_min": 5121 | |
| }, | |
| { | |
| "epoch": 5.694835680751174, | |
| "grad_norm": 0.41962386163586246, | |
| "learning_rate": 4.104221150401806e-06, | |
| "loss": 0.1466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.055547550320625305, | |
| "step": 5460, | |
| "valid_targets_mean": 5419.1, | |
| "valid_targets_min": 3873 | |
| }, | |
| { | |
| "epoch": 5.700052164840898, | |
| "grad_norm": 0.4308314083250281, | |
| "learning_rate": 4.072714115316863e-06, | |
| "loss": 0.148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05942081660032272, | |
| "step": 5465, | |
| "valid_targets_mean": 5971.9, | |
| "valid_targets_min": 3898 | |
| }, | |
| { | |
| "epoch": 5.705268648930621, | |
| "grad_norm": 0.43749224269193804, | |
| "learning_rate": 4.041314767314983e-06, | |
| "loss": 0.1282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.053951337933540344, | |
| "step": 5470, | |
| "valid_targets_mean": 5725.6, | |
| "valid_targets_min": 4480 | |
| }, | |
| { | |
| "epoch": 5.710485133020344, | |
| "grad_norm": 0.40069356064269673, | |
| "learning_rate": 4.010023318692502e-06, | |
| "loss": 0.1326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06682620942592621, | |
| "step": 5475, | |
| "valid_targets_mean": 7431.5, | |
| "valid_targets_min": 6180 | |
| }, | |
| { | |
| "epoch": 5.715701617110068, | |
| "grad_norm": 0.5280423415439809, | |
| "learning_rate": 3.978839981016203e-06, | |
| "loss": 0.1528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10533119738101959, | |
| "step": 5480, | |
| "valid_targets_mean": 6257.8, | |
| "valid_targets_min": 5397 | |
| }, | |
| { | |
| "epoch": 5.720918101199791, | |
| "grad_norm": 0.4430928980394338, | |
| "learning_rate": 3.947764965121934e-06, | |
| "loss": 0.157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06880559027194977, | |
| "step": 5485, | |
| "valid_targets_mean": 5946.0, | |
| "valid_targets_min": 4549 | |
| }, | |
| { | |
| "epoch": 5.726134585289515, | |
| "grad_norm": 0.44159154039757703, | |
| "learning_rate": 3.916798481113144e-06, | |
| "loss": 0.1367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08504307270050049, | |
| "step": 5490, | |
| "valid_targets_mean": 6416.8, | |
| "valid_targets_min": 4840 | |
| }, | |
| { | |
| "epoch": 5.731351069379238, | |
| "grad_norm": 0.46392605908334117, | |
| "learning_rate": 3.885940738359492e-06, | |
| "loss": 0.1417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0839645266532898, | |
| "step": 5495, | |
| "valid_targets_mean": 6426.0, | |
| "valid_targets_min": 5225 | |
| }, | |
| { | |
| "epoch": 5.736567553468962, | |
| "grad_norm": 0.5003470076519811, | |
| "learning_rate": 3.855191945495405e-06, | |
| "loss": 0.1567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08337022364139557, | |
| "step": 5500, | |
| "valid_targets_mean": 5544.6, | |
| "valid_targets_min": 3296 | |
| }, | |
| { | |
| "epoch": 5.741784037558686, | |
| "grad_norm": 0.46791204697564037, | |
| "learning_rate": 3.824552310418703e-06, | |
| "loss": 0.1728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07997798174619675, | |
| "step": 5505, | |
| "valid_targets_mean": 6941.2, | |
| "valid_targets_min": 5464 | |
| }, | |
| { | |
| "epoch": 5.747000521648409, | |
| "grad_norm": 0.4857777352502609, | |
| "learning_rate": 3.794022040289147e-06, | |
| "loss": 0.166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07363554835319519, | |
| "step": 5510, | |
| "valid_targets_mean": 6252.2, | |
| "valid_targets_min": 4740 | |
| }, | |
| { | |
| "epoch": 5.7522170057381325, | |
| "grad_norm": 0.42847598212699706, | |
| "learning_rate": 3.763601341527088e-06, | |
| "loss": 0.1471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07224708050489426, | |
| "step": 5515, | |
| "valid_targets_mean": 6288.8, | |
| "valid_targets_min": 4913 | |
| }, | |
| { | |
| "epoch": 5.757433489827856, | |
| "grad_norm": 0.42951261112572486, | |
| "learning_rate": 3.733290419812019e-06, | |
| "loss": 0.1476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07177867740392685, | |
| "step": 5520, | |
| "valid_targets_mean": 6687.2, | |
| "valid_targets_min": 4751 | |
| }, | |
| { | |
| "epoch": 5.762649973917579, | |
| "grad_norm": 0.4556621998482358, | |
| "learning_rate": 3.7030894800812365e-06, | |
| "loss": 0.1506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08475948870182037, | |
| "step": 5525, | |
| "valid_targets_mean": 6800.4, | |
| "valid_targets_min": 4900 | |
| }, | |
| { | |
| "epoch": 5.767866458007303, | |
| "grad_norm": 0.42444634484814975, | |
| "learning_rate": 3.672998726528414e-06, | |
| "loss": 0.1502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07881123572587967, | |
| "step": 5530, | |
| "valid_targets_mean": 6907.8, | |
| "valid_targets_min": 4850 | |
| }, | |
| { | |
| "epoch": 5.773082942097027, | |
| "grad_norm": 0.4634334543155377, | |
| "learning_rate": 3.6430183626022574e-06, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07411497831344604, | |
| "step": 5535, | |
| "valid_targets_mean": 5823.6, | |
| "valid_targets_min": 2910 | |
| }, | |
| { | |
| "epoch": 5.77829942618675, | |
| "grad_norm": 0.4334170773619176, | |
| "learning_rate": 3.613148591005071e-06, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06874310970306396, | |
| "step": 5540, | |
| "valid_targets_mean": 6160.4, | |
| "valid_targets_min": 4196 | |
| }, | |
| { | |
| "epoch": 5.783515910276473, | |
| "grad_norm": 0.5513881573364133, | |
| "learning_rate": 3.5833896136914705e-06, | |
| "loss": 0.1499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07393354922533035, | |
| "step": 5545, | |
| "valid_targets_mean": 5796.0, | |
| "valid_targets_min": 3865 | |
| }, | |
| { | |
| "epoch": 5.788732394366197, | |
| "grad_norm": 0.3897950432544831, | |
| "learning_rate": 3.553741631866938e-06, | |
| "loss": 0.138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06556838750839233, | |
| "step": 5550, | |
| "valid_targets_mean": 7570.9, | |
| "valid_targets_min": 3627 | |
| }, | |
| { | |
| "epoch": 5.793948878455921, | |
| "grad_norm": 0.38936665955031047, | |
| "learning_rate": 3.524204845986523e-06, | |
| "loss": 0.1285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06014685705304146, | |
| "step": 5555, | |
| "valid_targets_mean": 6602.2, | |
| "valid_targets_min": 3533 | |
| }, | |
| { | |
| "epoch": 5.7991653625456445, | |
| "grad_norm": 0.48597819814209287, | |
| "learning_rate": 3.494779455753443e-06, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08201763033866882, | |
| "step": 5560, | |
| "valid_targets_mean": 6297.9, | |
| "valid_targets_min": 3502 | |
| }, | |
| { | |
| "epoch": 5.804381846635367, | |
| "grad_norm": 0.4905451745363948, | |
| "learning_rate": 3.4654656601177482e-06, | |
| "loss": 0.1544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07704399526119232, | |
| "step": 5565, | |
| "valid_targets_mean": 5275.9, | |
| "valid_targets_min": 3249 | |
| }, | |
| { | |
| "epoch": 5.809598330725091, | |
| "grad_norm": 0.4174409704626744, | |
| "learning_rate": 3.4362636572749984e-06, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06095777451992035, | |
| "step": 5570, | |
| "valid_targets_mean": 5957.2, | |
| "valid_targets_min": 4283 | |
| }, | |
| { | |
| "epoch": 5.814814814814815, | |
| "grad_norm": 0.4305717305727701, | |
| "learning_rate": 3.4071736446648805e-06, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07813297212123871, | |
| "step": 5575, | |
| "valid_targets_mean": 6440.6, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 5.820031298904539, | |
| "grad_norm": 0.4012997073107626, | |
| "learning_rate": 3.3781958189699183e-06, | |
| "loss": 0.1351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07044396549463272, | |
| "step": 5580, | |
| "valid_targets_mean": 6846.6, | |
| "valid_targets_min": 4858 | |
| }, | |
| { | |
| "epoch": 5.8252477829942615, | |
| "grad_norm": 0.4377532872476081, | |
| "learning_rate": 3.3493303761141016e-06, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06278366595506668, | |
| "step": 5585, | |
| "valid_targets_mean": 5634.8, | |
| "valid_targets_min": 4570 | |
| }, | |
| { | |
| "epoch": 5.830464267083985, | |
| "grad_norm": 0.4378389770915067, | |
| "learning_rate": 3.320577511261589e-06, | |
| "loss": 0.1527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.069257952272892, | |
| "step": 5590, | |
| "valid_targets_mean": 5889.1, | |
| "valid_targets_min": 3082 | |
| }, | |
| { | |
| "epoch": 5.835680751173709, | |
| "grad_norm": 0.43196780823782605, | |
| "learning_rate": 3.291937418815376e-06, | |
| "loss": 0.1396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06850574910640717, | |
| "step": 5595, | |
| "valid_targets_mean": 6502.8, | |
| "valid_targets_min": 5362 | |
| }, | |
| { | |
| "epoch": 5.840897235263433, | |
| "grad_norm": 0.5059571064157411, | |
| "learning_rate": 3.2634102924159982e-06, | |
| "loss": 0.1561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0819023847579956, | |
| "step": 5600, | |
| "valid_targets_mean": 6521.1, | |
| "valid_targets_min": 4056 | |
| }, | |
| { | |
| "epoch": 5.846113719353156, | |
| "grad_norm": 0.47165929680146707, | |
| "learning_rate": 3.2349963249401894e-06, | |
| "loss": 0.1393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06192941963672638, | |
| "step": 5605, | |
| "valid_targets_mean": 5720.8, | |
| "valid_targets_min": 4499 | |
| }, | |
| { | |
| "epoch": 5.851330203442879, | |
| "grad_norm": 0.455280547578607, | |
| "learning_rate": 3.2066957084996163e-06, | |
| "loss": 0.125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05737937241792679, | |
| "step": 5610, | |
| "valid_targets_mean": 6249.8, | |
| "valid_targets_min": 5268 | |
| }, | |
| { | |
| "epoch": 5.856546687532603, | |
| "grad_norm": 0.4543115326007631, | |
| "learning_rate": 3.178508634439539e-06, | |
| "loss": 0.1279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07235752046108246, | |
| "step": 5615, | |
| "valid_targets_mean": 6830.9, | |
| "valid_targets_min": 5851 | |
| }, | |
| { | |
| "epoch": 5.861763171622327, | |
| "grad_norm": 0.43385277610994094, | |
| "learning_rate": 3.150435293337557e-06, | |
| "loss": 0.1304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.061638787388801575, | |
| "step": 5620, | |
| "valid_targets_mean": 5873.2, | |
| "valid_targets_min": 5106 | |
| }, | |
| { | |
| "epoch": 5.86697965571205, | |
| "grad_norm": 0.45570415211766824, | |
| "learning_rate": 3.1224758750022934e-06, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08445452153682709, | |
| "step": 5625, | |
| "valid_targets_mean": 6956.4, | |
| "valid_targets_min": 5383 | |
| }, | |
| { | |
| "epoch": 5.8721961398017735, | |
| "grad_norm": 0.5334124816402946, | |
| "learning_rate": 3.0946305684721145e-06, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06093604117631912, | |
| "step": 5630, | |
| "valid_targets_mean": 2994.9, | |
| "valid_targets_min": 1884 | |
| }, | |
| { | |
| "epoch": 5.877412623891497, | |
| "grad_norm": 0.4580023889822626, | |
| "learning_rate": 3.066899562013872e-06, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06675048172473907, | |
| "step": 5635, | |
| "valid_targets_mean": 5446.6, | |
| "valid_targets_min": 3598 | |
| }, | |
| { | |
| "epoch": 5.882629107981221, | |
| "grad_norm": 0.45002717360730904, | |
| "learning_rate": 3.0392830431216037e-06, | |
| "loss": 0.1393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06392291188240051, | |
| "step": 5640, | |
| "valid_targets_mean": 5429.4, | |
| "valid_targets_min": 3689 | |
| }, | |
| { | |
| "epoch": 5.887845592070944, | |
| "grad_norm": 0.48670249970884333, | |
| "learning_rate": 3.01178119851528e-06, | |
| "loss": 0.1478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07764822244644165, | |
| "step": 5645, | |
| "valid_targets_mean": 7193.6, | |
| "valid_targets_min": 5487 | |
| }, | |
| { | |
| "epoch": 5.893062076160668, | |
| "grad_norm": 0.4850649856074665, | |
| "learning_rate": 2.9843942141395365e-06, | |
| "loss": 0.15, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07818207889795303, | |
| "step": 5650, | |
| "valid_targets_mean": 5895.0, | |
| "valid_targets_min": 4891 | |
| }, | |
| { | |
| "epoch": 5.898278560250391, | |
| "grad_norm": 0.44927212721316157, | |
| "learning_rate": 2.9571222751624317e-06, | |
| "loss": 0.1593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07164573669433594, | |
| "step": 5655, | |
| "valid_targets_mean": 6422.0, | |
| "valid_targets_min": 4072 | |
| }, | |
| { | |
| "epoch": 5.903495044340115, | |
| "grad_norm": 0.4470402250731502, | |
| "learning_rate": 2.9299655659741622e-06, | |
| "loss": 0.1482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07915350794792175, | |
| "step": 5660, | |
| "valid_targets_mean": 6154.8, | |
| "valid_targets_min": 3781 | |
| }, | |
| { | |
| "epoch": 5.908711528429838, | |
| "grad_norm": 0.4895167714500066, | |
| "learning_rate": 2.9029242701858606e-06, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08059638738632202, | |
| "step": 5665, | |
| "valid_targets_mean": 6498.2, | |
| "valid_targets_min": 4563 | |
| }, | |
| { | |
| "epoch": 5.913928012519562, | |
| "grad_norm": 0.4109708193666919, | |
| "learning_rate": 2.8759985706283068e-06, | |
| "loss": 0.1498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06883293390274048, | |
| "step": 5670, | |
| "valid_targets_mean": 6525.0, | |
| "valid_targets_min": 4200 | |
| }, | |
| { | |
| "epoch": 5.919144496609285, | |
| "grad_norm": 0.4194927614066242, | |
| "learning_rate": 2.8491886493507313e-06, | |
| "loss": 0.1342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06732451915740967, | |
| "step": 5675, | |
| "valid_targets_mean": 5802.8, | |
| "valid_targets_min": 4458 | |
| }, | |
| { | |
| "epoch": 5.924360980699009, | |
| "grad_norm": 0.44206171501473734, | |
| "learning_rate": 2.8224946876195593e-06, | |
| "loss": 0.1512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09225204586982727, | |
| "step": 5680, | |
| "valid_targets_mean": 5912.8, | |
| "valid_targets_min": 4081 | |
| }, | |
| { | |
| "epoch": 5.929577464788732, | |
| "grad_norm": 0.4459755743145879, | |
| "learning_rate": 2.795916865917201e-06, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0860271081328392, | |
| "step": 5685, | |
| "valid_targets_mean": 6106.0, | |
| "valid_targets_min": 4298 | |
| }, | |
| { | |
| "epoch": 5.934793948878456, | |
| "grad_norm": 0.47463967081828534, | |
| "learning_rate": 2.7694553639408163e-06, | |
| "loss": 0.137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08704885840415955, | |
| "step": 5690, | |
| "valid_targets_mean": 6761.4, | |
| "valid_targets_min": 4844 | |
| }, | |
| { | |
| "epoch": 5.9400104329681795, | |
| "grad_norm": 0.49617648867747827, | |
| "learning_rate": 2.7431103606011113e-06, | |
| "loss": 0.149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08426280319690704, | |
| "step": 5695, | |
| "valid_targets_mean": 6613.2, | |
| "valid_targets_min": 4212 | |
| }, | |
| { | |
| "epoch": 5.945226917057903, | |
| "grad_norm": 0.45876292383981526, | |
| "learning_rate": 2.71688203402112e-06, | |
| "loss": 0.1522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07211989909410477, | |
| "step": 5700, | |
| "valid_targets_mean": 6438.2, | |
| "valid_targets_min": 3762 | |
| }, | |
| { | |
| "epoch": 5.950443401147626, | |
| "grad_norm": 0.44038076169846796, | |
| "learning_rate": 2.690770561535019e-06, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06130621209740639, | |
| "step": 5705, | |
| "valid_targets_mean": 6420.6, | |
| "valid_targets_min": 4726 | |
| }, | |
| { | |
| "epoch": 5.95565988523735, | |
| "grad_norm": 0.46992442462684847, | |
| "learning_rate": 2.664776119686896e-06, | |
| "loss": 0.1423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08628493547439575, | |
| "step": 5710, | |
| "valid_targets_mean": 6689.5, | |
| "valid_targets_min": 3724 | |
| }, | |
| { | |
| "epoch": 5.960876369327074, | |
| "grad_norm": 0.41705551803909896, | |
| "learning_rate": 2.6388988842295947e-06, | |
| "loss": 0.1279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0795229896903038, | |
| "step": 5715, | |
| "valid_targets_mean": 7317.8, | |
| "valid_targets_min": 5655 | |
| }, | |
| { | |
| "epoch": 5.966092853416797, | |
| "grad_norm": 0.4973872709884148, | |
| "learning_rate": 2.6131390301234927e-06, | |
| "loss": 0.1483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06989811360836029, | |
| "step": 5720, | |
| "valid_targets_mean": 6667.6, | |
| "valid_targets_min": 5185 | |
| }, | |
| { | |
| "epoch": 5.97130933750652, | |
| "grad_norm": 0.43655268652331336, | |
| "learning_rate": 2.587496731535326e-06, | |
| "loss": 0.1373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06090911477804184, | |
| "step": 5725, | |
| "valid_targets_mean": 6522.6, | |
| "valid_targets_min": 4656 | |
| }, | |
| { | |
| "epoch": 5.976525821596244, | |
| "grad_norm": 0.3980610944525635, | |
| "learning_rate": 2.561972161837041e-06, | |
| "loss": 0.1434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06291040033102036, | |
| "step": 5730, | |
| "valid_targets_mean": 5751.9, | |
| "valid_targets_min": 4920 | |
| }, | |
| { | |
| "epoch": 5.981742305685968, | |
| "grad_norm": 0.38083773200221993, | |
| "learning_rate": 2.536565493604575e-06, | |
| "loss": 0.143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05380579084157944, | |
| "step": 5735, | |
| "valid_targets_mean": 7976.4, | |
| "valid_targets_min": 5856 | |
| }, | |
| { | |
| "epoch": 5.9869587897756915, | |
| "grad_norm": 0.4657342466380366, | |
| "learning_rate": 2.511276898616737e-06, | |
| "loss": 0.1373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07087098062038422, | |
| "step": 5740, | |
| "valid_targets_mean": 6129.6, | |
| "valid_targets_min": 3939 | |
| }, | |
| { | |
| "epoch": 5.992175273865414, | |
| "grad_norm": 0.44601794648942017, | |
| "learning_rate": 2.4861065478539925e-06, | |
| "loss": 0.1501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07668866217136383, | |
| "step": 5745, | |
| "valid_targets_mean": 6461.5, | |
| "valid_targets_min": 5219 | |
| }, | |
| { | |
| "epoch": 5.997391757955138, | |
| "grad_norm": 0.5029989846780429, | |
| "learning_rate": 2.4610546114973666e-06, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08470433950424194, | |
| "step": 5750, | |
| "valid_targets_mean": 5153.0, | |
| "valid_targets_min": 2301 | |
| }, | |
| { | |
| "epoch": 6.002086593635889, | |
| "grad_norm": 0.6078283117544341, | |
| "learning_rate": 2.4361212589272488e-06, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11601318418979645, | |
| "step": 5755, | |
| "valid_targets_mean": 5843.4, | |
| "valid_targets_min": 3674 | |
| }, | |
| { | |
| "epoch": 6.007303077725613, | |
| "grad_norm": 0.6957747501890312, | |
| "learning_rate": 2.41130665872227e-06, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06193386763334274, | |
| "step": 5760, | |
| "valid_targets_mean": 2402.5, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 6.012519561815337, | |
| "grad_norm": 0.6335822251138544, | |
| "learning_rate": 2.3866109786581484e-06, | |
| "loss": 0.2114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10176193714141846, | |
| "step": 5765, | |
| "valid_targets_mean": 4376.8, | |
| "valid_targets_min": 1877 | |
| }, | |
| { | |
| "epoch": 6.01773604590506, | |
| "grad_norm": 0.6300833459718725, | |
| "learning_rate": 2.3620343857065776e-06, | |
| "loss": 0.205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11292354762554169, | |
| "step": 5770, | |
| "valid_targets_mean": 5255.8, | |
| "valid_targets_min": 2973 | |
| }, | |
| { | |
| "epoch": 6.022952529994783, | |
| "grad_norm": 0.6164960801161613, | |
| "learning_rate": 2.3375770460340654e-06, | |
| "loss": 0.203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1153225526213646, | |
| "step": 5775, | |
| "valid_targets_mean": 4958.8, | |
| "valid_targets_min": 980 | |
| }, | |
| { | |
| "epoch": 6.028169014084507, | |
| "grad_norm": 0.6217009802905054, | |
| "learning_rate": 2.313239125000841e-06, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1079532578587532, | |
| "step": 5780, | |
| "valid_targets_mean": 4641.2, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 6.033385498174231, | |
| "grad_norm": 0.5735626789686626, | |
| "learning_rate": 2.2890207871597192e-06, | |
| "loss": 0.1955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06880943477153778, | |
| "step": 5785, | |
| "valid_targets_mean": 3139.4, | |
| "valid_targets_min": 1848 | |
| }, | |
| { | |
| "epoch": 6.0386019822639545, | |
| "grad_norm": 0.5365073012025556, | |
| "learning_rate": 2.2649221962549905e-06, | |
| "loss": 0.2066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08455201983451843, | |
| "step": 5790, | |
| "valid_targets_mean": 4646.2, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 6.043818466353677, | |
| "grad_norm": 0.527044321562882, | |
| "learning_rate": 2.2409435152213123e-06, | |
| "loss": 0.2048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10714255273342133, | |
| "step": 5795, | |
| "valid_targets_mean": 6490.9, | |
| "valid_targets_min": 3600 | |
| }, | |
| { | |
| "epoch": 6.049034950443401, | |
| "grad_norm": 0.627211425767137, | |
| "learning_rate": 2.217084906182629e-06, | |
| "loss": 0.2188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09740322828292847, | |
| "step": 5800, | |
| "valid_targets_mean": 4469.8, | |
| "valid_targets_min": 2540 | |
| }, | |
| { | |
| "epoch": 6.054251434533125, | |
| "grad_norm": 0.576511383061421, | |
| "learning_rate": 2.1933465304510394e-06, | |
| "loss": 0.2094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10105922818183899, | |
| "step": 5805, | |
| "valid_targets_mean": 5648.5, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 6.059467918622849, | |
| "grad_norm": 0.5819654926916293, | |
| "learning_rate": 2.1697285485257245e-06, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10021500289440155, | |
| "step": 5810, | |
| "valid_targets_mean": 4748.8, | |
| "valid_targets_min": 1720 | |
| }, | |
| { | |
| "epoch": 6.0646844027125715, | |
| "grad_norm": 1.2112063601809795, | |
| "learning_rate": 2.1462311200918816e-06, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1097966730594635, | |
| "step": 5815, | |
| "valid_targets_mean": 4611.0, | |
| "valid_targets_min": 1982 | |
| }, | |
| { | |
| "epoch": 6.069900886802295, | |
| "grad_norm": 0.5962548766722615, | |
| "learning_rate": 2.122854404019601e-06, | |
| "loss": 0.2045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08937934041023254, | |
| "step": 5820, | |
| "valid_targets_mean": 4092.4, | |
| "valid_targets_min": 1598 | |
| }, | |
| { | |
| "epoch": 6.075117370892019, | |
| "grad_norm": 0.6288447469087993, | |
| "learning_rate": 2.0995985583628366e-06, | |
| "loss": 0.2013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10005679726600647, | |
| "step": 5825, | |
| "valid_targets_mean": 3817.2, | |
| "valid_targets_min": 1451 | |
| }, | |
| { | |
| "epoch": 6.080333854981743, | |
| "grad_norm": 0.5923974582603837, | |
| "learning_rate": 2.076463740358299e-06, | |
| "loss": 0.1984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09131435304880142, | |
| "step": 5830, | |
| "valid_targets_mean": 3677.9, | |
| "valid_targets_min": 1861 | |
| }, | |
| { | |
| "epoch": 6.085550339071466, | |
| "grad_norm": 0.6846940474651547, | |
| "learning_rate": 2.053450106424426e-06, | |
| "loss": 0.1961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10072162747383118, | |
| "step": 5835, | |
| "valid_targets_mean": 3359.1, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 6.090766823161189, | |
| "grad_norm": 0.6391089927470843, | |
| "learning_rate": 2.030557812160301e-06, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11131231486797333, | |
| "step": 5840, | |
| "valid_targets_mean": 4017.1, | |
| "valid_targets_min": 2724 | |
| }, | |
| { | |
| "epoch": 6.095983307250913, | |
| "grad_norm": 0.7038607663797679, | |
| "learning_rate": 2.0077870123446107e-06, | |
| "loss": 0.2104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11045323312282562, | |
| "step": 5845, | |
| "valid_targets_mean": 3765.0, | |
| "valid_targets_min": 1362 | |
| }, | |
| { | |
| "epoch": 6.101199791340637, | |
| "grad_norm": 0.6143015908646067, | |
| "learning_rate": 1.985137860934594e-06, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07774122804403305, | |
| "step": 5850, | |
| "valid_targets_mean": 3980.6, | |
| "valid_targets_min": 1477 | |
| }, | |
| { | |
| "epoch": 6.10641627543036, | |
| "grad_norm": 0.678777969600946, | |
| "learning_rate": 1.962610511065013e-06, | |
| "loss": 0.2067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09677927941083908, | |
| "step": 5855, | |
| "valid_targets_mean": 4153.5, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 6.1116327595200834, | |
| "grad_norm": 0.6973630382121379, | |
| "learning_rate": 1.940205115047098e-06, | |
| "loss": 0.1912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0999145358800888, | |
| "step": 5860, | |
| "valid_targets_mean": 3231.6, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 6.116849243609807, | |
| "grad_norm": 0.6659822459876025, | |
| "learning_rate": 1.917921824367539e-06, | |
| "loss": 0.1874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08446033298969269, | |
| "step": 5865, | |
| "valid_targets_mean": 3469.8, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 6.122065727699531, | |
| "grad_norm": 0.7563516647163375, | |
| "learning_rate": 1.8957607896874419e-06, | |
| "loss": 0.1875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10787837952375412, | |
| "step": 5870, | |
| "valid_targets_mean": 3280.4, | |
| "valid_targets_min": 1693 | |
| }, | |
| { | |
| "epoch": 6.127282211789254, | |
| "grad_norm": 0.6695427277233599, | |
| "learning_rate": 1.8737221608413314e-06, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08913981914520264, | |
| "step": 5875, | |
| "valid_targets_mean": 3160.6, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 6.132498695878978, | |
| "grad_norm": 0.6520039357936902, | |
| "learning_rate": 1.8518060868361099e-06, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09661843627691269, | |
| "step": 5880, | |
| "valid_targets_mean": 3768.2, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 6.137715179968701, | |
| "grad_norm": 0.6678150213518621, | |
| "learning_rate": 1.8300127158500714e-06, | |
| "loss": 0.1882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09485475718975067, | |
| "step": 5885, | |
| "valid_targets_mean": 4639.2, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 6.142931664058425, | |
| "grad_norm": 0.6477438554017697, | |
| "learning_rate": 1.8083421952319047e-06, | |
| "loss": 0.1945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11559544503688812, | |
| "step": 5890, | |
| "valid_targets_mean": 4694.4, | |
| "valid_targets_min": 1934 | |
| }, | |
| { | |
| "epoch": 6.148148148148148, | |
| "grad_norm": 0.7186023918088242, | |
| "learning_rate": 1.786794671499672e-06, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10853292047977448, | |
| "step": 5895, | |
| "valid_targets_mean": 3986.6, | |
| "valid_targets_min": 2557 | |
| }, | |
| { | |
| "epoch": 6.153364632237872, | |
| "grad_norm": 0.7213323411871344, | |
| "learning_rate": 1.7653702903398384e-06, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1300193965435028, | |
| "step": 5900, | |
| "valid_targets_mean": 4261.6, | |
| "valid_targets_min": 2663 | |
| }, | |
| { | |
| "epoch": 6.158581116327595, | |
| "grad_norm": 0.6832617681391988, | |
| "learning_rate": 1.7440691966062816e-06, | |
| "loss": 0.1884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09236248582601547, | |
| "step": 5905, | |
| "valid_targets_mean": 3949.8, | |
| "valid_targets_min": 2016 | |
| }, | |
| { | |
| "epoch": 6.163797600417318, | |
| "grad_norm": 0.7234538410262235, | |
| "learning_rate": 1.722891534319313e-06, | |
| "loss": 0.1941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08755500614643097, | |
| "step": 5910, | |
| "valid_targets_mean": 2903.9, | |
| "valid_targets_min": 1367 | |
| }, | |
| { | |
| "epoch": 6.169014084507042, | |
| "grad_norm": 0.6746206110165813, | |
| "learning_rate": 1.7018374466646981e-06, | |
| "loss": 0.1968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09650535136461258, | |
| "step": 5915, | |
| "valid_targets_mean": 3871.6, | |
| "valid_targets_min": 2279 | |
| }, | |
| { | |
| "epoch": 6.174230568596766, | |
| "grad_norm": 0.7000728094632472, | |
| "learning_rate": 1.6809070759927015e-06, | |
| "loss": 0.1887, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08844681829214096, | |
| "step": 5920, | |
| "valid_targets_mean": 3322.5, | |
| "valid_targets_min": 2419 | |
| }, | |
| { | |
| "epoch": 6.1794470526864895, | |
| "grad_norm": 0.7304644017110606, | |
| "learning_rate": 1.6601005638171065e-06, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08332537114620209, | |
| "step": 5925, | |
| "valid_targets_mean": 2314.0, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 6.184663536776213, | |
| "grad_norm": 0.7805798041163284, | |
| "learning_rate": 1.639418050814281e-06, | |
| "loss": 0.194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12057353556156158, | |
| "step": 5930, | |
| "valid_targets_mean": 3027.4, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 6.189880020865936, | |
| "grad_norm": 0.6896086475953697, | |
| "learning_rate": 1.6188596768221976e-06, | |
| "loss": 0.1916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10906557738780975, | |
| "step": 5935, | |
| "valid_targets_mean": 4359.0, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 6.19509650495566, | |
| "grad_norm": 0.8229920669883567, | |
| "learning_rate": 1.5984255808395198e-06, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09602940082550049, | |
| "step": 5940, | |
| "valid_targets_mean": 3364.5, | |
| "valid_targets_min": 2080 | |
| }, | |
| { | |
| "epoch": 6.200312989045384, | |
| "grad_norm": 0.6781561666665922, | |
| "learning_rate": 1.5781159010246306e-06, | |
| "loss": 0.1957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08884884417057037, | |
| "step": 5945, | |
| "valid_targets_mean": 3020.6, | |
| "valid_targets_min": 1145 | |
| }, | |
| { | |
| "epoch": 6.2055294731351065, | |
| "grad_norm": 0.6767914867939295, | |
| "learning_rate": 1.557930774694727e-06, | |
| "loss": 0.1904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10639166086912155, | |
| "step": 5950, | |
| "valid_targets_mean": 4282.6, | |
| "valid_targets_min": 1858 | |
| }, | |
| { | |
| "epoch": 6.21074595722483, | |
| "grad_norm": 0.6142469140357641, | |
| "learning_rate": 1.5378703383248694e-06, | |
| "loss": 0.1977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08721175789833069, | |
| "step": 5955, | |
| "valid_targets_mean": 3524.8, | |
| "valid_targets_min": 1639 | |
| }, | |
| { | |
| "epoch": 6.215962441314554, | |
| "grad_norm": 0.6740361081085235, | |
| "learning_rate": 1.5179347275470812e-06, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08677750825881958, | |
| "step": 5960, | |
| "valid_targets_mean": 3797.8, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 6.221178925404278, | |
| "grad_norm": 0.6588422835493394, | |
| "learning_rate": 1.4981240771494032e-06, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12048211693763733, | |
| "step": 5965, | |
| "valid_targets_mean": 4671.1, | |
| "valid_targets_min": 1844 | |
| }, | |
| { | |
| "epoch": 6.2263954094940015, | |
| "grad_norm": 0.6808142058321249, | |
| "learning_rate": 1.4784385210750052e-06, | |
| "loss": 0.1879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07778944075107574, | |
| "step": 5970, | |
| "valid_targets_mean": 3266.2, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 6.231611893583724, | |
| "grad_norm": 0.7175220199097666, | |
| "learning_rate": 1.458878192421278e-06, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08991429209709167, | |
| "step": 5975, | |
| "valid_targets_mean": 3436.2, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 6.236828377673448, | |
| "grad_norm": 0.7044190239817425, | |
| "learning_rate": 1.4394432234389167e-06, | |
| "loss": 0.1905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10944026708602905, | |
| "step": 5980, | |
| "valid_targets_mean": 3810.1, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 6.242044861763172, | |
| "grad_norm": 0.6638673626754885, | |
| "learning_rate": 1.4201337455310537e-06, | |
| "loss": 0.1867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0686335414648056, | |
| "step": 5985, | |
| "valid_targets_mean": 3273.1, | |
| "valid_targets_min": 1607 | |
| }, | |
| { | |
| "epoch": 6.247261345852895, | |
| "grad_norm": 0.6876570021462683, | |
| "learning_rate": 1.4009498892523388e-06, | |
| "loss": 0.1992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09525492042303085, | |
| "step": 5990, | |
| "valid_targets_mean": 3412.0, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 6.2524778299426185, | |
| "grad_norm": 0.666161694737711, | |
| "learning_rate": 1.3818917843080848e-06, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07782313227653503, | |
| "step": 5995, | |
| "valid_targets_mean": 3766.6, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 6.257694314032342, | |
| "grad_norm": 0.6271491920704946, | |
| "learning_rate": 1.3629595595533673e-06, | |
| "loss": 0.195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09563714265823364, | |
| "step": 6000, | |
| "valid_targets_mean": 4007.0, | |
| "valid_targets_min": 3218 | |
| }, | |
| { | |
| "epoch": 6.262910798122066, | |
| "grad_norm": 0.6651350804870795, | |
| "learning_rate": 1.3441533429921804e-06, | |
| "loss": 0.1993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12437031418085098, | |
| "step": 6005, | |
| "valid_targets_mean": 4801.1, | |
| "valid_targets_min": 2999 | |
| }, | |
| { | |
| "epoch": 6.26812728221179, | |
| "grad_norm": 0.6799930171864703, | |
| "learning_rate": 1.3254732617765375e-06, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10185380280017853, | |
| "step": 6010, | |
| "valid_targets_mean": 3517.0, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 6.273343766301513, | |
| "grad_norm": 0.6373463560272252, | |
| "learning_rate": 1.3069194422056454e-06, | |
| "loss": 0.1898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.099636010825634, | |
| "step": 6015, | |
| "valid_targets_mean": 4233.4, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 6.278560250391236, | |
| "grad_norm": 0.6443071620700319, | |
| "learning_rate": 1.2884920097250197e-06, | |
| "loss": 0.1828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08788240700960159, | |
| "step": 6020, | |
| "valid_targets_mean": 3580.4, | |
| "valid_targets_min": 2272 | |
| }, | |
| { | |
| "epoch": 6.28377673448096, | |
| "grad_norm": 0.7049082867576395, | |
| "learning_rate": 1.2701910889256651e-06, | |
| "loss": 0.1899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10381342470645905, | |
| "step": 6025, | |
| "valid_targets_mean": 3325.6, | |
| "valid_targets_min": 1183 | |
| }, | |
| { | |
| "epoch": 6.288993218570683, | |
| "grad_norm": 0.6882756765763872, | |
| "learning_rate": 1.2520168035432102e-06, | |
| "loss": 0.1754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12534037232398987, | |
| "step": 6030, | |
| "valid_targets_mean": 4843.9, | |
| "valid_targets_min": 3041 | |
| }, | |
| { | |
| "epoch": 6.294209702660407, | |
| "grad_norm": 0.6591408675216334, | |
| "learning_rate": 1.2339692764570853e-06, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09397697448730469, | |
| "step": 6035, | |
| "valid_targets_mean": 3706.8, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 6.29942618675013, | |
| "grad_norm": 0.5669883417440953, | |
| "learning_rate": 1.2160486296896834e-06, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1007290631532669, | |
| "step": 6040, | |
| "valid_targets_mean": 5485.0, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 6.304642670839854, | |
| "grad_norm": 0.6058799909136552, | |
| "learning_rate": 1.198254984405538e-06, | |
| "loss": 0.1955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10285159945487976, | |
| "step": 6045, | |
| "valid_targets_mean": 4971.1, | |
| "valid_targets_min": 2258 | |
| }, | |
| { | |
| "epoch": 6.309859154929577, | |
| "grad_norm": 0.7294749288209422, | |
| "learning_rate": 1.1805884609105012e-06, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09438608586788177, | |
| "step": 6050, | |
| "valid_targets_mean": 3890.1, | |
| "valid_targets_min": 1971 | |
| }, | |
| { | |
| "epoch": 6.315075639019301, | |
| "grad_norm": 0.6585485185144788, | |
| "learning_rate": 1.1630491786509457e-06, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10699489712715149, | |
| "step": 6055, | |
| "valid_targets_mean": 4203.0, | |
| "valid_targets_min": 1651 | |
| }, | |
| { | |
| "epoch": 6.3202921231090246, | |
| "grad_norm": 0.7001385216107489, | |
| "learning_rate": 1.1456372562129281e-06, | |
| "loss": 0.194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08782413601875305, | |
| "step": 6060, | |
| "valid_targets_mean": 2842.0, | |
| "valid_targets_min": 1374 | |
| }, | |
| { | |
| "epoch": 6.325508607198748, | |
| "grad_norm": 0.6784128406450317, | |
| "learning_rate": 1.1283528113214114e-06, | |
| "loss": 0.1868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09216022491455078, | |
| "step": 6065, | |
| "valid_targets_mean": 3439.2, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 6.330725091288471, | |
| "grad_norm": 0.6570087498388871, | |
| "learning_rate": 1.1111959608394662e-06, | |
| "loss": 0.1783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08298131078481674, | |
| "step": 6070, | |
| "valid_targets_mean": 4143.6, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 6.335941575378195, | |
| "grad_norm": 0.6660420880757796, | |
| "learning_rate": 1.094166820767464e-06, | |
| "loss": 0.1821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08115790784358978, | |
| "step": 6075, | |
| "valid_targets_mean": 3539.4, | |
| "valid_targets_min": 1179 | |
| }, | |
| { | |
| "epoch": 6.341158059467919, | |
| "grad_norm": 0.7010685197964164, | |
| "learning_rate": 1.0772655062423176e-06, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10414253175258636, | |
| "step": 6080, | |
| "valid_targets_mean": 3322.1, | |
| "valid_targets_min": 1632 | |
| }, | |
| { | |
| "epoch": 6.346374543557642, | |
| "grad_norm": 0.6815806889768682, | |
| "learning_rate": 1.0604921315366767e-06, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0816994234919548, | |
| "step": 6085, | |
| "valid_targets_mean": 3812.1, | |
| "valid_targets_min": 1792 | |
| }, | |
| { | |
| "epoch": 6.351591027647365, | |
| "grad_norm": 0.6394795943241389, | |
| "learning_rate": 1.0438468100581823e-06, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10343250632286072, | |
| "step": 6090, | |
| "valid_targets_mean": 4096.8, | |
| "valid_targets_min": 2148 | |
| }, | |
| { | |
| "epoch": 6.356807511737089, | |
| "grad_norm": 0.707735301885829, | |
| "learning_rate": 1.027329654348672e-06, | |
| "loss": 0.1854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10993263870477676, | |
| "step": 6095, | |
| "valid_targets_mean": 3878.5, | |
| "valid_targets_min": 1818 | |
| }, | |
| { | |
| "epoch": 6.362023995826813, | |
| "grad_norm": 0.6259057268974373, | |
| "learning_rate": 1.01094077608344e-06, | |
| "loss": 0.2026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08726441860198975, | |
| "step": 6100, | |
| "valid_targets_mean": 4388.8, | |
| "valid_targets_min": 1987 | |
| }, | |
| { | |
| "epoch": 6.3672404799165365, | |
| "grad_norm": 0.8305443435248903, | |
| "learning_rate": 9.946802860704706e-07, | |
| "loss": 0.1851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1039142906665802, | |
| "step": 6105, | |
| "valid_targets_mean": 3343.8, | |
| "valid_targets_min": 2481 | |
| }, | |
| { | |
| "epoch": 6.372456964006259, | |
| "grad_norm": 0.6896916846152915, | |
| "learning_rate": 9.785482942497037e-07, | |
| "loss": 0.1873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09098956733942032, | |
| "step": 6110, | |
| "valid_targets_mean": 3440.0, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 6.377673448095983, | |
| "grad_norm": 0.6451912293472071, | |
| "learning_rate": 9.625449096922667e-07, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09754312038421631, | |
| "step": 6115, | |
| "valid_targets_mean": 4068.9, | |
| "valid_targets_min": 1860 | |
| }, | |
| { | |
| "epoch": 6.382889932185707, | |
| "grad_norm": 0.6530731229435907, | |
| "learning_rate": 9.466702405997674e-07, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09619583934545517, | |
| "step": 6120, | |
| "valid_targets_mean": 4278.2, | |
| "valid_targets_min": 1914 | |
| }, | |
| { | |
| "epoch": 6.388106416275431, | |
| "grad_norm": 0.7191449977323401, | |
| "learning_rate": 9.309243943035295e-07, | |
| "loss": 0.1849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10144870728254318, | |
| "step": 6125, | |
| "valid_targets_mean": 3908.8, | |
| "valid_targets_min": 3065 | |
| }, | |
| { | |
| "epoch": 6.3933229003651535, | |
| "grad_norm": 0.6578399946548106, | |
| "learning_rate": 9.153074772638937e-07, | |
| "loss": 0.175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09457392990589142, | |
| "step": 6130, | |
| "valid_targets_mean": 3868.6, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 6.398539384454877, | |
| "grad_norm": 0.6960426017459621, | |
| "learning_rate": 8.99819595069491e-07, | |
| "loss": 0.1916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0976584255695343, | |
| "step": 6135, | |
| "valid_targets_mean": 3548.1, | |
| "valid_targets_min": 2359 | |
| }, | |
| { | |
| "epoch": 6.403755868544601, | |
| "grad_norm": 0.6845200487581575, | |
| "learning_rate": 8.844608524365172e-07, | |
| "loss": 0.1813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09304177761077881, | |
| "step": 6140, | |
| "valid_targets_mean": 3140.6, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 6.408972352634325, | |
| "grad_norm": 0.7351698724134363, | |
| "learning_rate": 8.692313532080443e-07, | |
| "loss": 0.1891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0956580638885498, | |
| "step": 6145, | |
| "valid_targets_mean": 3165.6, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 6.414188836724048, | |
| "grad_norm": 0.7024232040374484, | |
| "learning_rate": 8.54131200353292e-07, | |
| "loss": 0.1963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08580676466226578, | |
| "step": 6150, | |
| "valid_targets_mean": 3397.1, | |
| "valid_targets_min": 1661 | |
| }, | |
| { | |
| "epoch": 6.419405320813771, | |
| "grad_norm": 0.6521668361049059, | |
| "learning_rate": 8.391604959669619e-07, | |
| "loss": 0.1907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10645134747028351, | |
| "step": 6155, | |
| "valid_targets_mean": 4858.0, | |
| "valid_targets_min": 1529 | |
| }, | |
| { | |
| "epoch": 6.424621804903495, | |
| "grad_norm": 0.7398931316042223, | |
| "learning_rate": 8.243193412685246e-07, | |
| "loss": 0.2032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11448778212070465, | |
| "step": 6160, | |
| "valid_targets_mean": 3518.6, | |
| "valid_targets_min": 1450 | |
| }, | |
| { | |
| "epoch": 6.429838288993219, | |
| "grad_norm": 0.7234207275799033, | |
| "learning_rate": 8.096078366015514e-07, | |
| "loss": 0.1866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09650101512670517, | |
| "step": 6165, | |
| "valid_targets_mean": 3491.0, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 6.435054773082942, | |
| "grad_norm": 0.6507384923215745, | |
| "learning_rate": 7.950260814330169e-07, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09962242096662521, | |
| "step": 6170, | |
| "valid_targets_mean": 4356.9, | |
| "valid_targets_min": 1174 | |
| }, | |
| { | |
| "epoch": 6.4402712571726655, | |
| "grad_norm": 0.7266592230415524, | |
| "learning_rate": 7.805741743526441e-07, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10199202597141266, | |
| "step": 6175, | |
| "valid_targets_mean": 3630.6, | |
| "valid_targets_min": 2882 | |
| }, | |
| { | |
| "epoch": 6.445487741262389, | |
| "grad_norm": 0.6818119646664422, | |
| "learning_rate": 7.662522130722294e-07, | |
| "loss": 0.1961, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10238732397556305, | |
| "step": 6180, | |
| "valid_targets_mean": 4041.8, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 6.450704225352113, | |
| "grad_norm": 0.6764831169988177, | |
| "learning_rate": 7.520602944249855e-07, | |
| "loss": 0.1907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07282723486423492, | |
| "step": 6185, | |
| "valid_targets_mean": 2842.4, | |
| "valid_targets_min": 1852 | |
| }, | |
| { | |
| "epoch": 6.455920709441836, | |
| "grad_norm": 0.7450286201819744, | |
| "learning_rate": 7.379985143648815e-07, | |
| "loss": 0.1923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1116485595703125, | |
| "step": 6190, | |
| "valid_targets_mean": 4498.4, | |
| "valid_targets_min": 1296 | |
| }, | |
| { | |
| "epoch": 6.46113719353156, | |
| "grad_norm": 0.7024689401609936, | |
| "learning_rate": 7.240669679660017e-07, | |
| "loss": 0.1946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09308739006519318, | |
| "step": 6195, | |
| "valid_targets_mean": 3104.5, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 6.466353677621283, | |
| "grad_norm": 0.6810964071692703, | |
| "learning_rate": 7.102657494218879e-07, | |
| "loss": 0.1952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09859105199575424, | |
| "step": 6200, | |
| "valid_targets_mean": 4356.2, | |
| "valid_targets_min": 3238 | |
| }, | |
| { | |
| "epoch": 6.471570161711007, | |
| "grad_norm": 0.7045298238870286, | |
| "learning_rate": 6.965949520449311e-07, | |
| "loss": 0.18, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07638949155807495, | |
| "step": 6205, | |
| "valid_targets_mean": 3871.8, | |
| "valid_targets_min": 1372 | |
| }, | |
| { | |
| "epoch": 6.47678664580073, | |
| "grad_norm": 0.6699885379375601, | |
| "learning_rate": 6.830546682657035e-07, | |
| "loss": 0.1812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09469474852085114, | |
| "step": 6210, | |
| "valid_targets_mean": 3531.6, | |
| "valid_targets_min": 2370 | |
| }, | |
| { | |
| "epoch": 6.482003129890454, | |
| "grad_norm": 0.6469122391908388, | |
| "learning_rate": 6.696449896323698e-07, | |
| "loss": 0.1889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09193412959575653, | |
| "step": 6215, | |
| "valid_targets_mean": 4020.2, | |
| "valid_targets_min": 2453 | |
| }, | |
| { | |
| "epoch": 6.487219613980177, | |
| "grad_norm": 0.7029900408015306, | |
| "learning_rate": 6.563660068100408e-07, | |
| "loss": 0.1826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09727063775062561, | |
| "step": 6220, | |
| "valid_targets_mean": 3526.0, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 6.492436098069901, | |
| "grad_norm": 0.7131734799923847, | |
| "learning_rate": 6.432178095801678e-07, | |
| "loss": 0.1988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08982507884502411, | |
| "step": 6225, | |
| "valid_targets_mean": 3186.2, | |
| "valid_targets_min": 1728 | |
| }, | |
| { | |
| "epoch": 6.497652582159624, | |
| "grad_norm": 0.8328488163162495, | |
| "learning_rate": 6.302004868399514e-07, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10563303530216217, | |
| "step": 6230, | |
| "valid_targets_mean": 3751.8, | |
| "valid_targets_min": 1941 | |
| }, | |
| { | |
| "epoch": 6.502869066249348, | |
| "grad_norm": 0.6666746203714553, | |
| "learning_rate": 6.173141266017113e-07, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07839032262563705, | |
| "step": 6235, | |
| "valid_targets_mean": 3928.8, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 6.5080855503390715, | |
| "grad_norm": 0.750527425135286, | |
| "learning_rate": 6.045588159923266e-07, | |
| "loss": 0.1967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12108542025089264, | |
| "step": 6240, | |
| "valid_targets_mean": 2807.4, | |
| "valid_targets_min": 1025 | |
| }, | |
| { | |
| "epoch": 6.513302034428795, | |
| "grad_norm": 0.7132755747222744, | |
| "learning_rate": 5.919346412526117e-07, | |
| "loss": 0.1859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10507765412330627, | |
| "step": 6245, | |
| "valid_targets_mean": 4664.0, | |
| "valid_targets_min": 1564 | |
| }, | |
| { | |
| "epoch": 6.518518518518518, | |
| "grad_norm": 0.6446197338366578, | |
| "learning_rate": 5.794416877367526e-07, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09801037609577179, | |
| "step": 6250, | |
| "valid_targets_mean": 4929.6, | |
| "valid_targets_min": 2762 | |
| }, | |
| { | |
| "epoch": 6.523735002608242, | |
| "grad_norm": 0.6325049462572384, | |
| "learning_rate": 5.670800399117316e-07, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08655712008476257, | |
| "step": 6255, | |
| "valid_targets_mean": 3609.4, | |
| "valid_targets_min": 2426 | |
| }, | |
| { | |
| "epoch": 6.528951486697966, | |
| "grad_norm": 0.572375646271786, | |
| "learning_rate": 5.548497813567455e-07, | |
| "loss": 0.1833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08341727405786514, | |
| "step": 6260, | |
| "valid_targets_mean": 5444.6, | |
| "valid_targets_min": 2678 | |
| }, | |
| { | |
| "epoch": 6.534167970787689, | |
| "grad_norm": 0.6626198522216694, | |
| "learning_rate": 5.427509947626486e-07, | |
| "loss": 0.1849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08186571300029755, | |
| "step": 6265, | |
| "valid_targets_mean": 3776.9, | |
| "valid_targets_min": 2769 | |
| }, | |
| { | |
| "epoch": 6.539384454877412, | |
| "grad_norm": 0.7037970947842952, | |
| "learning_rate": 5.307837619313949e-07, | |
| "loss": 0.1764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09408968687057495, | |
| "step": 6270, | |
| "valid_targets_mean": 3707.6, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 6.544600938967136, | |
| "grad_norm": 0.7360642529828448, | |
| "learning_rate": 5.189481637754679e-07, | |
| "loss": 0.1765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09180660545825958, | |
| "step": 6275, | |
| "valid_targets_mean": 3851.9, | |
| "valid_targets_min": 1672 | |
| }, | |
| { | |
| "epoch": 6.54981742305686, | |
| "grad_norm": 0.6635385456966342, | |
| "learning_rate": 5.072442803173649e-07, | |
| "loss": 0.1756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07385655492544174, | |
| "step": 6280, | |
| "valid_targets_mean": 3157.9, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 6.5550339071465835, | |
| "grad_norm": 0.6233750477365968, | |
| "learning_rate": 4.956721906890228e-07, | |
| "loss": 0.1801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08773411810398102, | |
| "step": 6285, | |
| "valid_targets_mean": 4257.1, | |
| "valid_targets_min": 2637 | |
| }, | |
| { | |
| "epoch": 6.560250391236306, | |
| "grad_norm": 0.6738790652231611, | |
| "learning_rate": 4.842319731313016e-07, | |
| "loss": 0.1955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08718082308769226, | |
| "step": 6290, | |
| "valid_targets_mean": 3795.2, | |
| "valid_targets_min": 1465 | |
| }, | |
| { | |
| "epoch": 6.56546687532603, | |
| "grad_norm": 0.70413177478708, | |
| "learning_rate": 4.729237049934621e-07, | |
| "loss": 0.1894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08500716835260391, | |
| "step": 6295, | |
| "valid_targets_mean": 3117.1, | |
| "valid_targets_min": 1583 | |
| }, | |
| { | |
| "epoch": 6.570683359415754, | |
| "grad_norm": 0.7379139115105866, | |
| "learning_rate": 4.6174746273261793e-07, | |
| "loss": 0.1818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09032528847455978, | |
| "step": 6300, | |
| "valid_targets_mean": 2977.4, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 6.575899843505478, | |
| "grad_norm": 0.6985531803090992, | |
| "learning_rate": 4.507033219132395e-07, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09670068323612213, | |
| "step": 6305, | |
| "valid_targets_mean": 3406.8, | |
| "valid_targets_min": 2156 | |
| }, | |
| { | |
| "epoch": 6.5811163275952005, | |
| "grad_norm": 0.7348821996051618, | |
| "learning_rate": 4.3979135720664035e-07, | |
| "loss": 0.186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09527929872274399, | |
| "step": 6310, | |
| "valid_targets_mean": 4144.6, | |
| "valid_targets_min": 2756 | |
| }, | |
| { | |
| "epoch": 6.586332811684924, | |
| "grad_norm": 0.6600406000835288, | |
| "learning_rate": 4.2901164239046443e-07, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10472133010625839, | |
| "step": 6315, | |
| "valid_targets_mean": 3816.6, | |
| "valid_targets_min": 1776 | |
| }, | |
| { | |
| "epoch": 6.591549295774648, | |
| "grad_norm": 0.712562273543921, | |
| "learning_rate": 4.1836425034819106e-07, | |
| "loss": 0.1934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13425195217132568, | |
| "step": 6320, | |
| "valid_targets_mean": 4737.0, | |
| "valid_targets_min": 2526 | |
| }, | |
| { | |
| "epoch": 6.596765779864372, | |
| "grad_norm": 0.6904084955533912, | |
| "learning_rate": 4.078492530686506e-07, | |
| "loss": 0.1845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09922734647989273, | |
| "step": 6325, | |
| "valid_targets_mean": 3746.9, | |
| "valid_targets_min": 2409 | |
| }, | |
| { | |
| "epoch": 6.601982263954095, | |
| "grad_norm": 0.6484257966174485, | |
| "learning_rate": 3.9746672164551856e-07, | |
| "loss": 0.1875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07884103059768677, | |
| "step": 6330, | |
| "valid_targets_mean": 3389.6, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 6.607198748043818, | |
| "grad_norm": 0.720749284172451, | |
| "learning_rate": 3.872167262768578e-07, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10760441422462463, | |
| "step": 6335, | |
| "valid_targets_mean": 4183.8, | |
| "valid_targets_min": 2179 | |
| }, | |
| { | |
| "epoch": 6.612415232133542, | |
| "grad_norm": 0.6204313910535714, | |
| "learning_rate": 3.7709933626461916e-07, | |
| "loss": 0.175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07175396382808685, | |
| "step": 6340, | |
| "valid_targets_mean": 6285.2, | |
| "valid_targets_min": 4772 | |
| }, | |
| { | |
| "epoch": 6.617631716223266, | |
| "grad_norm": 0.6097348603434453, | |
| "learning_rate": 3.671146200142017e-07, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06819523870944977, | |
| "step": 6345, | |
| "valid_targets_mean": 6168.4, | |
| "valid_targets_min": 4703 | |
| }, | |
| { | |
| "epoch": 6.622848200312989, | |
| "grad_norm": 0.6020208458182181, | |
| "learning_rate": 3.5726264503396e-07, | |
| "loss": 0.1453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0590323805809021, | |
| "step": 6350, | |
| "valid_targets_mean": 5867.9, | |
| "valid_targets_min": 4896 | |
| }, | |
| { | |
| "epoch": 6.6280646844027125, | |
| "grad_norm": 0.5449319131610738, | |
| "learning_rate": 3.475434779347731e-07, | |
| "loss": 0.1431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0723414272069931, | |
| "step": 6355, | |
| "valid_targets_mean": 6087.8, | |
| "valid_targets_min": 3783 | |
| }, | |
| { | |
| "epoch": 6.633281168492436, | |
| "grad_norm": 0.5571232964446577, | |
| "learning_rate": 3.3795718442957593e-07, | |
| "loss": 0.1433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06882646679878235, | |
| "step": 6360, | |
| "valid_targets_mean": 6401.1, | |
| "valid_targets_min": 5374 | |
| }, | |
| { | |
| "epoch": 6.63849765258216, | |
| "grad_norm": 0.5275527380414031, | |
| "learning_rate": 3.2850382933292903e-07, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06789778172969818, | |
| "step": 6365, | |
| "valid_targets_mean": 6622.6, | |
| "valid_targets_min": 4482 | |
| }, | |
| { | |
| "epoch": 6.643714136671883, | |
| "grad_norm": 0.5380214631827989, | |
| "learning_rate": 3.1918347656056946e-07, | |
| "loss": 0.1501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06307361274957657, | |
| "step": 6370, | |
| "valid_targets_mean": 6003.2, | |
| "valid_targets_min": 2956 | |
| }, | |
| { | |
| "epoch": 6.648930620761607, | |
| "grad_norm": 0.4973806083293555, | |
| "learning_rate": 3.0999618912898267e-07, | |
| "loss": 0.1416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06776538491249084, | |
| "step": 6375, | |
| "valid_targets_mean": 6501.6, | |
| "valid_targets_min": 5720 | |
| }, | |
| { | |
| "epoch": 6.65414710485133, | |
| "grad_norm": 0.516525612459278, | |
| "learning_rate": 3.009420291549825e-07, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06719283759593964, | |
| "step": 6380, | |
| "valid_targets_mean": 6840.0, | |
| "valid_targets_min": 4198 | |
| }, | |
| { | |
| "epoch": 6.659363588941054, | |
| "grad_norm": 0.5023279818908133, | |
| "learning_rate": 2.9202105785527623e-07, | |
| "loss": 0.1421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06675324589014053, | |
| "step": 6385, | |
| "valid_targets_mean": 6326.4, | |
| "valid_targets_min": 4837 | |
| }, | |
| { | |
| "epoch": 6.664580073030777, | |
| "grad_norm": 0.5717600258618898, | |
| "learning_rate": 2.8323333554606923e-07, | |
| "loss": 0.1547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05757368355989456, | |
| "step": 6390, | |
| "valid_targets_mean": 3354.9, | |
| "valid_targets_min": 2790 | |
| }, | |
| { | |
| "epoch": 6.669796557120501, | |
| "grad_norm": 0.4689428489698588, | |
| "learning_rate": 2.745789216426387e-07, | |
| "loss": 0.1392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07211098074913025, | |
| "step": 6395, | |
| "valid_targets_mean": 5954.2, | |
| "valid_targets_min": 3635 | |
| }, | |
| { | |
| "epoch": 6.675013041210224, | |
| "grad_norm": 0.48565940812750613, | |
| "learning_rate": 2.660578746589515e-07, | |
| "loss": 0.1477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06415621191263199, | |
| "step": 6400, | |
| "valid_targets_mean": 6036.0, | |
| "valid_targets_min": 4393 | |
| }, | |
| { | |
| "epoch": 6.680229525299948, | |
| "grad_norm": 0.45776365520278234, | |
| "learning_rate": 2.5767025220724717e-07, | |
| "loss": 0.1352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07989485561847687, | |
| "step": 6405, | |
| "valid_targets_mean": 8602.2, | |
| "valid_targets_min": 6322 | |
| }, | |
| { | |
| "epoch": 6.685446009389671, | |
| "grad_norm": 0.5744901153723982, | |
| "learning_rate": 2.4941611099766896e-07, | |
| "loss": 0.132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.060449644923210144, | |
| "step": 6410, | |
| "valid_targets_mean": 1596.1, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 6.690662493479395, | |
| "grad_norm": 0.5485949319087033, | |
| "learning_rate": 2.4129550683786194e-07, | |
| "loss": 0.1368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07872801274061203, | |
| "step": 6415, | |
| "valid_targets_mean": 5933.8, | |
| "valid_targets_min": 3625 | |
| }, | |
| { | |
| "epoch": 6.6958789775691185, | |
| "grad_norm": 0.49233042999536897, | |
| "learning_rate": 2.3330849463261363e-07, | |
| "loss": 0.1453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07555273175239563, | |
| "step": 6420, | |
| "valid_targets_mean": 4973.1, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 6.701095461658842, | |
| "grad_norm": 0.47383143781809084, | |
| "learning_rate": 2.2545512838346716e-07, | |
| "loss": 0.1415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06657905876636505, | |
| "step": 6425, | |
| "valid_targets_mean": 6099.6, | |
| "valid_targets_min": 4625 | |
| }, | |
| { | |
| "epoch": 6.706311945748565, | |
| "grad_norm": 0.4532682992608997, | |
| "learning_rate": 2.1773546118836418e-07, | |
| "loss": 0.1254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06096294894814491, | |
| "step": 6430, | |
| "valid_targets_mean": 5994.6, | |
| "valid_targets_min": 4720 | |
| }, | |
| { | |
| "epoch": 6.711528429838289, | |
| "grad_norm": 0.511117705395177, | |
| "learning_rate": 2.1014954524128493e-07, | |
| "loss": 0.1335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07455316931009293, | |
| "step": 6435, | |
| "valid_targets_mean": 7425.9, | |
| "valid_targets_min": 5948 | |
| }, | |
| { | |
| "epoch": 6.716744913928013, | |
| "grad_norm": 0.4560122145471545, | |
| "learning_rate": 2.0269743183189528e-07, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09339310228824615, | |
| "step": 6440, | |
| "valid_targets_mean": 5896.1, | |
| "valid_targets_min": 4690 | |
| }, | |
| { | |
| "epoch": 6.721961398017736, | |
| "grad_norm": 0.43026163880483287, | |
| "learning_rate": 1.9537917134520023e-07, | |
| "loss": 0.1477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06275922060012817, | |
| "step": 6445, | |
| "valid_targets_mean": 5856.2, | |
| "valid_targets_min": 4262 | |
| }, | |
| { | |
| "epoch": 6.727177882107459, | |
| "grad_norm": 0.4917395163424751, | |
| "learning_rate": 1.881948132611977e-07, | |
| "loss": 0.1365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06881976127624512, | |
| "step": 6450, | |
| "valid_targets_mean": 5960.1, | |
| "valid_targets_min": 5149 | |
| }, | |
| { | |
| "epoch": 6.732394366197183, | |
| "grad_norm": 0.49937760356264693, | |
| "learning_rate": 1.811444061545542e-07, | |
| "loss": 0.1428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06982825696468353, | |
| "step": 6455, | |
| "valid_targets_mean": 5329.4, | |
| "valid_targets_min": 3450 | |
| }, | |
| { | |
| "epoch": 6.737610850286907, | |
| "grad_norm": 0.5473929893933744, | |
| "learning_rate": 1.7422799769426736e-07, | |
| "loss": 0.159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08833189308643341, | |
| "step": 6460, | |
| "valid_targets_mean": 6738.6, | |
| "valid_targets_min": 3531 | |
| }, | |
| { | |
| "epoch": 6.7428273343766305, | |
| "grad_norm": 0.5299222064749929, | |
| "learning_rate": 1.674456346433506e-07, | |
| "loss": 0.1722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09204494953155518, | |
| "step": 6465, | |
| "valid_targets_mean": 7328.4, | |
| "valid_targets_min": 4617 | |
| }, | |
| { | |
| "epoch": 6.748043818466353, | |
| "grad_norm": 0.4832652634365656, | |
| "learning_rate": 1.607973628585091e-07, | |
| "loss": 0.1597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06422878801822662, | |
| "step": 6470, | |
| "valid_targets_mean": 5306.0, | |
| "valid_targets_min": 3685 | |
| }, | |
| { | |
| "epoch": 6.753260302556077, | |
| "grad_norm": 0.47748203491144553, | |
| "learning_rate": 1.542832272898398e-07, | |
| "loss": 0.1461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06896630674600601, | |
| "step": 6475, | |
| "valid_targets_mean": 5627.4, | |
| "valid_targets_min": 1902 | |
| }, | |
| { | |
| "epoch": 6.758476786645801, | |
| "grad_norm": 0.47684679813740355, | |
| "learning_rate": 1.4790327198051624e-07, | |
| "loss": 0.1458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07093920558691025, | |
| "step": 6480, | |
| "valid_targets_mean": 7195.9, | |
| "valid_targets_min": 4412 | |
| }, | |
| { | |
| "epoch": 6.763693270735525, | |
| "grad_norm": 0.5289158367195713, | |
| "learning_rate": 1.416575400664999e-07, | |
| "loss": 0.1507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06557299196720123, | |
| "step": 6485, | |
| "valid_targets_mean": 5172.0, | |
| "valid_targets_min": 3161 | |
| }, | |
| { | |
| "epoch": 6.7689097548252475, | |
| "grad_norm": 0.4525966616107485, | |
| "learning_rate": 1.3554607377624263e-07, | |
| "loss": 0.1464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07581068575382233, | |
| "step": 6490, | |
| "valid_targets_mean": 7083.6, | |
| "valid_targets_min": 5428 | |
| }, | |
| { | |
| "epoch": 6.774126238914971, | |
| "grad_norm": 0.49679325772986693, | |
| "learning_rate": 1.295689144304091e-07, | |
| "loss": 0.1499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08488409221172333, | |
| "step": 6495, | |
| "valid_targets_mean": 6086.4, | |
| "valid_targets_min": 2364 | |
| }, | |
| { | |
| "epoch": 6.779342723004695, | |
| "grad_norm": 0.4756991665867179, | |
| "learning_rate": 1.237261024415881e-07, | |
| "loss": 0.152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07247477769851685, | |
| "step": 6500, | |
| "valid_targets_mean": 6284.8, | |
| "valid_targets_min": 4793 | |
| }, | |
| { | |
| "epoch": 6.784559207094419, | |
| "grad_norm": 0.48485980321825084, | |
| "learning_rate": 1.1801767731401958e-07, | |
| "loss": 0.1482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07554655522108078, | |
| "step": 6505, | |
| "valid_targets_mean": 6081.4, | |
| "valid_targets_min": 4588 | |
| }, | |
| { | |
| "epoch": 6.789775691184142, | |
| "grad_norm": 0.47230914172727073, | |
| "learning_rate": 1.1244367764333464e-07, | |
| "loss": 0.1321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0634116679430008, | |
| "step": 6510, | |
| "valid_targets_mean": 7343.1, | |
| "valid_targets_min": 5585 | |
| }, | |
| { | |
| "epoch": 6.794992175273865, | |
| "grad_norm": 0.5027268153310445, | |
| "learning_rate": 1.0700414111629365e-07, | |
| "loss": 0.1347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08672606945037842, | |
| "step": 6515, | |
| "valid_targets_mean": 6391.6, | |
| "valid_targets_min": 3985 | |
| }, | |
| { | |
| "epoch": 6.800208659363589, | |
| "grad_norm": 0.48160431262184494, | |
| "learning_rate": 1.0169910451052422e-07, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08598476648330688, | |
| "step": 6520, | |
| "valid_targets_mean": 7515.0, | |
| "valid_targets_min": 5325 | |
| }, | |
| { | |
| "epoch": 6.805425143453313, | |
| "grad_norm": 0.4955341832313151, | |
| "learning_rate": 9.652860369428141e-08, | |
| "loss": 0.1515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08407383412122726, | |
| "step": 6525, | |
| "valid_targets_mean": 6863.9, | |
| "valid_targets_min": 4423 | |
| }, | |
| { | |
| "epoch": 6.810641627543036, | |
| "grad_norm": 0.476987865769532, | |
| "learning_rate": 9.149267362619895e-08, | |
| "loss": 0.1434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07479429244995117, | |
| "step": 6530, | |
| "valid_targets_mean": 5935.5, | |
| "valid_targets_min": 4064 | |
| }, | |
| { | |
| "epoch": 6.8158581116327595, | |
| "grad_norm": 0.4427987408658055, | |
| "learning_rate": 8.659134835504956e-08, | |
| "loss": 0.1423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07114644348621368, | |
| "step": 6535, | |
| "valid_targets_mean": 7679.9, | |
| "valid_targets_min": 5036 | |
| }, | |
| { | |
| "epoch": 6.821074595722483, | |
| "grad_norm": 0.5253285286162036, | |
| "learning_rate": 8.18246610195339e-08, | |
| "loss": 0.1378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08310030400753021, | |
| "step": 6540, | |
| "valid_targets_mean": 6041.1, | |
| "valid_targets_min": 5000 | |
| }, | |
| { | |
| "epoch": 6.826291079812207, | |
| "grad_norm": 0.505125461904702, | |
| "learning_rate": 7.719264384802527e-08, | |
| "loss": 0.1424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07492141425609589, | |
| "step": 6545, | |
| "valid_targets_mean": 6254.0, | |
| "valid_targets_min": 4602 | |
| }, | |
| { | |
| "epoch": 6.83150756390193, | |
| "grad_norm": 0.47099617133632965, | |
| "learning_rate": 7.269532815838532e-08, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06015133857727051, | |
| "step": 6550, | |
| "valid_targets_mean": 5628.6, | |
| "valid_targets_min": 4475 | |
| }, | |
| { | |
| "epoch": 6.836724047991654, | |
| "grad_norm": 0.4516515781598282, | |
| "learning_rate": 6.833274435772196e-08, | |
| "loss": 0.1378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06385660171508789, | |
| "step": 6555, | |
| "valid_targets_mean": 6075.8, | |
| "valid_targets_min": 4824 | |
| }, | |
| { | |
| "epoch": 6.841940532081377, | |
| "grad_norm": 0.47736199363193704, | |
| "learning_rate": 6.410492194220963e-08, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07481502741575241, | |
| "step": 6560, | |
| "valid_targets_mean": 5807.0, | |
| "valid_targets_min": 4982 | |
| }, | |
| { | |
| "epoch": 6.847157016171101, | |
| "grad_norm": 0.4238300717708793, | |
| "learning_rate": 6.001188949687153e-08, | |
| "loss": 0.1327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06115148216485977, | |
| "step": 6565, | |
| "valid_targets_mean": 5453.1, | |
| "valid_targets_min": 4443 | |
| }, | |
| { | |
| "epoch": 6.852373500260824, | |
| "grad_norm": 0.5763528254668782, | |
| "learning_rate": 5.6053674695395463e-08, | |
| "loss": 0.1238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05990096181631088, | |
| "step": 6570, | |
| "valid_targets_mean": 6058.6, | |
| "valid_targets_min": 4802 | |
| }, | |
| { | |
| "epoch": 6.857589984350548, | |
| "grad_norm": 0.43070355608364835, | |
| "learning_rate": 5.22303042999428e-08, | |
| "loss": 0.1269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05511939525604248, | |
| "step": 6575, | |
| "valid_targets_mean": 5375.4, | |
| "valid_targets_min": 4559 | |
| }, | |
| { | |
| "epoch": 6.862806468440271, | |
| "grad_norm": 0.4824945004654264, | |
| "learning_rate": 4.8541804160968655e-08, | |
| "loss": 0.1332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07177352905273438, | |
| "step": 6580, | |
| "valid_targets_mean": 6076.2, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 6.868022952529994, | |
| "grad_norm": 0.471132843706313, | |
| "learning_rate": 4.498819921705089e-08, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08584603667259216, | |
| "step": 6585, | |
| "valid_targets_mean": 6920.9, | |
| "valid_targets_min": 4491 | |
| }, | |
| { | |
| "epoch": 6.873239436619718, | |
| "grad_norm": 0.613261764384988, | |
| "learning_rate": 4.15695134947125e-08, | |
| "loss": 0.1676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10297256708145142, | |
| "step": 6590, | |
| "valid_targets_mean": 4242.4, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 6.878455920709442, | |
| "grad_norm": 0.4711101504665452, | |
| "learning_rate": 3.8285770108272836e-08, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06880757212638855, | |
| "step": 6595, | |
| "valid_targets_mean": 5535.4, | |
| "valid_targets_min": 3893 | |
| }, | |
| { | |
| "epoch": 6.8836724047991655, | |
| "grad_norm": 0.5373622226755685, | |
| "learning_rate": 3.5136991259672183e-08, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06807295978069305, | |
| "step": 6600, | |
| "valid_targets_mean": 5532.1, | |
| "valid_targets_min": 4622 | |
| }, | |
| { | |
| "epoch": 6.888888888888889, | |
| "grad_norm": 0.45853274459329846, | |
| "learning_rate": 3.212319823834298e-08, | |
| "loss": 0.1435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0664399266242981, | |
| "step": 6605, | |
| "valid_targets_mean": 7928.9, | |
| "valid_targets_min": 5695 | |
| }, | |
| { | |
| "epoch": 6.894105372978612, | |
| "grad_norm": 0.49768269587166664, | |
| "learning_rate": 2.9244411421049946e-08, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07913493365049362, | |
| "step": 6610, | |
| "valid_targets_mean": 6435.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 6.899321857068336, | |
| "grad_norm": 0.5269060580904159, | |
| "learning_rate": 2.6500650271759077e-08, | |
| "loss": 0.1604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0811527743935585, | |
| "step": 6615, | |
| "valid_targets_mean": 6724.8, | |
| "valid_targets_min": 5299 | |
| }, | |
| { | |
| "epoch": 6.90453834115806, | |
| "grad_norm": 0.5097653563434373, | |
| "learning_rate": 2.3891933341506636e-08, | |
| "loss": 0.1469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08272428810596466, | |
| "step": 6620, | |
| "valid_targets_mean": 6748.9, | |
| "valid_targets_min": 4245 | |
| }, | |
| { | |
| "epoch": 6.9097548252477825, | |
| "grad_norm": 0.48301627175649453, | |
| "learning_rate": 2.1418278268270365e-08, | |
| "loss": 0.1509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07479394972324371, | |
| "step": 6625, | |
| "valid_targets_mean": 5589.4, | |
| "valid_targets_min": 4409 | |
| }, | |
| { | |
| "epoch": 6.914971309337506, | |
| "grad_norm": 0.444626480990525, | |
| "learning_rate": 1.907970177684737e-08, | |
| "loss": 0.1434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07348085939884186, | |
| "step": 6630, | |
| "valid_targets_mean": 6732.4, | |
| "valid_targets_min": 4797 | |
| }, | |
| { | |
| "epoch": 6.92018779342723, | |
| "grad_norm": 0.44967507417378166, | |
| "learning_rate": 1.687621967875863e-08, | |
| "loss": 0.1363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07124006748199463, | |
| "step": 6635, | |
| "valid_targets_mean": 6316.6, | |
| "valid_targets_min": 5240 | |
| }, | |
| { | |
| "epoch": 6.925404277516954, | |
| "grad_norm": 0.5171480339773938, | |
| "learning_rate": 1.4807846872113563e-08, | |
| "loss": 0.1938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1960332691669464, | |
| "step": 6640, | |
| "valid_targets_mean": 6448.6, | |
| "valid_targets_min": 4778 | |
| }, | |
| { | |
| "epoch": 6.9306207616066775, | |
| "grad_norm": 0.46163641539197164, | |
| "learning_rate": 1.2874597341534512e-08, | |
| "loss": 0.1396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.056532472372055054, | |
| "step": 6645, | |
| "valid_targets_mean": 6228.1, | |
| "valid_targets_min": 4027 | |
| }, | |
| { | |
| "epoch": 6.9358372456964, | |
| "grad_norm": 0.4573727448799637, | |
| "learning_rate": 1.1076484158047962e-08, | |
| "loss": 0.1391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07274767756462097, | |
| "step": 6650, | |
| "valid_targets_mean": 6315.6, | |
| "valid_targets_min": 4547 | |
| }, | |
| { | |
| "epoch": 6.941053729786124, | |
| "grad_norm": 0.49050767259566147, | |
| "learning_rate": 9.413519479004596e-09, | |
| "loss": 0.1476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07301096618175507, | |
| "step": 6655, | |
| "valid_targets_mean": 5824.5, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 6.946270213875848, | |
| "grad_norm": 0.43658165491615974, | |
| "learning_rate": 7.885714547990475e-09, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.061631184071302414, | |
| "step": 6660, | |
| "valid_targets_mean": 6651.8, | |
| "valid_targets_min": 4329 | |
| }, | |
| { | |
| "epoch": 6.951486697965571, | |
| "grad_norm": 0.45600773244930565, | |
| "learning_rate": 6.493079694753768e-09, | |
| "loss": 0.1286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06703155487775803, | |
| "step": 6665, | |
| "valid_targets_mean": 6751.9, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 6.9567031820552945, | |
| "grad_norm": 0.4597047283504256, | |
| "learning_rate": 5.235624335133693e-09, | |
| "loss": 0.1433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0761105939745903, | |
| "step": 6670, | |
| "valid_targets_mean": 6895.8, | |
| "valid_targets_min": 4707 | |
| }, | |
| { | |
| "epoch": 6.961919666145018, | |
| "grad_norm": 0.4808853814035739, | |
| "learning_rate": 4.113356971002791e-09, | |
| "loss": 0.1348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07656501233577728, | |
| "step": 6675, | |
| "valid_targets_mean": 6377.2, | |
| "valid_targets_min": 5224 | |
| }, | |
| { | |
| "epoch": 6.967136150234742, | |
| "grad_norm": 0.45271787129292623, | |
| "learning_rate": 3.126285190195866e-09, | |
| "loss": 0.1369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06831462681293488, | |
| "step": 6680, | |
| "valid_targets_mean": 6458.5, | |
| "valid_targets_min": 4735 | |
| }, | |
| { | |
| "epoch": 6.972352634324466, | |
| "grad_norm": 0.48694726652947273, | |
| "learning_rate": 2.2744156664766813e-09, | |
| "loss": 0.1387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07829150557518005, | |
| "step": 6685, | |
| "valid_targets_mean": 7325.6, | |
| "valid_targets_min": 6050 | |
| }, | |
| { | |
| "epoch": 6.977569118414189, | |
| "grad_norm": 0.44803652264596455, | |
| "learning_rate": 1.5577541594802293e-09, | |
| "loss": 0.1453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08373796939849854, | |
| "step": 6690, | |
| "valid_targets_mean": 7341.4, | |
| "valid_targets_min": 4982 | |
| }, | |
| { | |
| "epoch": 6.982785602503912, | |
| "grad_norm": 0.4143012684137875, | |
| "learning_rate": 9.7630551467498e-10, | |
| "loss": 0.1316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06451661139726639, | |
| "step": 6695, | |
| "valid_targets_mean": 7924.5, | |
| "valid_targets_min": 6578 | |
| }, | |
| { | |
| "epoch": 6.988002086593636, | |
| "grad_norm": 0.4761349933407689, | |
| "learning_rate": 5.300736633317982e-10, | |
| "loss": 0.1445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07810164988040924, | |
| "step": 6700, | |
| "valid_targets_mean": 6218.9, | |
| "valid_targets_min": 4682 | |
| }, | |
| { | |
| "epoch": 6.993218570683359, | |
| "grad_norm": 0.5065464017319322, | |
| "learning_rate": 2.1906162250395768e-10, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08083474636077881, | |
| "step": 6705, | |
| "valid_targets_mean": 6293.5, | |
| "valid_targets_min": 5075 | |
| }, | |
| { | |
| "epoch": 6.998435054773083, | |
| "grad_norm": 0.5251022683122148, | |
| "learning_rate": 4.3271494996055544e-11, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07129688560962677, | |
| "step": 6710, | |
| "valid_targets_mean": 5323.8, | |
| "valid_targets_min": 2821 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11204883456230164, | |
| "step": 6712, | |
| "total_flos": 3.0947556970284974e+18, | |
| "train_loss": 0.09641505422623421, | |
| "train_runtime": 96649.0246, | |
| "train_samples_per_second": 1.11, | |
| "train_steps_per_second": 0.069, | |
| "valid_targets_mean": 4614.6, | |
| "valid_targets_min": 1320 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 6713, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.0947556970284974e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |