Delete the torchao dependency and create our own exact API-matched version of Float8Linear, documented very well. For some poorly understood reason, the performance is not merely ~identical: it actually runs 3% faster, despite the new implementation being significantly simpler and much less code. I don't fully understand why/how at the moment.

2026-02-10 18:46:39 +00:00
parent 1ec0a34779
commit e569b59f92
4 changed files with 275 additions and 13 deletions
@@ -20,7 +20,6 @@ dependencies = [
    "tiktoken>=0.11.0",
    "tokenizers>=0.22.0",
    "torch==2.9.1",
-    "torchao==0.15.0",
    "transformers>=4.57.3",
    "uvicorn>=0.36.0",
    "wandb>=0.21.3",