update readme.md.

b4rtaz · Jun 1, 2024 · e8dc8ec · e8dc8ec
1 parent 961db29
commit e8dc8ec
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ Python 3 and C++ compiler required. The command will download the model and the
 
 | Model                   | Purpose   | Size     | Command                                   |
 | ----------------------- | --------- | -------- | ----------------------------------------- |
+| TinyLlama 1.1B 3T Q40   | Benchmark | 844 MB   | `python launch.py tinyllama_1_1b_3t_q40`  |
 | Llama 3 8B Q40          | Benchmark | 6.32 GB  | `python launch.py llama3_8b_q40`          |
 | Llama 3 8B Instruct Q40 | Chat, API | 6.32 GB  | `python launch.py llama3_8b_instruct_q40` |
 

diff --git a/launch.py b/launch.py
@@ -4,6 +4,11 @@
 
 # ['model-url', 'tokenizer-url', 'weights-float-type', 'buffer-float-type', 'model-type']
 MODELS = {
+    'tinyllama_1_1b_3t_q40': [
+        'https://huggingface.co/b4rtaz/TinyLlama-1.1B-3T-Distributed-Llama/resolve/main/dllama_model_tinylama_1.1b_3t_q40.m?download=true',
+        'https://huggingface.co/b4rtaz/TinyLlama-1.1B-3T-Distributed-Llama/resolve/main/dllama_tokenizer_tinylama_1.1b_3t.t?download=true',
+        'q40', 'q80', 'base'
+    ],
     'llama3_8b_q40': [
         'https://huggingface.co/b4rtaz/Llama-3-8B-Q40-Distributed-Llama/resolve/main/dllama_model_meta-llama-3-8b_q40.m?download=true',
         'https://huggingface.co/b4rtaz/Llama-3-8B-Q40-Distributed-Llama/resolve/main/dllama_tokenizer_llama3.t?download=true',