Allow multiple GPUs to run inference in a data-parallel way

2025-10-15 19:12:19 +00:00
parent 190d9515d0
commit 01fb290f53
2 changed files with 145 additions and 73 deletions
@@ -327,7 +327,6 @@
                    },
                    body: JSON.stringify({
                        messages: messages,
-                        stream: true,
                        temperature: 0.8,
                        max_tokens: 512
                    }),