allow multiple GPUs to do inference in a data parallel way
This commit is contained in:
@@ -327,7 +327,6 @@
|
||||
},
|
||||
body: JSON.stringify({
|
||||
messages: messages,
|
||||
stream: true,
|
||||
temperature: 0.8,
|
||||
max_tokens: 512
|
||||
}),
|
||||
|
||||
Reference in New Issue
Block a user