|
@@ -37,6 +37,46 @@ with torch.no_grad():
|
|
|
outputs = outputs[:, inputs['input_ids'].shape[1]:]
|
|
outputs = outputs[:, inputs['input_ids'].shape[1]:]
|
|
|
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
|
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
|
|
```
|
|
```
|
|
|
|
|
+使用 `vllm==0.5.1` 快速启动 [codegeex4-all-9b](https://huggingface.co/THUDM/codegeex4-all-9b):
|
|
|
|
|
+
|
|
|
|
|
+```python
|
|
|
|
|
+from transformers import AutoTokenizer
|
|
|
|
|
+from vllm import LLM, SamplingParams
|
|
|
|
|
+
|
|
|
|
|
+# CodeGeeX4-ALL-9B
|
|
|
|
|
+# max_model_len, tp_size = 1048576, 4
|
|
|
|
|
+# 如果出现显存不足(OOM),请减小 max_model_len 或增大 tp_size
|
|
|
|
|
+max_model_len, tp_size = 131072, 1
|
|
|
|
|
+model_name = "codegeex4-all-9b"
|
|
|
|
|
+prompt = [{"role": "user", "content": "Hello"}]
|
|
|
|
|
+
|
|
|
|
|
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
|
|
|
+llm = LLM(
|
|
|
|
|
+ model=model_name,
|
|
|
|
|
+ tensor_parallel_size=tp_size,
|
|
|
|
|
+ max_model_len=max_model_len,
|
|
|
|
|
+ trust_remote_code=True,
|
|
|
|
|
+ enforce_eager=True,
|
|
|
|
|
+ # 如果出现OOM,尝试使用以下参数
|
|
|
|
|
+ # enable_chunked_prefill=True,
|
|
|
|
|
+ # max_num_batched_tokens=8192
|
|
|
|
|
+)
|
|
|
|
|
+stop_token_ids = [151329, 151336, 151338]
|
|
|
|
|
+sampling_params = SamplingParams(temperature=0.95, max_tokens=1024, stop_token_ids=stop_token_ids)
|
|
|
|
|
+
|
|
|
|
|
+inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
|
|
|
|
|
+outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
|
|
|
|
|
+
|
|
|
|
|
+print(outputs[0].outputs[0].text)
|
|
|
|
|
+```
|
|
|
|
|
+
|
|
|
|
|
+通过 vllm 启动 OpenAI 兼容服务器,详细信息请参阅 [OpenAI 兼容服务器文档](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html):
|
|
|
|
|
+
|
|
|
|
|
+```bash
|
|
|
|
|
+python -m vllm.entrypoints.openai.api_server \
|
|
|
|
|
+ --model THUDM/codegeex4-all-9b \
|
|
|
|
|
+ --trust_remote_code
|
|
|
|
|
+```
|
|
|
|
|
|
|
|
## 用户指南
|
|
## 用户指南
|
|
|
我们为 CodeGeeX4-ALL-9B 提供了用户指南,帮助用户快速了解和使用该模型:
|
|
我们为 CodeGeeX4-ALL-9B 提供了用户指南,帮助用户快速了解和使用该模型:
|