# codegeex.py — LangChain chat-model wrapper around a local CodeGeeX checkpoint.
from typing import Any, Iterator

import torch
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessageChunk, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from pydantic import Field
from transformers import AutoModel, AutoTokenizer

from utils.prompts import SYS_PROMPT
  9. class CodegeexChatModel(BaseChatModel):
  10. device: str = Field(description="device to load the model")
  11. tokenizer = Field(description="model's tokenizer")
  12. model = Field(description="Codegeex model")
  13. temperature: float = Field(description="temperature to use for the model.")
  14. def __init__(self, args):
  15. super().__init__()
  16. self.device = args.device
  17. self.tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, trust_remote_code=True)
  18. self.model = AutoModel.from_pretrained(
  19. args.model_name_or_path,
  20. trust_remote_code=True
  21. ).to(args.device).eval()
  22. self.temperature = args.temperature
  23. print("Model has been initialized.")
  24. def _llm_type(self) -> str:
  25. return "codegeex"
  26. @torch.inference_mode()
  27. def _generate(self, messages, **kwargs):
  28. try:
  29. response, _ = self.model.chat(
  30. self.tokenizer,
  31. query=messages[0].content,
  32. history=[{"role": "system", "content": SYS_PROMPT}],
  33. max_new_tokens=1024,
  34. temperature=self.temperature
  35. )
  36. return ChatResult(generations=[ChatGeneration(message=BaseMessage(content=response, type='ai'))])
  37. except Exception as e:
  38. return ChatResult(generations=[ChatGeneration(message=BaseMessage(content=repr(e), type='ai'))])
  39. def _stream(self, messages: list[BaseMessage], **kwargs) -> Iterator[ChatGenerationChunk]:
  40. try:
  41. for response, _ in self.model.stream_chat(
  42. self.tokenizer,
  43. query=messages[0].content,
  44. history=[{"role": "system", "content": SYS_PROMPT}],
  45. max_new_tokens=1024,
  46. temperature=self.temperature
  47. ):
  48. yield ChatGenerationChunk(message=AIMessageChunk(content=response))
  49. except Exception as e:
  50. yield ChatGenerationChunk(message=AIMessageChunk(content=f"Fail to generate, cause by {e}"))