import time
import uuid
from collections.abc import Iterator

from llama_index.llms import ChatResponse, CompletionResponse
from pydantic import BaseModel, Field


class OpenAIDelta(BaseModel):
    """A piece of completion that needs to be concatenated to get the full message."""

    content: str | None


class OpenAIMessage(BaseModel):
    """Inference result, with the source of the message.

    Role could be the assistant or system
    (providing a default response, not AI generated).
    """

    role: str = Field(default="user", enum=["assistant", "system", "user"])
    content: str | None


class OpenAIChoice(BaseModel):
    """Response from AI.

    Either the delta or the message will be present, but never both.
    """

    finish_reason: str | None = Field(examples=["stop"])
    delta: OpenAIDelta | None = None
    message: OpenAIMessage | None = None
    index: int = 0


class OpenAICompletion(BaseModel):
    """Clone of OpenAI Completion model.

    For more information see: https://platform.openai.com/docs/api-reference/chat/object
    """

    id: str
    object: str = Field("completion", enum=["completion", "completion.chunk"])
    created: int = Field(..., examples=[1623340000])
    model: str = Field(enum=["private-gpt"])
    choices: list[OpenAIChoice]

    @classmethod
    def from_text(
        cls, text: str | None, finish_reason: str | None = None
    ) -> "OpenAICompletion":
        """Build a non-streaming completion whose single choice carries the full message."""
        return OpenAICompletion(
            id=str(uuid.uuid4()),
            object="completion",
            created=int(time.time()),
            model="private-gpt",
            choices=[
                OpenAIChoice(
                    message=OpenAIMessage(role="assistant", content=text),
                    finish_reason=finish_reason,
                )
            ],
        )

    @classmethod
    def json_from_delta(
        cls, *, text: str | None, finish_reason: str | None = None
    ) -> str:
        """Build a streaming chunk carrying `text` as a delta and return it serialized as JSON."""
        chunk = OpenAICompletion(
            id=str(uuid.uuid4()),
            object="completion.chunk",
            created=int(time.time()),
            model="private-gpt",
            choices=[
                OpenAIChoice(
                    delta=OpenAIDelta(content=text),
                    finish_reason=finish_reason,
                )
            ],
        )

        return chunk.model_dump_json()
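

# For illustration: `OpenAICompletion.json_from_delta(text="Hel")` serializes to
# roughly the following JSON (the `id` and `created` values vary per call):
#
#   {"id": "...", "object": "completion.chunk", "created": 1623340000,
#    "model": "private-gpt",
#    "choices": [{"finish_reason": null, "delta": {"content": "Hel"},
#                 "message": null, "index": 0}]}
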
def to_openai_response(response: str | ChatResponse) -> OpenAICompletion:
    """Wrap a plain string or a ChatResponse as a finished (non-streaming) completion."""
    if isinstance(response, ChatResponse):
        return OpenAICompletion.from_text(response.delta, finish_reason="stop")
    else:
        return OpenAICompletion.from_text(response, finish_reason="stop")

def to_openai_sse_stream(
    response_generator: Iterator[str | CompletionResponse | ChatResponse],
) -> Iterator[str]:
    """Yield each response as a Server-Sent Events "data:" message.

    The stream is closed with an empty "stop" chunk followed by the
    [DONE] sentinel that OpenAI-compatible clients expect.
    """
    for response in response_generator:
        if isinstance(response, CompletionResponse | ChatResponse):
            yield f"data: {OpenAICompletion.json_from_delta(text=response.delta)}\n\n"
        else:
            yield f"data: {OpenAICompletion.json_from_delta(text=response)}\n\n"
    yield f"data: {OpenAICompletion.json_from_delta(text=None, finish_reason='stop')}\n\n"
    yield "data: [DONE]\n\n"
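

# Minimal usage sketch (illustrative; it only exercises the plain-string paths,
# so no ChatResponse or CompletionResponse instances are needed).
if __name__ == "__main__":
    # Non-streaming: wrap a finished answer in an OpenAI-style completion.
    print(to_openai_response("Hello there!").model_dump_json(indent=2))

    # Streaming: each piece of text becomes one SSE "data:" message, followed
    # by an empty "stop" chunk and the "[DONE]" sentinel.
    for sse_message in to_openai_sse_stream(iter(["Hel", "lo"])):
        print(sse_message, end="")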