fix(llm): special tokens and leading space (#1831)

This commit is contained in:
Pablo Orgaz 2024-04-04 14:37:29 +02:00 committed by GitHub
parent 08c4ab175e
commit 347be643f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 8 additions and 1 deletion

View File

@@ -243,12 +243,19 @@ class SagemakerLLM(CustomLLM):
event_stream = resp["Body"]
start_json = b"{"
stop_token = "<|endoftext|>"
first_token = True
for line in LineIterator(event_stream):
if line != b"" and start_json in line:
data = json.loads(line[line.find(start_json) :].decode("utf-8"))
if data["token"]["text"] != stop_token:
special = data["token"]["special"]
stop = data["token"]["text"] == stop_token
if not special and not stop:
delta = data["token"]["text"]
# trim the leading space for the first token if present
if first_token:
delta = delta.lstrip()
first_token = False
text += delta
yield CompletionResponse(delta=delta, text=text, raw=data)