fix(llm): special tokens and leading space (#1831)
This commit is contained in:
parent
08c4ab175e
commit
347be643f7
|
@ -243,12 +243,19 @@ class SagemakerLLM(CustomLLM):
|
|||
event_stream = resp["Body"]
|
||||
start_json = b"{"
|
||||
stop_token = "<|endoftext|>"
|
||||
first_token = True
|
||||
|
||||
for line in LineIterator(event_stream):
|
||||
if line != b"" and start_json in line:
|
||||
data = json.loads(line[line.find(start_json) :].decode("utf-8"))
|
||||
if data["token"]["text"] != stop_token:
|
||||
special = data["token"]["special"]
|
||||
stop = data["token"]["text"] == stop_token
|
||||
if not special and not stop:
|
||||
delta = data["token"]["text"]
|
||||
# trim the leading space for the first token if present
|
||||
if first_token:
|
||||
delta = delta.lstrip()
|
||||
first_token = False
|
||||
text += delta
|
||||
yield CompletionResponse(delta=delta, text=text, raw=data)
|
||||
|
||||
|
|
Loading…
Reference in New Issue