Skip to content

Commit

Permalink
Clojure version: support response streaming
Browse files Browse the repository at this point in the history
  • Loading branch information
ariya committed Oct 1, 2024
1 parent b70c009 commit cb60164
Showing 1 changed file with 53 additions and 15 deletions.
68 changes: 53 additions & 15 deletions ask-llm.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
(def LLM-API-BASE-URL (or (System/getenv "LLM_API_BASE_URL") "https://api.openai.com/v1"))
(def LLM-API-KEY (or (System/getenv "LLM_API_KEY") (System/getenv "OPENAI_API_KEY")))
(def LLM-CHAT-MODEL (System/getenv "LLM_CHAT_MODEL"))
(def LLM-STREAMING (not= "no" (System/getenv "LLM_STREAMING")))

(def LLM-DEBUG (System/getenv "LLM_DEBUG"))

Expand All @@ -15,11 +16,6 @@
:authorization (str "Bearer " bearer)}
{:content-type "application/json"}))

(defn http-post [url bearer payload]
(-> (http/post url {:headers (http-json-headers bearer)
:body (json/encode payload)})
:body (json/parse-string true)))

(def LLM-CHAT-URL (str LLM-API-BASE-URL "/chat/completions"))

(def SYSTEM-PROMPT "Answer the question politely and concisely.")
Expand All @@ -30,19 +26,61 @@
(defn add-message! [role content]
(swap! llm-messages conj {:role role :content content}))

(defn chat [messages]
(let [body {:messages messages
:model (or LLM-CHAT-MODEL "gpt-4o-mini")
:stop ["<|im_end|>" "<|end|>" "<|eot_id|>"]
:max_tokens 200
:temperature 0}
response (http-post LLM-CHAT-URL LLM-API-KEY body)]
(-> response :choices first :message :content str/trim)))
(defn make-reader [response]
(java.io.BufferedReader. (java.io.InputStreamReader. (:body response))))

(defn json-parse [str]
(json/parse-string str true))

(defn parse-line [line handler]
(try
(some-> line json-parse
:choices first :delta :content
(#(do (when handler (handler %))
(str %))))
(catch Exception e nil)))

(defn decode-stream [response handler]
(with-open [reader (make-reader response)]
(loop [answer ""]
(if-let [line (.readLine reader)]
(let [trimmed-line (str/trim line)]
(cond
(str/blank? trimmed-line) (recur answer)
(str/starts-with? trimmed-line "data: ")
(let [new-answer (parse-line (str/trim (subs trimmed-line 6)) handler)]
(recur (or new-answer answer)))
:else (recur answer)))
answer))))

(defn chat [messages handler]
(let [stream (and LLM-STREAMING (some? handler))
payload {:messages messages
:model (or LLM-CHAT-MODEL "gpt-4o-mini")
:stop ["<|im_end|>" "<|end|>" "<|eot_id|>"]
:max_tokens 200
:temperature 0
:stream stream}
options {:headers (http-json-headers LLM-API-KEY)
:body (json/encode payload)}
options (if stream (assoc options :as :stream) options)
response (http/post LLM-CHAT-URL options)]
(if stream
(decode-stream response handler)
(let [body (-> response :body json-parse)
answer (-> body :choices first :message :content str/trim)]
(when handler (handler answer))
answer))))

(defn print-stdout [str]
(print str)
(flush))

(defn ask-llm [question]
(add-message! "user" question)
(let [answer (chat @llm-messages)]
(let [answer (chat @llm-messages print-stdout)]
(add-message! "assistant" answer)
(println)
answer))

(defmacro measure-time [f]
Expand All @@ -58,7 +96,7 @@
(flush)
(let [question (read-line)]
(when question
(-> question ask-llm println measure-time)
(-> question ask-llm measure-time)
(println)
(flush)
(recur)))))
Expand Down

0 comments on commit cb60164

Please sign in to comment.