python sdk polish #40

Merged · 1 commit · Feb 27, 2025
32 changes: 17 additions & 15 deletions 1_python/1_llm-prediction/parameters.md
@@ -23,7 +23,7 @@ Set inference-time parameters such as `temperature`, `maxTokens`, `topP` and more
 ".complete()":
   language: python
   code: |
-    result = model.respond(chat, config={
+    result = model.complete(chat, config={
         "temperature": 0.6,
         "maxTokens": 50,
         "stop": ["\n\n"],
@@ -51,9 +51,9 @@ The `.model()` retrieves a handle to a model that has already been loaded, or loads…
   language: python
   code: |
     import lmstudio as lms
-    model = lms.llm("qwen2.5-7b-instruct", config = {
-        contextLength: 8192,
-        gpuOffload: 0.5,
+    model = lms.llm("qwen2.5-7b-instruct", config={
+        "contextLength": 8192,
+        "gpuOffload": 0.5,
     })

 "Python (scoped resource API)":
@@ -63,10 +63,11 @@ The `.model()` retrieves a handle to a model that has already been loaded, or loads…
     with lms.Client() as client:
         model = client.llm.model(
             "qwen2.5-7b-instruct",
-            config = {
-                contextLength: 8192,
-                gpuOffload: 0.5,
-            })
+            config={
+                "contextLength": 8192,
+                "gpuOffload": 0.5,
+            }
+        )

 ```

@@ -83,9 +84,9 @@ The `.load_new_instance()` method creates a new model instance and loads it with…
   code: |
     import lmstudio as lms
     client = lms.get_default_client()
-    model = client.llm.load_new_instance("qwen2.5-7b-instruct", config = {
-        contextLength: 8192,
-        gpuOffload: 0.5,
+    model = client.llm.load_new_instance("qwen2.5-7b-instruct", config={
+        "contextLength": 8192,
+        "gpuOffload": 0.5,
     })

 "Python (scoped resource API)":
@@ -95,10 +96,11 @@ The `.load_new_instance()` method creates a new model instance and loads it with…
     with lms.Client() as client:
         model = client.llm.load_new_instance(
             "qwen2.5-7b-instruct",
-            config = {
-                contextLength: 8192,
-                gpuOffload: 0.5,
-            })
+            config={
+                "contextLength": 8192,
+                "gpuOffload": 0.5,
+            }
+        )

 ```

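Taken together, these hunks make two fixes: the call now matches the `.complete()` tab it documents, and the config keys are quoted strings (a bare `contextLength:` inside a Python dict literal would raise `NameError`). A minimal runnable sketch of the corrected convenience-API usage, with an illustrative prompt string; everything else mirrors the diff:

```python
import lmstudio as lms

# Load-time configuration (keys must be string literals in Python).
model = lms.llm("qwen2.5-7b-instruct", config={
    "contextLength": 8192,
    "gpuOffload": 0.5,
})

# Inference-time configuration, passed per call.
result = model.complete("What is the capital of France?", config={
    "temperature": 0.6,
    "maxTokens": 50,
    "stop": ["\n\n"],
})
print(result)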
5 changes: 3 additions & 2 deletions 1_python/2_agent/act.md
@@ -99,7 +99,7 @@ The following code creates a conversation loop with an LLM agent that can create…
             return "Error: {exc!r}"
         return "File created."

-    def print_content(fragment):
+    def print_fragment(fragment, *args):
         print(fragment.content, end="", flush=True)

     model = lms.llm()
@@ -117,8 +117,9 @@
         print("Bot: ", end="", flush=True)
         model.act(
             chat,
+            [create_file],
             on_message=chat.append,
-            on_fragment=print_fragment,
+            on_prediction_fragment=print_fragment,
         )
         print()

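With the renamed callback and the `[create_file]` tools list in place, the surrounding example runs roughly as sketched below. Only the lines visible in this diff are verbatim; the `create_file` body and the chat setup are reconstructed assumptions, and the `f` prefix on the error string is added here (the context line above lacks it):

```python
import lmstudio as lms

def create_file(name: str, content: str):
    """Create a file with the given name and content."""
    try:
        # "x" mode fails if the file already exists (reconstructed body).
        with open(name, "x") as f:
            f.write(content)
    except Exception as exc:
        return f"Error: {exc!r}"  # f prefix added; without it the braces are literal
    return "File created."

def print_fragment(fragment, *args):
    # *args absorbs any extra positional arguments passed to the callback.
    print(fragment.content, end="", flush=True)

model = lms.llm()
chat = lms.Chat("You are a task-focused AI assistant.")

while True:
    user_input = input("You (leave blank to exit): ")
    if not user_input:
        break
    chat.add_user_message(user_input)
    print("Bot: ", end="", flush=True)
    model.act(
        chat,
        [create_file],
        on_message=chat.append,
        on_prediction_fragment=print_fragment,
    )
    print()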
2 changes: 1 addition & 1 deletion 1_python/3_embedding/index.md
@@ -26,7 +26,7 @@ To convert a string to a vector representation, pass it to the `embed` method on…
   code: |
     import lmstudio as lms

-    model = lms.embedding.model("nomic-embed-text-v1.5")
+    model = lms.embedding_model("nomic-embed-text-v1.5")

     embedding = model.embed("Hello, world!")

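The fix replaces the nonexistent `lms.embedding.model(...)` attribute path with the top-level `lms.embedding_model(...)` convenience function. A runnable sketch; the second string and the dot-product comparison are illustrative additions, not from the doc:

```python
import lmstudio as lms

model = lms.embedding_model("nomic-embed-text-v1.5")

embedding = model.embed("Hello, world!")
print(len(embedding), "dimensions")

# Illustrative: compare two embeddings with a raw dot product.
other = model.embed("Hello, world again!")
print("Dot product:", sum(a * b for a, b in zip(embedding, other)))
```
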
6 changes: 3 additions & 3 deletions 1_python/4_tokenization/index.md
@@ -19,7 +19,7 @@ You can tokenize a string with a loaded LLM or embedding model using the SDK. In…

     model = lms.llm()

-    tokens = llm.tokenize("Hello, world!")
+    tokens = model.tokenize("Hello, world!")

     print(tokens) # Array of token IDs.
 ```
@@ -33,7 +33,7 @@ If you only care about the number of tokens, simply check the length of the result…
 "Python (convenience API)":
   language: python
   code: |
-    token_count = len(llm.tokenize("Hello, world!"))
+    token_count = len(model.tokenize("Hello, world!"))
     print("Token count:", token_count)
 ```

@@ -71,7 +71,7 @@ You can determine if a given conversation fits into a model's context by doing the…
         ]
     })

-    print("Fits", does_chat_fit_in_context(model, chat))
+    print("Fits in context:", does_chat_fit_in_context(model, chat))

 ```

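All three hunks repair the same slip: the handle is bound as `model`, so the old `llm.tokenize(...)` calls were `NameError`s. For the last hunk, the doc's `does_chat_fit_in_context` helper is defined outside this diff; the sketch below reconstructs a plausible version, where `apply_prompt_template`, `get_context_length`, and `Chat.from_history` are assumptions about the elided context:

```python
import lmstudio as lms

def does_chat_fit_in_context(model, chat):
    # Render the conversation with the model's prompt template,
    # then compare the token count to the loaded context length.
    formatted = model.apply_prompt_template(chat)
    token_count = len(model.tokenize(formatted))
    return token_count < model.get_context_length()

model = lms.llm()

tokens = model.tokenize("Hello, world!")
print(tokens)                       # Array of token IDs.
print("Token count:", len(tokens))  # Just the count.

chat = lms.Chat.from_history({
    "messages": [
        {"role": "user", "content": "What is the meaning of life?"},
    ]
})
print("Fits in context:", does_chat_fit_in_context(model, chat))
```
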
4 changes: 2 additions & 2 deletions 1_python/5_manage-models/list-downloaded.md
@@ -21,7 +21,7 @@ downloaded model reference to be converted into the full SDK handle for a loaded model…
     llm_only = lms.list_downloaded_models("llm")
     embedding_only = lms.list_downloaded_models("embedding")

-    for model in downloaded_models:
+    for model in downloaded:
         print(model)

 "Python (scoped resource API)":
@@ -34,7 +34,7 @@ downloaded model reference to be converted into the full SDK handle for a loaded model…
     llm_only = client.llm.list_downloaded()
     embedding_only = client.embedding.list_downloaded()

-    for model in downloaded_models:
+    for model in downloaded:
         print(model)

 ```
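
Both hunks fix a stale loop variable: `downloaded_models` is never defined, while the unfiltered list is presumably bound as `downloaded` in the elided lines above. A sketch of the corrected convenience-API version under that assumption:

```python
import lmstudio as lms

downloaded = lms.list_downloaded_models()  # all models on disk (assumed binding)
llm_only = lms.list_downloaded_models("llm")
embedding_only = lms.list_downloaded_models("embedding")

for model in downloaded:
    print(model)
```
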
4 changes: 4 additions & 0 deletions 1_python/5_manage-models/list-loaded.md
@@ -23,6 +23,8 @@ This will give you results equivalent to using [`lms ps`](../../cli/ps) in the CLI.
     llm_only = lms.list_loaded_models("llm")
     embedding_only = lms.list_loaded_models("embedding")

+    print(all_loaded_models)
+
 "Python (scoped resource API)":
   language: python
   code: |
@@ -33,4 +35,6 @@ This will give you results equivalent to using [`lms ps`](../../cli/ps) in the CLI.
     llm_only = client.llm.list_loaded()
     embedding_only = client.embedding.list_loaded()

+    print(all_loaded_models)
+
 ```
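
The added `print(all_loaded_models)` lines give both examples visible output; `all_loaded_models` is presumably the unfiltered call in the elided context above. A sketch under that assumption:

```python
import lmstudio as lms

all_loaded_models = lms.list_loaded_models()  # equivalent to `lms ps` (assumed binding)
llm_only = lms.list_loaded_models("llm")
embedding_only = lms.list_loaded_models("embedding")

print(all_loaded_models)
```
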
2 changes: 1 addition & 1 deletion 1_python/5_manage-models/loading.md
@@ -86,7 +86,7 @@ This allows you to have multiple instances of the same or different models loaded…
   code: |
     import lmstudio as lms

-    client = lms.get_default_client
+    client = lms.get_default_client()
     llama = client.llm.load_new_instance("llama-3.2-1b-instruct")
     another_llama = client.llm.load_new_instance("llama-3.2-1b-instruct", "second-llama")

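The bug here is subtle: without the parentheses, `client` is bound to the function object itself, so `client.llm` fails with `AttributeError` on the very next line. Corrected sketch, mirroring the diff:

```python
import lmstudio as lms

client = lms.get_default_client()  # note the call; the bare name is just the function

# Two independent instances of the same model; the second gets its own identifier.
llama = client.llm.load_new_instance("llama-3.2-1b-instruct")
another_llama = client.llm.load_new_instance("llama-3.2-1b-instruct", "second-llama")
```
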
2 changes: 1 addition & 1 deletion 1_python/6_model-info/get-context-length.md
@@ -54,6 +54,6 @@ You can determine if a given conversation fits into a model's context by doing the…
         ]
     })

-    print("Fits", does_chat_fit_in_context(model, chat))
+    print("Fits in context:", does_chat_fit_in_context(model, chat))

 ```
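
Same wording polish as in the tokenization doc. The check behind `does_chat_fit_in_context` ultimately compares a token count against the model's loaded context length; a minimal sketch of that primitive (the helper's body is elided from this hunk, so this is an assumption consistent with the doc's title):

```python
import lmstudio as lms

model = lms.llm()

# Context length (in tokens) of the loaded instance.
print("Context length:", model.get_context_length())
```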