microsoft · pelikhan · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/docs/public/schemas/config.json b/docs/public/schemas/config.json
@@ -12,13 +12,35 @@
             "description": "List of files to include in the project",
             "type": "array",
             "items": {
-                "type": "string"
+                "type": "string",
+                "description": "Path to a file or a glob pattern to include in the project"
             }
         },
+        "modelEncodings": {
+            "type": "object",
+            "patternProperties": {
+                "^[a-zA-Z0-9_:]+$": {
+                    "type": "string",
+                    "description": "Encoding identifier",
+                    "enum": [
+                        "o1",
+                        "gpt-4o",
+                        "gpt-3.5-turbo",
+                        "text-davinci-003",
+                        "o200k_base",
+                        "cl100k_base",
+                        "p50k_base",
+                        "r50k_base"
+                    ]
+                }
+            },
+            "additionalProperties": true,
+            "description": "Equivalent encoders for model identifiers"
+        },
         "modelAliases": {
             "type": "object",
             "patternProperties": {
-                "^[a-zA-Z0-9_]+$": {
+                "^[a-zA-Z0-9_:]+$": {
                     "oneOf": [
                         {
                             "type": "string",

diff --git a/docs/src/content/docs/reference/configuration-files.mdx b/docs/src/content/docs/reference/configuration-files.mdx
@@ -13,20 +13,21 @@
 ```json title="genaiscript.config.json"
 {
    "$schema": "https://microsoft.github.io/genaiscript/schemas/config.json"
 }
 ```
 
-
 ## File resolution
 
 GenAIScript will scan for the following configuration files
 and merge their content into the final configuration.
 
--   `~/genaiscript.config.yaml`
--   `~/genaiscript.config.json`
--   `./genaiscript.config.yaml`
--   `./genaiscript.config.json`
+- `~/genaiscript.config.yaml`
+- `~/genaiscript.config.json`
+- `./genaiscript.config.yaml`
+- `./genaiscript.config.json`
+
+The JSON files support the [JSON5](https://json5.org/) format (including comments, trailing commas, etc.).
 
 ## Schema
 
 The configuration schema is at https://microsoft.github.io/genaiscript/schemas/config.json .
@@ -46,3 +47,31 @@
 include:
     - "globalpath/*.genai.mjs"
 ```
+
+## `modelAliases` property
+
+The `modelAliases` property allows you to provide aliases for model names.
+
+```js
+{
+    "modelAliases": {
+        "llama32": "ollama:llama3.2:1b",
+        "llama32hot": {
+            "model": "ollama:llama3.2:1b",
+            "temperature": 2
+        }
+    }
+}
+```
+
+## `modelEncodings` property
+
+The `modelEncodings` property allows you to map a model identifier to the token encoding that should be used for it.
+
+```js
+{
+    "modelEncodings": {
+        "azure:gpt__4o_random_name": "gpt-4o"
+    }
+}
+```
diff --git a/packages/cli/src/nodehost.ts b/packages/cli/src/nodehost.ts
@@ -38,7 +38,7 @@ import {
     ModelConfiguration,
 } from "../../core/src/host"
 import { TraceOptions } from "../../core/src/trace"
-import { logError, logVerbose } from "../../core/src/util"
+import { assert, logError, logVerbose } from "../../core/src/util"
 import { parseModelIdentifier } from "../../core/src/models"
 import { LanguageModel } from "../../core/src/chat"
 import { errorMessage, NotSupportedError } from "../../core/src/error"
@@ -92,6 +92,7 @@ export class NodeHost implements RuntimeHost {
         script: {},
         config: {},
     }
+    private _config: HostConfiguration
     readonly userInputQueue = new PLimitPromiseQueue(1)
     readonly azureToken: AzureTokenResolver
     readonly azureServerlessToken: AzureTokenResolver
@@ -195,7 +196,12 @@ export class NodeHost implements RuntimeHost {
             if (res.error) throw res.error
         }
         await parseDefaultsFromEnv(process.env)
-        return config
+        return (this._config = config)
+    }
+
+    get config() { // accessor for the cached host configuration stored in _config
+        assert(!!this._config, "Host configuration not loaded") // guards against access before _config has been assigned
+        return this._config
+    }
 
     static async install(dotEnvPath?: string) {

diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts
@@ -59,6 +59,10 @@ export async function resolveGlobalConfiguration(
                         config?.modelAliases || {},
                         parsed?.modelAliases || {}
                     ),
+                    modelEncodings: structuralMerge(
+                        config?.modelEncodings || {},
+                        parsed?.modelEncodings || {}
+                    ),
                 })
             }
         }

diff --git a/packages/core/src/encoders.ts b/packages/core/src/encoders.ts
@@ -22,6 +22,9 @@ export async function resolveTokenEncoder(
     const { model } = parseModelIdentifier(modelId)
     const module = model.toLowerCase() // Assign model to module for dynamic import path
 
+    const { modelEncodings } = runtimeHost?.config || {}
+    const encoding = modelEncodings?.[modelId] || module
+
     const encoderOptions = {
         disallowedSpecial: new Set<string>(),
     } satisfies EncodeOptions
@@ -31,7 +34,7 @@ export async function resolveTokenEncoder(
             encode,
             decode,
             default: api,
-        } = await import(`gpt-tokenizer/model/${module}`)
+        } = await import(`gpt-tokenizer/model/${encoding}`)
         assert(!!encode)
         const { modelName } = api
         const size =

diff --git a/packages/core/src/host.ts b/packages/core/src/host.ts
@@ -144,7 +144,18 @@ export interface RuntimeHost extends Host {
         value: string | Omit<ModelConfiguration, "source">
     ): void
 
+    /**
+     * Reloads the configuration
+     */
     readConfig(): Promise<HostConfiguration>
+    /**
+     * Gets the currently loaded configuration
+     */
+    get config(): HostConfiguration
+    /**
+     * Reads a secret
+     * @param name
+     */
     readSecret(name: string): Promise<string | undefined>
     // executes a process
     exec(

diff --git a/packages/core/src/hostconfiguration.ts b/packages/core/src/hostconfiguration.ts
@@ -18,4 +18,9 @@ export interface HostConfiguration {
      * Configures a list of known aliases. Overriden by environment variables and CLI arguments
      */
     modelAliases?: Record<string, string | ModelConfiguration>
+
+    /**
+     * Model identifier to encoding mapping
+     */
+    modelEncodings?: Record<string, string>
 }
diff --git a/packages/core/src/testhost.ts b/packages/core/src/testhost.ts
@@ -94,6 +94,10 @@ export class TestHost implements RuntimeHost {
         return {}
     }
 
+    get config() { // this host has no stored configuration: always yields an empty object
+        return {}
+    }
+
     contentSafety(
         id?: "azure",
         options?: TraceOptions

diff --git a/packages/sample/genaiscript.config.json b/packages/sample/genaiscript.config.json
@@ -7,5 +7,8 @@
             "model": "ollama:llama3.2:1b",
             "temperature": 2
         }
+    },
+    "modelEncodings": {
+        "azure:gpt__4o_random_name": "gpt-4o"
     }
 }