Merge pull request #88 from ajcwebdev/llm-cost
Calculate and Log Total Token Cost Based on LLM Model
ajcwebdev authored Jan 1, 2025
2 parents 6c459ad + 060f725 commit 623565d
Showing 26 changed files with 1,370 additions and 271 deletions.
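
Every provider module in this diff swaps its ad-hoc l.wait logging for a shared logAPIResults helper imported from src/utils/logging (one of the 26 changed files, not expanded on this page). The sketch below is a rough guess at that helper's shape, inferred only from its call sites in the commit: the APIResultInfo and TokenUsage field names follow the calls shown in the diff, while the pricing table, its dollar values, and the console.log formatting are illustrative assumptions rather than the repository's actual implementation.

// Hypothetical sketch of logAPIResults, inferred from the call sites in this commit.
// The pricing entries and log formatting are assumed for illustration only.
export type TokenUsage = {
  input?: number
  output?: number
  total?: number
}

export type APIResultInfo = {
  modelName: string
  stopReason: string
  tokenUsage: TokenUsage
}

// Example per-million-token prices in USD (assumed values, not the repo's data)
const EXAMPLE_PRICING: Record<string, { inputPerMillion: number, outputPerMillion: number }> = {
  GPT_4o_MINI: { inputPerMillion: 0.15, outputPerMillion: 0.60 },
  CLAUDE_3_5_HAIKU: { inputPerMillion: 0.80, outputPerMillion: 4.00 },
}

export function logAPIResults(info: APIResultInfo): void {
  const { modelName, stopReason, tokenUsage } = info
  const { input, output, total } = tokenUsage

  // Always report the model, stop reason, and whatever token counts are available
  console.log(`  - Model: ${modelName}\n  - Stop Reason: ${stopReason}`)
  console.log(`  - Token Usage:\n    - ${input ?? 'unknown'} input tokens\n    - ${output ?? 'unknown'} output tokens\n    - ${total ?? 'unknown'} total tokens`)

  // Estimate total cost only when both a price entry and complete usage exist
  const pricing = EXAMPLE_PRICING[modelName]
  if (pricing && input !== undefined && output !== undefined) {
    const cost = (input / 1_000_000) * pricing.inputPerMillion
      + (output / 1_000_000) * pricing.outputPerMillion
    console.log(`  - Estimated Cost: $${cost.toFixed(4)}`)
  }
}

Because several providers (Cohere, Gemini, the OpenAI-compatible APIs) may omit usage fields, the token counts are typed as optional and the cost line would only be printed when both a price entry and complete usage are available.
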
10 changes: 9 additions & 1 deletion package.json
@@ -37,13 +37,21 @@
"bench-medium": "tsx --test test/bench/medium.test.ts",
"bench-large": "tsx --test test/bench/large.test.ts",
"bench-turbo": "tsx --test test/bench/turbo.test.ts",
"test-models-chatgpt": "tsx --test test/models/chatgpt.test.ts",
"test-models-claude": "tsx --test test/models/claude.test.ts",
"test-models-cohere": "tsx --test test/models/cohere.test.ts",
"test-models-gemini": "tsx --test test/models/gemini.test.ts",
"test-models-mistral": "tsx --test test/models/mistral.test.ts",
"test-models-fireworks": "tsx --test test/models/fireworks.test.ts",
"test-models-together": "tsx --test test/models/together.test.ts",
"test-models-groq": "tsx --test test/models/groq.test.ts",
"test-local": "tsx --test test/local.test.ts",
"test-docker": "tsx --test test/docker.test.ts",
"test-services": "tsx --test test/services.test.ts",
"test-all": "tsx --test test/all.test.ts",
"ta": "tsx --test test/all.test.ts",
"clean": "tsx scripts/cleanContent.ts",
"docker-cli": "docker run --rm -v $PWD/content:/usr/src/app/content autoshow",
"docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
"docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
"prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
"bun": "bun --env-file=.env --no-warnings src/cli/commander.ts",
29 changes: 15 additions & 14 deletions src/llms/chatgpt.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { OpenAI } from 'openai'
import { GPT_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, ChatGPTModelType } from '../types/llms'

/**
@@ -35,8 +35,8 @@ export const callChatGPT: LLMFunction = async (
// Call the OpenAI chat completions API
const response = await openai.chat.completions.create({
model: actualModel,
max_tokens: 4000, // Maximum number of tokens in the response
messages: [{ role: 'user', content: promptAndTranscript }], // The input message (transcript content)
max_completion_tokens: 4000,
messages: [{ role: 'user', content: promptAndTranscript }],
})

// Check if we have a valid response
@@ -45,20 +45,21 @@
throw new Error('No valid response received from the API')
}

// Get the content and other details safely
const content = firstChoice.message.content
const finish_reason = firstChoice.finish_reason ?? 'unknown'
const usedModel = response.model
const usage = response.usage
const { prompt_tokens, completion_tokens, total_tokens } = usage ?? {}

// Write the generated content to the output file
await writeFile(tempPath, content)
await writeFile(tempPath, firstChoice.message.content)

l.wait(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`)
l.wait(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: firstChoice.finish_reason ?? 'unknown',
tokenUsage: {
input: response.usage?.prompt_tokens,
output: response.usage?.completion_tokens,
total: response.usage?.total_tokens
}
})
} catch (error) {
err(`Error in callChatGPT: ${(error as Error).message}`)
throw error // Re-throw the error for handling in the calling function
throw error
}
}
26 changes: 12 additions & 14 deletions src/llms/claude.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { Anthropic } from '@anthropic-ai/sdk'
import { CLAUDE_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, ClaudeModelType } from '../types/llms'

/**
@@ -39,18 +39,8 @@ export const callClaude: LLMFunction = async (
messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content)
})

// Destructure the response to get relevant information
const {
content,
model: usedModel, // The actual model used
usage, // Token usage information
stop_reason // Reason why the generation stopped
} = response

const { input_tokens, output_tokens } = usage

// Extract text content from the response
const textContent = extractTextContent(content)
const textContent = extractTextContent(response.content)

// Write the generated text to the output file
if (textContent) {
@@ -59,8 +49,16 @@
throw new Error('No text content generated from the API')
}

l.wait(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`)
l.wait(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: response.stop_reason ?? 'unknown',
tokenUsage: {
input: response.usage.input_tokens,
output: response.usage.output_tokens,
total: response.usage.input_tokens + response.usage.output_tokens
}
})
} catch (error) {
err(`Error in callClaude: ${(error as Error).message}`)
throw error // Re-throw the error for handling in the calling function
15 changes: 11 additions & 4 deletions src/llms/cohere.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { CohereClient } from 'cohere-ai'
import { COHERE_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, CohereModelType } from '../types/llms'

/**
@@ -35,7 +35,6 @@ export const callCohere: LLMFunction = async (
// Call the Cohere chat API
const response = await cohere.chat({
model: actualModel,
// max_tokens: ?, // Cohere doesn't seem to have a max_tokens parameter for chat
message: promptAndTranscript // The input message (prompt and transcript content)
})

@@ -51,8 +50,16 @@
// Write the generated text to the output file
await writeFile(tempPath, text)

l.wait(`\n Finish Reason: ${finishReason}\n Model: ${actualModel}`)
l.wait(` Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: finishReason ?? 'unknown',
tokenUsage: {
input: inputTokens,
output: outputTokens,
total: inputTokens && outputTokens ? inputTokens + outputTokens : undefined
}
})
} catch (error) {
err(`Error in callCohere: ${(error as Error).message}`)
throw error // Re-throw the error for handling in the calling function
29 changes: 17 additions & 12 deletions src/llms/fireworks.ts
@@ -3,7 +3,7 @@
import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { FIREWORKS_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms'

/**
@@ -17,19 +17,22 @@ import type { LLMFunction, FireworksModelType, FireworksResponse } from '../type
export const callFireworks: LLMFunction = async (
promptAndTranscript: string,
tempPath: string,
model: string = 'LLAMA_3_2_3B'
model: string | FireworksModelType = 'LLAMA_3_2_3B'
): Promise<void> => {
// Check if the FIREWORKS_API_KEY environment variable is set
if (!env['FIREWORKS_API_KEY']) {
throw new Error('FIREWORKS_API_KEY environment variable is not set. Please set it to your Fireworks API key.')
}

try {
const actualModel = (FIREWORKS_MODELS[model as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B).modelId
// Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found
const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_3B'
const modelConfig = FIREWORKS_MODELS[modelKey as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B
const modelId = modelConfig.modelId

// Prepare the request body
const requestBody = {
model: actualModel,
model: modelId,
messages: [
{
role: 'user',
@@ -58,22 +61,24 @@

// Extract the generated content
const content = data.choices[0]?.message?.content
const finishReason = data.choices[0]?.finish_reason
const usedModel = data.model
const usage = data.usage
const { prompt_tokens, completion_tokens, total_tokens } = usage

if (!content) {
throw new Error('No content generated from the Fireworks API')
}

// Write the generated content to the specified output file
await writeFile(tempPath, content)
l.wait(`\n Fireworks response saved to ${tempPath}`)

// Log finish reason, used model, and token usage
l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)
l.wait(` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
// Log API results using the model key
logAPIResults({
modelName: modelKey,
stopReason: data.choices[0]?.finish_reason ?? 'unknown',
tokenUsage: {
input: data.usage.prompt_tokens,
output: data.usage.completion_tokens,
total: data.usage.total_tokens
}
})
} catch (error) {
// Log any errors that occur during the process
err(`Error in callFireworks: ${(error as Error).message}`)
18 changes: 16 additions & 2 deletions src/llms/gemini.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { GoogleGenerativeAI } from "@google/generative-ai"
import { GEMINI_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, GeminiModelType } from '../types/llms'

/**
@@ -57,7 +57,21 @@ export const callGemini: LLMFunction = async (

// Write the generated text to the output file
await writeFile(tempPath, text)
l.wait(`\nModel: ${actualModel}`)

// Get token usage from the response metadata
const { usageMetadata } = response
const { promptTokenCount, candidatesTokenCount, totalTokenCount } = usageMetadata ?? {}

// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: 'complete',
tokenUsage: {
input: promptTokenCount,
output: candidatesTokenCount,
total: totalTokenCount
}
})

return
} catch (error) {
39 changes: 24 additions & 15 deletions src/llms/groq.ts
@@ -3,8 +3,8 @@
import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { GROQ_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import type { GroqChatCompletionResponse, GroqModelType } from '../types/llms'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms'

// Define the Groq API URL
const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
@@ -13,20 +13,27 @@
* Function to call the Groq chat completion API.
* @param {string} promptAndTranscript - The combined prompt and transcript text to process.
* @param {string} tempPath - The temporary file path to write the LLM output.
* @param {string} model - The model to use, e.g., 'MIXTRAL_8X7B_32768'.
* @param {string} model - The model to use, e.g., 'LLAMA_3_2_1B_PREVIEW'.
*/
export const callGroq = async (promptAndTranscript: string, tempPath: string, model: string = 'MIXTRAL_8X7B_32768'): Promise<void> => {
export const callGroq: LLMFunction = async (
promptAndTranscript: string,
tempPath: string,
model: string | GroqModelType = 'LLAMA_3_2_1B_PREVIEW'
): Promise<void> => {
// Ensure that the API key is set
if (!env['GROQ_API_KEY']) {
throw new Error('GROQ_API_KEY environment variable is not set. Please set it to your Groq API key.')
}

try {
const actualModel = (GROQ_MODELS[model as GroqModelType] || GROQ_MODELS.MIXTRAL_8X7B_32768).modelId
// Get the model configuration and ID, defaulting to LLAMA_3_2_1B_PREVIEW if not found
const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B_PREVIEW'
const modelConfig = GROQ_MODELS[modelKey as GroqModelType] || GROQ_MODELS.LLAMA_3_2_1B_PREVIEW
const modelId = modelConfig.modelId

// Prepare the request body
const requestBody = {
model: actualModel,
model: modelId,
messages: [
{
role: 'user',
@@ -53,25 +60,27 @@
}

// Parse the JSON response
const data = (await response.json()) as GroqChatCompletionResponse
const data = await response.json() as GroqChatCompletionResponse

// Extract the generated content
const content = data.choices[0]?.message?.content
const finishReason = data.choices[0]?.finish_reason
const usedModel = data.model
const usage = data.usage
const { prompt_tokens, completion_tokens, total_tokens } = usage ?? {}

if (!content) {
throw new Error('No content generated from the Groq API')
}

// Write the generated content to the specified output file
await writeFile(tempPath, content)

// Log finish reason, used model, and token usage
l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)
l.wait(` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: modelKey,
stopReason: data.choices[0]?.finish_reason ?? 'unknown',
tokenUsage: {
input: data.usage?.prompt_tokens,
output: data.usage?.completion_tokens,
total: data.usage?.total_tokens
}
})
} catch (error) {
// Log any errors that occur during the process
err(`Error in callGroq: ${(error as Error).message}`)
19 changes: 11 additions & 8 deletions src/llms/mistral.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { Mistral } from '@mistralai/mistralai'
import { MISTRAL_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, MistralModelType } from '../types/llms'

/**
@@ -31,12 +31,10 @@ export const callMistral: LLMFunction = async (
try {
// Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found
const actualModel = (MISTRAL_MODELS[model as MistralModelType] || MISTRAL_MODELS.MISTRAL_NEMO).modelId
l.wait(`\n Using Mistral model:\n - ${actualModel}`)

// Make API call to Mistral AI for chat completion
const response = await mistral.chat.complete({
model: actualModel,
// max_tokens: ?, // Uncomment and set if you want to limit the response length
messages: [{ role: 'user', content: promptAndTranscript }],
})

@@ -51,16 +49,21 @@
}

const content = firstChoice.message.content
const finishReason = firstChoice.finishReason ?? 'unknown'
const usage = response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
const contentString = Array.isArray(content) ? content.join('') : content

// Write the generated content to the specified output file
await writeFile(tempPath, contentString)

// Log finish reason, used model, and token usage
l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${actualModel}`)
l.wait(` Token Usage:\n - ${usage.promptTokens} prompt tokens\n - ${usage.completionTokens} completion tokens\n - ${usage.totalTokens} total tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: firstChoice.finishReason ?? 'unknown',
tokenUsage: {
input: response.usage?.promptTokens,
output: response.usage?.completionTokens,
total: response.usage?.totalTokens
}
})

} catch (error) {
// Log any errors that occur during the process