Merge pull request #88 from ajcwebdev/llm-cost
Calculate and Log Total Token Cost Based on LLM Model
ajcwebdev authored Jan 1, 2025
2 parents 6c459ad + 060f725 commit 623565d
Showing 26 changed files with 1,370 additions and 271 deletions.
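
Every provider module in this diff swaps its ad-hoc l.wait logging for a shared logAPIResults helper imported from src/utils/logging (one of the 26 changed files, not expanded on this page). The sketch below is a rough guess at that helper's shape, inferred only from its call sites in the commit: the APIResultInfo and TokenUsage field names follow the calls shown in the diff, while the pricing table, its dollar values, and the console.log formatting are illustrative assumptions rather than the repository's actual implementation.

// Hypothetical sketch of logAPIResults, inferred from the call sites in this commit.
// The pricing entries and log formatting are assumed for illustration only.
export type TokenUsage = {
  input?: number
  output?: number
  total?: number
}

export type APIResultInfo = {
  modelName: string
  stopReason: string
  tokenUsage: TokenUsage
}

// Example per-million-token prices in USD (assumed values, not the repo's data)
const EXAMPLE_PRICING: Record<string, { inputPerMillion: number, outputPerMillion: number }> = {
  GPT_4o_MINI: { inputPerMillion: 0.15, outputPerMillion: 0.60 },
  CLAUDE_3_5_HAIKU: { inputPerMillion: 0.80, outputPerMillion: 4.00 },
}

export function logAPIResults(info: APIResultInfo): void {
  const { modelName, stopReason, tokenUsage } = info
  const { input, output, total } = tokenUsage

  // Always report the model, stop reason, and whatever token counts are available
  console.log(`  - Model: ${modelName}\n  - Stop Reason: ${stopReason}`)
  console.log(`  - Token Usage:\n    - ${input ?? 'unknown'} input tokens\n    - ${output ?? 'unknown'} output tokens\n    - ${total ?? 'unknown'} total tokens`)

  // Estimate total cost only when both a price entry and complete usage exist
  const pricing = EXAMPLE_PRICING[modelName]
  if (pricing && input !== undefined && output !== undefined) {
    const cost = (input / 1_000_000) * pricing.inputPerMillion
      + (output / 1_000_000) * pricing.outputPerMillion
    console.log(`  - Estimated Cost: $${cost.toFixed(4)}`)
  }
}

Because several providers (Cohere, Gemini, the OpenAI-compatible APIs) may omit usage fields, the token counts are typed as optional and the cost line would only be printed when both a price entry and complete usage are available.
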
10 changes: 9 additions & 1 deletion package.json
@@ -37,13 +37,21 @@
"bench-medium": "tsx --test test/bench/medium.test.ts",
"bench-large": "tsx --test test/bench/large.test.ts",
"bench-turbo": "tsx --test test/bench/turbo.test.ts",
"test-models-chatgpt": "tsx --test test/models/chatgpt.test.ts",
"test-models-claude": "tsx --test test/models/claude.test.ts",
"test-models-cohere": "tsx --test test/models/cohere.test.ts",
"test-models-gemini": "tsx --test test/models/gemini.test.ts",
"test-models-mistral": "tsx --test test/models/mistral.test.ts",
"test-models-fireworks": "tsx --test test/models/fireworks.test.ts",
"test-models-together": "tsx --test test/models/together.test.ts",
"test-models-groq": "tsx --test test/models/groq.test.ts",
"test-local": "tsx --test test/local.test.ts",
"test-docker": "tsx --test test/docker.test.ts",
"test-services": "tsx --test test/services.test.ts",
"test-all": "tsx --test test/all.test.ts",
"ta": "tsx --test test/all.test.ts",
"clean": "tsx scripts/cleanContent.ts",
"docker-cli": "docker run --rm -v $PWD/content:/usr/src/app/content autoshow",
"docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
"docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
"prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
"bun": "bun --env-file=.env --no-warnings src/cli/commander.ts",
29 changes: 15 additions & 14 deletions src/llms/chatgpt.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { OpenAI } from 'openai'
import { GPT_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, ChatGPTModelType } from '../types/llms'

/**
@@ -35,8 +35,8 @@ export const callChatGPT: LLMFunction = async (
// Call the OpenAI chat completions API
const response = await openai.chat.completions.create({
model: actualModel,
max_tokens: 4000, // Maximum number of tokens in the response
messages: [{ role: 'user', content: promptAndTranscript }], // The input message (transcript content)
max_completion_tokens: 4000,
messages: [{ role: 'user', content: promptAndTranscript }],
})

// Check if we have a valid response
@@ -45,20 +45,21 @@
throw new Error('No valid response received from the API')
}

// Get the content and other details safely
const content = firstChoice.message.content
const finish_reason = firstChoice.finish_reason ?? 'unknown'
const usedModel = response.model
const usage = response.usage
const { prompt_tokens, completion_tokens, total_tokens } = usage ?? {}

// Write the generated content to the output file
await writeFile(tempPath, content)
await writeFile(tempPath, firstChoice.message.content)

l.wait(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`)
l.wait(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: firstChoice.finish_reason ?? 'unknown',
tokenUsage: {
input: response.usage?.prompt_tokens,
output: response.usage?.completion_tokens,
total: response.usage?.total_tokens
}
})
} catch (error) {
err(`Error in callChatGPT: ${(error as Error).message}`)
throw error // Re-throw the error for handling in the calling function
throw error
}
}
26 changes: 12 additions & 14 deletions src/llms/claude.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { Anthropic } from '@anthropic-ai/sdk'
import { CLAUDE_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, ClaudeModelType } from '../types/llms'

/**
@@ -39,18 +39,8 @@ export const callClaude: LLMFunction = async (
messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content)
})

// Destructure the response to get relevant information
const {
content,
model: usedModel, // The actual model used
usage, // Token usage information
stop_reason // Reason why the generation stopped
} = response

const { input_tokens, output_tokens } = usage

// Extract text content from the response
const textContent = extractTextContent(content)
const textContent = extractTextContent(response.content)

// Write the generated text to the output file
if (textContent) {
@@ -59,8 +49,16 @@
throw new Error('No text content generated from the API')
}

l.wait(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`)
l.wait(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: response.stop_reason ?? 'unknown',
tokenUsage: {
input: response.usage.input_tokens,
output: response.usage.output_tokens,
total: response.usage.input_tokens + response.usage.output_tokens
}
})
} catch (error) {
err(`Error in callClaude: ${(error as Error).message}`)
throw error // Re-throw the error for handling in the calling function
15 changes: 11 additions & 4 deletions src/llms/cohere.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { CohereClient } from 'cohere-ai'
import { COHERE_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, CohereModelType } from '../types/llms'

/**
@@ -35,7 +35,6 @@ export const callCohere: LLMFunction = async (
// Call the Cohere chat API
const response = await cohere.chat({
model: actualModel,
// max_tokens: ?, // Cohere doesn't seem to have a max_tokens parameter for chat
message: promptAndTranscript // The input message (prompt and transcript content)
})

@@ -51,8 +50,16 @@
// Write the generated text to the output file
await writeFile(tempPath, text)

l.wait(`\n Finish Reason: ${finishReason}\n Model: ${actualModel}`)
l.wait(` Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: finishReason ?? 'unknown',
tokenUsage: {
input: inputTokens,
output: outputTokens,
total: inputTokens && outputTokens ? inputTokens + outputTokens : undefined
}
})
} catch (error) {
err(`Error in callCohere: ${(error as Error).message}`)
throw error // Re-throw the error for handling in the calling function
29 changes: 17 additions & 12 deletions src/llms/fireworks.ts
@@ -3,7 +3,7 @@
import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { FIREWORKS_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms'

/**
@@ -17,19 +17,22 @@ import type { LLMFunction, FireworksModelType, FireworksResponse } from '../type
export const callFireworks: LLMFunction = async (
promptAndTranscript: string,
tempPath: string,
model: string = 'LLAMA_3_2_3B'
model: string | FireworksModelType = 'LLAMA_3_2_3B'
): Promise<void> => {
// Check if the FIREWORKS_API_KEY environment variable is set
if (!env['FIREWORKS_API_KEY']) {
throw new Error('FIREWORKS_API_KEY environment variable is not set. Please set it to your Fireworks API key.')
}

try {
const actualModel = (FIREWORKS_MODELS[model as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B).modelId
// Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found
const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_3B'
const modelConfig = FIREWORKS_MODELS[modelKey as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B
const modelId = modelConfig.modelId

// Prepare the request body
const requestBody = {
model: actualModel,
model: modelId,
messages: [
{
role: 'user',
@@ -58,22 +61,24 @@

// Extract the generated content
const content = data.choices[0]?.message?.content
const finishReason = data.choices[0]?.finish_reason
const usedModel = data.model
const usage = data.usage
const { prompt_tokens, completion_tokens, total_tokens } = usage

if (!content) {
throw new Error('No content generated from the Fireworks API')
}

// Write the generated content to the specified output file
await writeFile(tempPath, content)
l.wait(`\n Fireworks response saved to ${tempPath}`)

// Log finish reason, used model, and token usage
l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)
l.wait(` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
// Log API results using the model key
logAPIResults({
modelName: modelKey,
stopReason: data.choices[0]?.finish_reason ?? 'unknown',
tokenUsage: {
input: data.usage.prompt_tokens,
output: data.usage.completion_tokens,
total: data.usage.total_tokens
}
})
} catch (error) {
// Log any errors that occur during the process
err(`Error in callFireworks: ${(error as Error).message}`)
18 changes: 16 additions & 2 deletions src/llms/gemini.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { GoogleGenerativeAI } from "@google/generative-ai"
import { GEMINI_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, GeminiModelType } from '../types/llms'

/**
@@ -57,7 +57,21 @@ export const callGemini: LLMFunction = async (

// Write the generated text to the output file
await writeFile(tempPath, text)
l.wait(`\nModel: ${actualModel}`)

// Get token usage from the response metadata
const { usageMetadata } = response
const { promptTokenCount, candidatesTokenCount, totalTokenCount } = usageMetadata ?? {}

// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: 'complete',
tokenUsage: {
input: promptTokenCount,
output: candidatesTokenCount,
total: totalTokenCount
}
})

return
} catch (error) {
39 changes: 24 additions & 15 deletions src/llms/groq.ts
@@ -3,8 +3,8 @@
import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { GROQ_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import type { GroqChatCompletionResponse, GroqModelType } from '../types/llms'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms'

// Define the Groq API URL
const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
@@ -13,20 +13,27 @@
* Function to call the Groq chat completion API.
* @param {string} promptAndTranscript - The combined prompt and transcript text to process.
* @param {string} tempPath - The temporary file path to write the LLM output.
* @param {string} model - The model to use, e.g., 'MIXTRAL_8X7B_32768'.
* @param {string} model - The model to use, e.g., 'LLAMA_3_2_1B_PREVIEW'.
*/
export const callGroq = async (promptAndTranscript: string, tempPath: string, model: string = 'MIXTRAL_8X7B_32768'): Promise<void> => {
export const callGroq: LLMFunction = async (
promptAndTranscript: string,
tempPath: string,
model: string | GroqModelType = 'LLAMA_3_2_1B_PREVIEW'
): Promise<void> => {
// Ensure that the API key is set
if (!env['GROQ_API_KEY']) {
throw new Error('GROQ_API_KEY environment variable is not set. Please set it to your Groq API key.')
}

try {
const actualModel = (GROQ_MODELS[model as GroqModelType] || GROQ_MODELS.MIXTRAL_8X7B_32768).modelId
// Get the model configuration and ID, defaulting to LLAMA_3_2_1B_PREVIEW if not found
const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B_PREVIEW'
const modelConfig = GROQ_MODELS[modelKey as GroqModelType] || GROQ_MODELS.LLAMA_3_2_1B_PREVIEW
const modelId = modelConfig.modelId

// Prepare the request body
const requestBody = {
model: actualModel,
model: modelId,
messages: [
{
role: 'user',
@@ -53,25 +60,27 @@
}

// Parse the JSON response
const data = (await response.json()) as GroqChatCompletionResponse
const data = await response.json() as GroqChatCompletionResponse

// Extract the generated content
const content = data.choices[0]?.message?.content
const finishReason = data.choices[0]?.finish_reason
const usedModel = data.model
const usage = data.usage
const { prompt_tokens, completion_tokens, total_tokens } = usage ?? {}

if (!content) {
throw new Error('No content generated from the Groq API')
}

// Write the generated content to the specified output file
await writeFile(tempPath, content)

// Log finish reason, used model, and token usage
l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)
l.wait(` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: modelKey,
stopReason: data.choices[0]?.finish_reason ?? 'unknown',
tokenUsage: {
input: data.usage?.prompt_tokens,
output: data.usage?.completion_tokens,
total: data.usage?.total_tokens
}
})
} catch (error) {
// Log any errors that occur during the process
err(`Error in callGroq: ${(error as Error).message}`)
19 changes: 11 additions & 8 deletions src/llms/mistral.ts
@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
import { env } from 'node:process'
import { Mistral } from '@mistralai/mistralai'
import { MISTRAL_MODELS } from '../utils/globals'
import { l, err } from '../utils/logging'
import { err, logAPIResults } from '../utils/logging'
import type { LLMFunction, MistralModelType } from '../types/llms'

/**
@@ -31,12 +31,10 @@ export const callMistral: LLMFunction = async (
try {
// Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found
const actualModel = (MISTRAL_MODELS[model as MistralModelType] || MISTRAL_MODELS.MISTRAL_NEMO).modelId
l.wait(`\n Using Mistral model:\n - ${actualModel}`)

// Make API call to Mistral AI for chat completion
const response = await mistral.chat.complete({
model: actualModel,
// max_tokens: ?, // Uncomment and set if you want to limit the response length
messages: [{ role: 'user', content: promptAndTranscript }],
})

@@ -51,16 +49,21 @@
}

const content = firstChoice.message.content
const finishReason = firstChoice.finishReason ?? 'unknown'
const usage = response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
const contentString = Array.isArray(content) ? content.join('') : content

// Write the generated content to the specified output file
await writeFile(tempPath, contentString)

// Log finish reason, used model, and token usage
l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${actualModel}`)
l.wait(` Token Usage:\n - ${usage.promptTokens} prompt tokens\n - ${usage.completionTokens} completion tokens\n - ${usage.totalTokens} total tokens`)
// Log API results using the standardized logging function
logAPIResults({
modelName: actualModel,
stopReason: firstChoice.finishReason ?? 'unknown',
tokenUsage: {
input: response.usage?.promptTokens,
output: response.usage?.completionTokens,
total: response.usage?.totalTokens
}
})

} catch (error) {
// Log any errors that occur during the process