
Commit 060f725

calculate and log cost based on llm model
1 parent 6c459ad commit 060f725

26 files changed: +1370 -271 lines

package.json

+9 -1

@@ -37,13 +37,21 @@
     "bench-medium": "tsx --test test/bench/medium.test.ts",
     "bench-large": "tsx --test test/bench/large.test.ts",
     "bench-turbo": "tsx --test test/bench/turbo.test.ts",
+    "test-models-chatgpt": "tsx --test test/models/chatgpt.test.ts",
+    "test-models-claude": "tsx --test test/models/claude.test.ts",
+    "test-models-cohere": "tsx --test test/models/cohere.test.ts",
+    "test-models-gemini": "tsx --test test/models/gemini.test.ts",
+    "test-models-mistral": "tsx --test test/models/mistral.test.ts",
+    "test-models-fireworks": "tsx --test test/models/fireworks.test.ts",
+    "test-models-together": "tsx --test test/models/together.test.ts",
+    "test-models-groq": "tsx --test test/models/groq.test.ts",
     "test-local": "tsx --test test/local.test.ts",
     "test-docker": "tsx --test test/docker.test.ts",
     "test-services": "tsx --test test/services.test.ts",
     "test-all": "tsx --test test/all.test.ts",
     "ta": "tsx --test test/all.test.ts",
     "clean": "tsx scripts/cleanContent.ts",
-    "docker-cli": "docker run --rm -v $PWD/content:/usr/src/app/content autoshow",
+    "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
     "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
     "bun": "bun --env-file=.env --no-warnings src/cli/commander.ts",

src/llms/chatgpt.ts

+15 -14

@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { OpenAI } from 'openai'
 import { GPT_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
+import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ChatGPTModelType } from '../types/llms'

 /**
@@ -35,8 +35,8 @@ export const callChatGPT: LLMFunction = async (
     // Call the OpenAI chat completions API
     const response = await openai.chat.completions.create({
       model: actualModel,
-      max_tokens: 4000, // Maximum number of tokens in the response
-      messages: [{ role: 'user', content: promptAndTranscript }], // The input message (transcript content)
+      max_completion_tokens: 4000,
+      messages: [{ role: 'user', content: promptAndTranscript }],
     })

     // Check if we have a valid response
@@ -45,20 +45,21 @@
       throw new Error('No valid response received from the API')
     }

-    // Get the content and other details safely
-    const content = firstChoice.message.content
-    const finish_reason = firstChoice.finish_reason ?? 'unknown'
-    const usedModel = response.model
-    const usage = response.usage
-    const { prompt_tokens, completion_tokens, total_tokens } = usage ?? {}
-
     // Write the generated content to the output file
-    await writeFile(tempPath, content)
+    await writeFile(tempPath, firstChoice.message.content)

-    l.wait(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`)
-    l.wait(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
+    // Log API results using the standardized logging function
+    logAPIResults({
+      modelName: actualModel,
+      stopReason: firstChoice.finish_reason ?? 'unknown',
+      tokenUsage: {
+        input: response.usage?.prompt_tokens,
+        output: response.usage?.completion_tokens,
+        total: response.usage?.total_tokens
+      }
+    })
   } catch (error) {
     err(`Error in callChatGPT: ${(error as Error).message}`)
-    throw error // Re-throw the error for handling in the calling function
+    throw error
   }
 }
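Every provider module in this commit now funnels its usage data through the same logAPIResults helper from src/utils/logging. The helper itself (and the cost table it presumably reads) is part of the 26 changed files but is not shown in this excerpt. Judging purely from the call sites in this diff, its argument has roughly the shape sketched below; the type names are placeholders, not the committed names.

// Sketch inferred from the call sites above; type and field names beyond the call sites are assumptions.
export type TokenUsage = {
  input: number | undefined   // prompt/input tokens reported by the provider
  output: number | undefined  // completion/output tokens
  total: number | undefined   // provider total, or input + output when no total is returned
}

export type APILogInfo = {
  modelName: string   // model key or model ID passed by the caller
  stopReason: string  // finish/stop reason, 'unknown' when the API omits one
  tokenUsage: TokenUsage
}

// Assumed signature of the helper exported from src/utils/logging
export declare function logAPIResults(info: APILogInfo): void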

src/llms/claude.ts

+12 -14

@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { Anthropic } from '@anthropic-ai/sdk'
 import { CLAUDE_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
+import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ClaudeModelType } from '../types/llms'

 /**
@@ -39,18 +39,8 @@ export const callClaude: LLMFunction = async (
       messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content)
     })

-    // Destructure the response to get relevant information
-    const {
-      content,
-      model: usedModel, // The actual model used
-      usage, // Token usage information
-      stop_reason // Reason why the generation stopped
-    } = response
-
-    const { input_tokens, output_tokens } = usage
-
     // Extract text content from the response
-    const textContent = extractTextContent(content)
+    const textContent = extractTextContent(response.content)

     // Write the generated text to the output file
     if (textContent) {
@@ -59,8 +49,16 @@
       throw new Error('No text content generated from the API')
     }

-    l.wait(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`)
-    l.wait(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`)
+    // Log API results using the standardized logging function
+    logAPIResults({
+      modelName: actualModel,
+      stopReason: response.stop_reason ?? 'unknown',
+      tokenUsage: {
+        input: response.usage.input_tokens,
+        output: response.usage.output_tokens,
+        total: response.usage.input_tokens + response.usage.output_tokens
+      }
+    })
   } catch (error) {
     err(`Error in callClaude: ${(error as Error).message}`)
     throw error // Re-throw the error for handling in the calling function

src/llms/cohere.ts

+11 -4

@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { CohereClient } from 'cohere-ai'
 import { COHERE_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
+import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, CohereModelType } from '../types/llms'

 /**
@@ -35,7 +35,6 @@ export const callCohere: LLMFunction = async (
     // Call the Cohere chat API
     const response = await cohere.chat({
       model: actualModel,
-      // max_tokens: ?, // Cohere doesn't seem to have a max_tokens parameter for chat
       message: promptAndTranscript // The input message (prompt and transcript content)
     })

@@ -51,8 +50,16 @@
     // Write the generated text to the output file
     await writeFile(tempPath, text)

-    l.wait(`\n Finish Reason: ${finishReason}\n Model: ${actualModel}`)
-    l.wait(` Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`)
+    // Log API results using the standardized logging function
+    logAPIResults({
+      modelName: actualModel,
+      stopReason: finishReason ?? 'unknown',
+      tokenUsage: {
+        input: inputTokens,
+        output: outputTokens,
+        total: inputTokens && outputTokens ? inputTokens + outputTokens : undefined
+      }
+    })
   } catch (error) {
     err(`Error in callCohere: ${(error as Error).message}`)
     throw error // Re-throw the error for handling in the calling function

src/llms/fireworks.ts

+17 -12

@@ -3,7 +3,7 @@
 import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { FIREWORKS_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
+import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms'

 /**
@@ -17,19 +17,22 @@ import type { LLMFunction, FireworksModelType, FireworksResponse } from '../type
 export const callFireworks: LLMFunction = async (
   promptAndTranscript: string,
   tempPath: string,
-  model: string = 'LLAMA_3_2_3B'
+  model: string | FireworksModelType = 'LLAMA_3_2_3B'
 ): Promise<void> => {
   // Check if the FIREWORKS_API_KEY environment variable is set
   if (!env['FIREWORKS_API_KEY']) {
     throw new Error('FIREWORKS_API_KEY environment variable is not set. Please set it to your Fireworks API key.')
   }

   try {
-    const actualModel = (FIREWORKS_MODELS[model as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B).modelId
+    // Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found
+    const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_3B'
+    const modelConfig = FIREWORKS_MODELS[modelKey as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B
+    const modelId = modelConfig.modelId

     // Prepare the request body
     const requestBody = {
-      model: actualModel,
+      model: modelId,
       messages: [
         {
           role: 'user',
@@ -58,22 +61,24 @@ export const callFireworks: LLMFunction = async (

     // Extract the generated content
     const content = data.choices[0]?.message?.content
-    const finishReason = data.choices[0]?.finish_reason
-    const usedModel = data.model
-    const usage = data.usage
-    const { prompt_tokens, completion_tokens, total_tokens } = usage

     if (!content) {
       throw new Error('No content generated from the Fireworks API')
     }

     // Write the generated content to the specified output file
     await writeFile(tempPath, content)
-    l.wait(`\n Fireworks response saved to ${tempPath}`)

-    // Log finish reason, used model, and token usage
-    l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)
-    l.wait(` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
+    // Log API results using the model key
+    logAPIResults({
+      modelName: modelKey,
+      stopReason: data.choices[0]?.finish_reason ?? 'unknown',
+      tokenUsage: {
+        input: data.usage.prompt_tokens,
+        output: data.usage.completion_tokens,
+        total: data.usage.total_tokens
+      }
+    })
   } catch (error) {
     // Log any errors that occur during the process
     err(`Error in callFireworks: ${(error as Error).message}`)

src/llms/gemini.ts

+16 -2

@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { GoogleGenerativeAI } from "@google/generative-ai"
 import { GEMINI_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
+import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, GeminiModelType } from '../types/llms'

 /**
@@ -57,7 +57,21 @@ export const callGemini: LLMFunction = async (

     // Write the generated text to the output file
     await writeFile(tempPath, text)
-    l.wait(`\nModel: ${actualModel}`)
+
+    // Get token usage from the response metadata
+    const { usageMetadata } = response
+    const { promptTokenCount, candidatesTokenCount, totalTokenCount } = usageMetadata ?? {}
+
+    // Log API results using the standardized logging function
+    logAPIResults({
+      modelName: actualModel,
+      stopReason: 'complete',
+      tokenUsage: {
+        input: promptTokenCount,
+        output: candidatesTokenCount,
+        total: totalTokenCount
+      }
+    })

     return
   } catch (error) {

src/llms/groq.ts

+24 -15

@@ -3,8 +3,8 @@
 import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { GROQ_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
-import type { GroqChatCompletionResponse, GroqModelType } from '../types/llms'
+import { err, logAPIResults } from '../utils/logging'
+import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms'

 // Define the Groq API URL
 const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
@@ -13,20 +13,27 @@ const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
  * Function to call the Groq chat completion API.
  * @param {string} promptAndTranscript - The combined prompt and transcript text to process.
  * @param {string} tempPath - The temporary file path to write the LLM output.
- * @param {string} model - The model to use, e.g., 'MIXTRAL_8X7B_32768'.
+ * @param {string} model - The model to use, e.g., 'LLAMA_3_2_1B_PREVIEW'.
  */
-export const callGroq = async (promptAndTranscript: string, tempPath: string, model: string = 'MIXTRAL_8X7B_32768'): Promise<void> => {
+export const callGroq: LLMFunction = async (
+  promptAndTranscript: string,
+  tempPath: string,
+  model: string | GroqModelType = 'LLAMA_3_2_1B_PREVIEW'
+): Promise<void> => {
   // Ensure that the API key is set
   if (!env['GROQ_API_KEY']) {
     throw new Error('GROQ_API_KEY environment variable is not set. Please set it to your Groq API key.')
   }

   try {
-    const actualModel = (GROQ_MODELS[model as GroqModelType] || GROQ_MODELS.MIXTRAL_8X7B_32768).modelId
+    // Get the model configuration and ID, defaulting to LLAMA_3_2_1B_PREVIEW if not found
+    const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B_PREVIEW'
+    const modelConfig = GROQ_MODELS[modelKey as GroqModelType] || GROQ_MODELS.LLAMA_3_2_1B_PREVIEW
+    const modelId = modelConfig.modelId

     // Prepare the request body
     const requestBody = {
-      model: actualModel,
+      model: modelId,
       messages: [
         {
           role: 'user',
@@ -53,25 +60,27 @@ export const callGroq = async (promptAndTranscript: string, tempPath: string, mo
     }

     // Parse the JSON response
-    const data = (await response.json()) as GroqChatCompletionResponse
+    const data = await response.json() as GroqChatCompletionResponse

     // Extract the generated content
     const content = data.choices[0]?.message?.content
-    const finishReason = data.choices[0]?.finish_reason
-    const usedModel = data.model
-    const usage = data.usage
-    const { prompt_tokens, completion_tokens, total_tokens } = usage ?? {}
-
     if (!content) {
       throw new Error('No content generated from the Groq API')
     }

     // Write the generated content to the specified output file
     await writeFile(tempPath, content)

-    // Log finish reason, used model, and token usage
-    l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)
-    l.wait(` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)
+    // Log API results using the standardized logging function
+    logAPIResults({
+      modelName: modelKey,
+      stopReason: data.choices[0]?.finish_reason ?? 'unknown',
+      tokenUsage: {
+        input: data.usage?.prompt_tokens,
+        output: data.usage?.completion_tokens,
+        total: data.usage?.total_tokens
+      }
+    })
   } catch (error) {
     // Log any errors that occur during the process
     err(`Error in callGroq: ${(error as Error).message}`)

src/llms/mistral.ts

+11 -8

@@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { Mistral } from '@mistralai/mistralai'
 import { MISTRAL_MODELS } from '../utils/globals'
-import { l, err } from '../utils/logging'
+import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, MistralModelType } from '../types/llms'

 /**
@@ -31,12 +31,10 @@ export const callMistral: LLMFunction = async (
   try {
     // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found
     const actualModel = (MISTRAL_MODELS[model as MistralModelType] || MISTRAL_MODELS.MISTRAL_NEMO).modelId
-    l.wait(`\n Using Mistral model:\n - ${actualModel}`)

     // Make API call to Mistral AI for chat completion
     const response = await mistral.chat.complete({
       model: actualModel,
-      // max_tokens: ?, // Uncomment and set if you want to limit the response length
       messages: [{ role: 'user', content: promptAndTranscript }],
     })

@@ -51,16 +49,21 @@
     }

     const content = firstChoice.message.content
-    const finishReason = firstChoice.finishReason ?? 'unknown'
-    const usage = response.usage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
     const contentString = Array.isArray(content) ? content.join('') : content

     // Write the generated content to the specified output file
     await writeFile(tempPath, contentString)

-    // Log finish reason, used model, and token usage
-    l.wait(`\n Finish Reason: ${finishReason}\n Model Used: ${actualModel}`)
-    l.wait(` Token Usage:\n - ${usage.promptTokens} prompt tokens\n - ${usage.completionTokens} completion tokens\n - ${usage.totalTokens} total tokens`)
+    // Log API results using the standardized logging function
+    logAPIResults({
+      modelName: actualModel,
+      stopReason: firstChoice.finishReason ?? 'unknown',
+      tokenUsage: {
+        input: response.usage?.promptTokens,
+        output: response.usage?.completionTokens,
+        total: response.usage?.totalTokens
+      }
+    })

   } catch (error) {
     // Log any errors that occur during the process
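The commit message says cost is calculated from the LLM model, which is why callFireworks and callGroq now pass the model key (for example 'LLAMA_3_2_3B') to logAPIResults rather than the provider's model ID. The pricing table and the arithmetic live in the logging utility, which is among the changed files but not shown in this excerpt. A minimal sketch of how such a calculation could work, assuming per-million-token rates keyed by model name (the rates and names below are placeholders for illustration, not the project's data):

// Hypothetical sketch: estimate cost from token usage and per-million-token rates.
type ModelCost = { inputCostPerM: number, outputCostPerM: number }

// Placeholder rates, for illustration only.
const COST_PER_MILLION: Record<string, ModelCost> = {
  GPT_4o_MINI: { inputCostPerM: 0.15, outputCostPerM: 0.60 },
  LLAMA_3_2_1B_PREVIEW: { inputCostPerM: 0.04, outputCostPerM: 0.04 },
}

function estimateCost(modelName: string, input?: number, output?: number): number | undefined {
  const rates = COST_PER_MILLION[modelName]
  if (!rates || input === undefined || output === undefined) return undefined
  // Convert token counts to millions before applying the per-million rates.
  return (input / 1_000_000) * rates.inputCostPerM + (output / 1_000_000) * rates.outputCostPerM
}

// Example: 12,000 prompt tokens and 800 completion tokens on the placeholder GPT_4o_MINI rates
// → 0.012 * 0.15 + 0.0008 * 0.60 ≈ $0.0023
console.log(estimateCost('GPT_4o_MINI', 12_000, 800)?.toFixed(4))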
