Commit 729d3f7

committed Jan 4, 2025

refactor whisper and ollama

1 parent f0faece

21 files changed, +279 -347 lines
 

package.json (+1 -1)

@@ -52,7 +52,7 @@
     "test-services": "tsx --test test/services.test.ts",
     "test-all": "tsx --test test/all.test.ts",
     "ta": "tsx --test test/all.test.ts",
-    "clean": "tsx scripts/cleanContent.ts",
+    "clean": "npm run tsx:base scripts/cleanContent.ts",
     "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
     "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",

src/llms/chatgpt.ts (+1 -1)

@@ -2,7 +2,7 @@

 import { env } from 'node:process'
 import { OpenAI } from 'openai'
-import { GPT_MODELS } from '../utils/llm-models'
+import { GPT_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ChatGPTModelType } from '../types/llms'

src/llms/claude.ts (+1 -1)

@@ -2,7 +2,7 @@

 import { env } from 'node:process'
 import { Anthropic } from '@anthropic-ai/sdk'
-import { CLAUDE_MODELS } from '../utils/llm-models'
+import { CLAUDE_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ClaudeModelType } from '../types/llms'

src/llms/cohere.ts (+1 -1)

@@ -2,7 +2,7 @@

 import { env } from 'node:process'
 import { CohereClient } from 'cohere-ai'
-import { COHERE_MODELS } from '../utils/llm-models'
+import { COHERE_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, CohereModelType } from '../types/llms'

src/llms/fireworks.ts (+1 -1)

@@ -1,7 +1,7 @@
 // src/llms/fireworks.ts

 import { env } from 'node:process'
-import { FIREWORKS_MODELS } from '../utils/llm-models'
+import { FIREWORKS_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms'

src/llms/gemini.ts (+1 -1)

@@ -2,7 +2,7 @@

 import { env } from 'node:process'
 import { GoogleGenerativeAI } from "@google/generative-ai"
-import { GEMINI_MODELS } from '../utils/llm-models'
+import { GEMINI_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, GeminiModelType } from '../types/llms'

src/llms/groq.ts (+1 -1)

@@ -1,7 +1,7 @@
 // src/llms/groq.ts

 import { env } from 'node:process'
-import { GROQ_MODELS } from '../utils/llm-models'
+import { GROQ_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms'

src/llms/mistral.ts (+1 -1)

@@ -2,7 +2,7 @@

 import { env } from 'node:process'
 import { Mistral } from '@mistralai/mistralai'
-import { MISTRAL_MODELS } from '../utils/llm-models'
+import { MISTRAL_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, MistralModelType } from '../types/llms'

src/llms/ollama.ts (+18 -144)

@@ -1,10 +1,10 @@
 // src/llms/ollama.ts

 import { env } from 'node:process'
-import { spawn } from 'node:child_process'
-import { OLLAMA_MODELS } from '../utils/llm-models'
+import { OLLAMA_MODELS } from '../utils/llm-globals'
 import { l, err, logAPIResults } from '../utils/logging'
-import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } from '../types/llms'
+import { checkServerAndModel } from '../utils/validate-option'
+import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types/llms'

 /**
  * callOllama()
@@ -38,94 +38,7 @@ export const callOllama: LLMFunction = async (

     const combinedPrompt = `${prompt}\n${transcript}`

-    async function checkServer(): Promise<boolean> {
-      try {
-        const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`)
-        return serverResponse.ok
-      } catch (error) {
-        return false
-      }
-    }
-
-    if (await checkServer()) {
-      l.wait('\n Ollama server is already running...')
-    } else {
-      if (ollamaHost === 'ollama') {
-        throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.')
-      } else {
-        l.wait('\n Ollama server is not running. Attempting to start...')
-        const ollamaProcess = spawn('ollama', ['serve'], {
-          detached: true,
-          stdio: 'ignore',
-        })
-        ollamaProcess.unref()
-
-        let attempts = 0
-        while (attempts < 30) {
-          if (await checkServer()) {
-            l.wait(' - Ollama server is now ready.')
-            break
-          }
-          await new Promise((resolve) => setTimeout(resolve, 1000))
-          attempts++
-        }
-        if (attempts === 30) {
-          throw new Error('Ollama server failed to become ready in time.')
-        }
-      }
-    }
-
-    l.wait(`\n Checking if model is available: ${ollamaModelName}`)
-    try {
-      const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`)
-      if (!tagsResponse.ok) {
-        throw new Error(`HTTP error! status: ${tagsResponse.status}`)
-      }
-      const tagsData = (await tagsResponse.json()) as OllamaTagsResponse
-      const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName)
-
-      if (!isModelAvailable) {
-        l.wait(`\n Model ${ollamaModelName} is not available, pulling...`)
-        const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, {
-          method: 'POST',
-          headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify({ name: ollamaModelName }),
-        })
-        if (!pullResponse.ok) {
-          throw new Error(`Failed to initiate pull for model ${ollamaModelName}`)
-        }
-        if (!pullResponse.body) {
-          throw new Error('Response body is null')
-        }
-
-        const reader = pullResponse.body.getReader()
-        const decoder = new TextDecoder()
-        while (true) {
-          const { done, value } = await reader.read()
-          if (done) break
-
-          const chunk = decoder.decode(value)
-          const lines = chunk.split('\n')
-          for (const line of lines) {
-            if (line.trim() === '') continue
-            try {
-              const parsedLine = JSON.parse(line)
-              if (parsedLine.status === 'success') {
-                l.wait(` - Model ${ollamaModelName} pulled successfully.\n`)
-                break
-              }
-            } catch (parseError) {
-              err(`Error parsing JSON while pulling model: ${parseError}`)
-            }
-          }
-        }
-      } else {
-        l.wait(`\n Model ${ollamaModelName} is already available.\n`)
-      }
-    } catch (error) {
-      err(`Error checking/pulling model: ${(error as Error).message}`)
-      throw error
-    }
+    await checkServerAndModel(ollamaHost, ollamaPort, ollamaModelName)

     l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`)

@@ -135,68 +48,29 @@ export const callOllama: LLMFunction = async (
       body: JSON.stringify({
         model: ollamaModelName,
         messages: [{ role: 'user', content: combinedPrompt }],
-        stream: true,
+        stream: false,
       }),
     })

     if (!response.ok) {
       throw new Error(`HTTP error! status: ${response.status}`)
     }
-    if (!response.body) {
-      throw new Error('Response body is null')
-    }
-
-    l.wait('\n Successfully connected to Ollama /api/chat streaming endpoint.')
-    const reader = response.body.getReader()
-    const decoder = new TextDecoder()
-    let fullContent = ''
-    let isFirstChunk = true
-    let totalPromptTokens = 0
-    let totalCompletionTokens = 0
-
-    while (true) {
-      const { done, value } = await reader.read()
-      if (done) break

-      const chunk = decoder.decode(value)
-      const lines = chunk.split('\n')
+    const data = await response.json() as OllamaResponse
+    const fullContent = data?.message?.content || ''

-      for (const line of lines) {
-        if (line.trim() === '') continue
+    const totalPromptTokens = data.prompt_eval_count ?? 0
+    const totalCompletionTokens = data.eval_count ?? 0

-        try {
-          const parsedResponse = JSON.parse(line) as OllamaResponse
-          if (parsedResponse.message?.content) {
-            if (isFirstChunk) {
-              l.wait(` - Streaming response from Ollama (first chunk received)`)
-              isFirstChunk = false
-            }
-            fullContent += parsedResponse.message.content
-          }
-
-          if (parsedResponse.prompt_eval_count) {
-            totalPromptTokens = parsedResponse.prompt_eval_count
-          }
-          if (parsedResponse.eval_count) {
-            totalCompletionTokens = parsedResponse.eval_count
-          }
-
-          if (parsedResponse.done) {
-            logAPIResults({
-              modelName: modelKey,
-              stopReason: 'stop',
-              tokenUsage: {
-                input: totalPromptTokens || undefined,
-                output: totalCompletionTokens || undefined,
-                total: totalPromptTokens + totalCompletionTokens || undefined,
-              },
-            })
-          }
-        } catch (parseError) {
-          err(`Error parsing JSON from Ollama response: ${parseError}`)
-        }
-      }
-    }
+    logAPIResults({
+      modelName: modelKey,
+      stopReason: 'stop',
+      tokenUsage: {
+        input: totalPromptTokens || undefined,
+        output: totalCompletionTokens || undefined,
+        total: totalPromptTokens + totalCompletionTokens || undefined,
+      },
+    })

     return fullContent
   } catch (error) {
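For reference, the refactor reduces the Ollama call to one non-streaming request against the local `/api/chat` endpoint and reads the token counts from the single JSON payload, as the hunk above shows. A minimal standalone sketch of that request shape follows; the host, port, and model name are illustrative placeholders, not values taken from this commit:

```ts
// Minimal sketch of a non-streaming Ollama chat request.
// 'localhost', '11434', and 'llama3.2:1b' are placeholder values.
async function ollamaChatOnce(prompt: string): Promise<string> {
  const res = await fetch('http://localhost:11434/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'llama3.2:1b',
      messages: [{ role: 'user', content: prompt }],
      stream: false, // one JSON object instead of NDJSON chunks
    }),
  })
  if (!res.ok) throw new Error(`HTTP error! status: ${res.status}`)

  const data = await res.json() as {
    message?: { content?: string }
    prompt_eval_count?: number
    eval_count?: number
  }
  // With stream: false the token counts arrive on the same payload.
  console.log('input tokens:', data.prompt_eval_count ?? 0, 'output tokens:', data.eval_count ?? 0)
  return data.message?.content ?? ''
}
```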

src/llms/together.ts (+1 -1)

@@ -1,7 +1,7 @@
 // src/llms/together.ts

 import { env } from 'node:process'
-import { TOGETHER_MODELS } from '../utils/llm-models'
+import { TOGETHER_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, TogetherModelType, TogetherResponse } from '../types/llms'

src/process-commands/file.ts (+3 -19)

@@ -12,7 +12,6 @@ import { runLLM } from '../process-steps/05-run-llm'
 import { cleanUpFiles } from '../process-steps/06-clean-up-files'
 import { l, err } from '../utils/logging'
 import { readFile } from 'fs/promises'
-import { insertShowNote } from '../server/db'
 import type { ProcessingOptions } from '../types/process'
 import type { TranscriptServices } from '../types/transcription'
 import type { LLMServices } from '../types/llms'
@@ -23,8 +22,7 @@ import type { LLMServices } from '../types/llms'
  * 2. Converts the file to the required audio format
  * 3. Transcribes the audio content
  * 4. Processes the transcript with a language model (if specified)
- * 5. Saves the show notes into the database
- * 6. Cleans up temporary files (unless disabled)
+ * 5. Cleans up temporary files (unless disabled)
  *
  * Unlike processVideo, this function handles local files and doesn't need
  * to check for external dependencies like yt-dlp.
@@ -87,24 +85,10 @@ export async function processFile(
      options,
      finalPath,
      frontMatter,
-      llmServices,
-      generatedPrompt,
-      transcript
-    )
-
-    // Insert into DB
-    insertShowNote(
-      metadata.showLink ?? '',
-      metadata.channel ?? '',
-      metadata.channelURL ?? '',
-      metadata.title,
-      metadata.description ?? '',
-      metadata.publishDate,
-      metadata.coverImage ?? '',
-      frontMatter,
      generatedPrompt,
      transcript,
-      llmOutput
+      metadata,
+      llmServices
    )

    // Step 6 - Cleanup

src/process-commands/rss.ts (+3 -17)

@@ -14,7 +14,6 @@ import { cleanUpFiles } from '../process-steps/06-clean-up-files'
 import { validateRSSOptions } from '../utils/validate-option'
 import { l, err, logRSSProcessingAction, logRSSProcessingStatus, logRSSSeparator } from '../utils/logging'
 import { parser } from '../utils/globals'
-import { insertShowNote } from '../server/db'
 import type { ProcessingOptions, RSSItem } from '../types/process'
 import type { TranscriptServices } from '../types/transcription'
 import type { LLMServices } from '../types/llms'
@@ -198,27 +197,14 @@ async function processItem(
      generatedPrompt = promptText
    }

-    const llmOutput = await runLLM(
+    await runLLM(
      options,
      finalPath,
      frontMatter,
-      llmServices,
-      generatedPrompt,
-      transcript
-    )
-
-    insertShowNote(
-      metadata.showLink ?? '',
-      metadata.channel ?? '',
-      metadata.channelURL ?? '',
-      metadata.title,
-      metadata.description ?? '',
-      metadata.publishDate,
-      metadata.coverImage ?? '',
-      frontMatter,
      generatedPrompt,
      transcript,
-      llmOutput
+      metadata,
+      llmServices
    )

    if (!options.noCleanUp) {

src/process-commands/video.ts (+3 -19)

@@ -12,7 +12,6 @@ import { runLLM } from '../process-steps/05-run-llm'
 import { cleanUpFiles } from '../process-steps/06-clean-up-files'
 import { l, err } from '../utils/logging'
 import { readFile } from 'fs/promises'
-import { insertShowNote } from '../server/db'
 import type { ProcessingOptions } from '../types/process'
 import type { TranscriptServices } from '../types/transcription'
 import type { LLMServices } from '../types/llms'
@@ -24,8 +23,7 @@ import type { LLMServices } from '../types/llms'
  * 3. Downloads and extracts audio
  * 4. Transcribes the audio content
  * 5. Processes the transcript with a language model (if specified)
- * 6. Saves the show notes into the database
- * 7. Cleans up temporary files (unless disabled)
+ * 6. Cleans up temporary files (unless disabled)
  *
  * @param options - Configuration options for processing
  * @param url - The URL of the video to process
@@ -84,24 +82,10 @@ export async function processVideo(
      options,
      finalPath,
      frontMatter,
-      llmServices,
-      generatedPrompt,
-      transcript
-    )
-
-    // Insert into DB
-    insertShowNote(
-      metadata.showLink ?? '',
-      metadata.channel ?? '',
-      metadata.channelURL ?? '',
-      metadata.title,
-      metadata.description ?? '',
-      metadata.publishDate,
-      metadata.coverImage ?? '',
-      frontMatter,
      generatedPrompt,
      transcript,
-      llmOutput
+      metadata,
+      llmServices
    )

    // Step 6 - Cleanup

src/process-steps/05-run-llm.ts (+34 -45)

@@ -2,37 +2,17 @@

 /**
  * @file Orchestrator for running Language Model (LLM) processing on transcripts.
- * Handles prompt generation, LLM processing, and file management for multiple LLM services.
+ * Handles prompt generation, LLM processing, file management for multiple LLM services.
  * @packageDocumentation
  */

 import { writeFile } from 'node:fs/promises'
-import { callOllama } from '../llms/ollama'
-import { callChatGPT } from '../llms/chatgpt'
-import { callClaude } from '../llms/claude'
-import { callGemini } from '../llms/gemini'
-import { callCohere } from '../llms/cohere'
-import { callMistral } from '../llms/mistral'
-import { callFireworks } from '../llms/fireworks'
-import { callTogether } from '../llms/together'
-import { callGroq } from '../llms/groq'
+import { insertShowNote } from '../server/db'
 import { l, err } from '../utils/logging'
 import { retryLLMCall } from '../utils/retry'
-import type { ProcessingOptions } from '../types/process'
-import type { LLMServices, LLMFunction, LLMFunctions } from '../types/llms'
-
-// Map of available LLM service handlers
-export const LLM_FUNCTIONS: LLMFunctions = {
-  ollama: callOllama,
-  chatgpt: callChatGPT,
-  claude: callClaude,
-  gemini: callGemini,
-  cohere: callCohere,
-  mistral: callMistral,
-  fireworks: callFireworks,
-  together: callTogether,
-  groq: callGroq,
-}
+import { LLM_FUNCTIONS } from '../utils/llm-globals'
+import type { ProcessingOptions, EpisodeMetadata } from '../types/process'
+import type { LLMServices, LLMFunction } from '../types/llms'

 /**
  * Processes a transcript using a specified Language Model service.
@@ -43,6 +23,7 @@ export const LLM_FUNCTIONS: LLMFunctions = {
  * 1. Combines the transcript with a provided prompt (if any)
  * 2. Processes the content with the selected LLM
  * 3. Saves the results with front matter and transcript or prompt+transcript
+ * 4. Inserts show notes into the database
  *
  * If no LLM is selected, it writes the front matter, prompt, and transcript to a file.
  * If an LLM is selected, it writes the front matter, showNotes, and transcript to a file.
@@ -54,18 +35,20 @@ export const LLM_FUNCTIONS: LLMFunctions = {
  * - Final output: `${finalPath}-${llmServices}-shownotes.md` (if LLM is used)
  * - Otherwise: `${finalPath}-prompt.md`
  * @param {string} frontMatter - YAML front matter content to include in the output
+ * @param {string} prompt - Optional prompt or instructions to process
+ * @param {string} transcript - The transcript content
+ * @param {EpisodeMetadata} metadata - The metadata object from generateMarkdown
  * @param {LLMServices} [llmServices] - The LLM service to use
- * @param {string} [prompt] - Optional prompt or instructions to process
- * @param {string} [transcript] - The transcript content
  * @returns {Promise<string>} Resolves with the LLM output, or an empty string if no LLM is selected
  */
 export async function runLLM(
   options: ProcessingOptions,
   finalPath: string,
   frontMatter: string,
+  prompt: string,
+  transcript: string,
+  metadata: EpisodeMetadata,
   llmServices?: LLMServices,
-  prompt?: string,
-  transcript?: string
 ): Promise<string> {
   l.step('\nStep 5 - Run LLM on Transcript with Selected Prompt\n')
   l.wait(' runLLM called with arguments:\n')
@@ -76,44 +59,50 @@ export async function runLLM(
   l.wait(` transcript:\n\n${transcript}`)

   try {
-    const combinedPrompt = `${prompt || ''}\n${transcript || ''}`
-
+    let showNotesResult = ''
    if (llmServices) {
      l.wait(`\n Preparing to process with '${llmServices}' Language Model...\n`)
-
      const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices]
+
      if (!llmFunction) {
        throw new Error(`Invalid LLM option: ${llmServices}`)
      }
-
      let showNotes = ''

      await retryLLMCall(
        async () => {
-          showNotes = await llmFunction(prompt || '', transcript || '', options[llmServices])
+          showNotes = await llmFunction(prompt, transcript, options[llmServices])
        },
        5,
        5000
      )

-      l.wait(`\n LLM processing completed successfully.\n`)
-
      const outputFilename = `${finalPath}-${llmServices}-shownotes.md`
-      l.wait(`\n Writing combined front matter + LLM output + transcript to file:\n - ${outputFilename}`)
      await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n## Transcript\n\n${transcript}`)
-      l.wait(`\n Generated show notes saved to:\n - ${outputFilename}`)
-
-      return showNotes
+      l.wait(`\n LLM processing completed, combined front matter + LLM output + transcript written to:\n - ${outputFilename}`)
+      showNotesResult = showNotes
    } else {
      l.wait('\n No LLM selected, skipping processing...')
-
      const noLLMFile = `${finalPath}-prompt.md`
      l.wait(`\n Writing front matter + prompt + transcript to file:\n - ${noLLMFile}`)
-      await writeFile(noLLMFile, `${frontMatter}\n${combinedPrompt}`)
-      l.wait(`\n Prompt and transcript saved to:\n - ${noLLMFile}`)
-
-      return ''
+      await writeFile(noLLMFile, `${frontMatter}\n${prompt}\n## Transcript\n\n${transcript}`)
    }
+
+    insertShowNote(
+      metadata.showLink ?? '',
+      metadata.channel ?? '',
+      metadata.channelURL ?? '',
+      metadata.title ?? '',
+      metadata.description ?? '',
+      metadata.publishDate ?? '',
+      metadata.coverImage ?? '',
+      frontMatter,
+      prompt,
+      transcript,
+      showNotesResult
+    )
+
+    return showNotesResult
  } catch (error) {
    err(`Error running Language Model: ${(error as Error).message}`)
    throw error
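After this change, every process command calls `runLLM` with the same argument order and leaves both file output and the database insert to this step. A hedged sketch of the new call shape is below; the option object, paths, and literal strings are placeholders, while the parameter order follows the signature in the hunk above:

```ts
// Sketch of a post-refactor runLLM call site (as in file.ts / video.ts / rss.ts).
// All concrete values here are placeholders for whatever the earlier pipeline steps produced.
import { runLLM } from '../process-steps/05-run-llm'
import type { EpisodeMetadata, ProcessingOptions } from '../types/process'

const options = {} as ProcessingOptions // placeholder: normally built from CLI or server input
const metadata: EpisodeMetadata = { title: 'Example Episode', publishDate: '2025-01-04' }

const showNotes = await runLLM(
  options,
  'content/example',                    // finalPath: base path for generated files
  '---\ntitle: Example Episode\n---',   // frontMatter
  'Summarize the episode.',             // prompt (generatedPrompt upstream)
  'Transcript text goes here...',       // transcript
  metadata,                             // runLLM now performs the insertShowNote call itself
  'ollama'                              // llmServices is optional; omit it to skip LLM processing
)
console.log(showNotes)
```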

src/transcription/assembly.ts (+2 -11)

@@ -6,9 +6,9 @@
 // 2. Request transcription of the uploaded file.
 // 3. Poll for completion until the transcript is ready or fails.
 // 4. Once completed, format the transcript using a helper function from transcription-utils.ts.
-// 5. Save the final formatted transcript to a .txt file and also create an empty .lrc file as required by the pipeline.
+// 5. Return the formatted transcript.

-import { writeFile, readFile } from 'node:fs/promises'
+import { readFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { l, err } from '../utils/logging'
 import { formatAssemblyTranscript } from '../utils/format-transcript'
@@ -115,15 +115,6 @@ export async function callAssembly(
    // Step 4: Formatting the transcript
    // The formatAssemblyTranscript function handles all formatting logic including speaker labels and timestamps.
    const txtContent = formatAssemblyTranscript(transcript, speakerLabels || false)
-
-    // Step 5: Write the formatted transcript to a .txt file
-    await writeFile(`${finalPath}.txt`, txtContent)
-    l.wait(`\n Transcript saved...\n - ${finalPath}.txt\n`)
-
-    // Create an empty LRC file to satisfy pipeline expectations (even if we don't use it for this service)
-    await writeFile(`${finalPath}.lrc`, '')
-    l.wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`)
-
    return txtContent
  } catch (error) {
    // If any error occurred at any step, log it and rethrow

src/transcription/deepgram.ts (+2 -11)

@@ -6,9 +6,9 @@
 // 2. Send it to Deepgram for transcription with chosen parameters (model, formatting, punctuation, etc.).
 // 3. Check for successful response and extract the transcription results.
 // 4. Format the returned words array using formatDeepgramTranscript to add timestamps and newlines.
-// 5. Write the formatted transcript to a .txt file and create an empty .lrc file.
+// 5. Return the formatted transcript.

-import { writeFile, readFile } from 'node:fs/promises'
+import { readFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { l, err } from '../utils/logging'
 import { formatDeepgramTranscript } from '../utils/format-transcript'
@@ -73,15 +73,6 @@ export async function callDeepgram(

    // Format the returned words array
    const txtContent = formatDeepgramTranscript(alternative.words)
-
-    // Write the formatted transcript to a .txt file
-    await writeFile(`${finalPath}.txt`, txtContent)
-    l.wait(`\n Transcript saved:\n - ${finalPath}.txt\n`)
-
-    // Create an empty LRC file to meet pipeline expectations
-    await writeFile(`${finalPath}.lrc`, '')
-    l.wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`)
-
    return txtContent
  } catch (error) {
    // If any error occurred at any step, log it and rethrow

src/transcription/whisper.ts (+49 -67)

@@ -5,13 +5,13 @@
  * It provides a streamlined, single-container approach for audio transcription.
  */

-import { readFile, writeFile } from 'node:fs/promises'
+import { readFile, unlink } from 'node:fs/promises'
 import { existsSync } from 'node:fs'
 import { lrcToTxt } from '../utils/format-transcript'
 import { WHISPER_MODELS, execPromise } from '../utils/globals'
 import { l, err } from '../utils/logging'
 import type { ProcessingOptions } from '../types/process'
-import type { WhisperModelType, WhisperRunner } from '../types/transcription'
+import type { WhisperModelType } from '../types/transcription'

 /**
  * Main function to handle transcription using local Whisper.cpp.
@@ -41,77 +41,59 @@ export async function callWhisper(

    l.wait(`\n Whisper model information:\n\n - whisperModel: ${whisperModel}`)

-    // Execute the local whisper.cpp runner
-    const txtContent = await runWhisperCpp(finalPath, whisperModel)
+    const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType]
+    l.wait(` - modelGGMLName: ${modelGGMLName}`)

-    // Return the transcript text
-    l.wait(' Returning transcript text from callWhisper...')
-    return txtContent
-  } catch (error) {
-    err('Error in callWhisper:', (error as Error).message)
-    process.exit(1)
-  }
-}
-
-/**
- * Runs transcription using the local whisper.cpp build inside this container.
- *
- * Steps:
- * 1. If whisper.cpp is not cloned/built locally, do so.
- * 2. Download model if not present.
- * 3. Invoke whisper.cpp to create an LRC file.
- * 4. Convert LRC to plain text for final transcript.
- */
-const runWhisperCpp: WhisperRunner = async (finalPath, whisperModel) => {
-  const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType]
-  l.wait(` - modelGGMLName: ${modelGGMLName}`)
+    // Check if whisper.cpp directory is present
+    if (!existsSync('./whisper.cpp')) {
+      l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`)
+      try {
+        await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp')
+        l.wait(`\n - whisper.cpp clone and compilation complete.\n`)
+      } catch (cloneError) {
+        err(`Error cloning/building whisper.cpp: ${(cloneError as Error).message}`)
+        throw cloneError
+      }
+    }

-  // Check if whisper.cpp directory is present
-  if (!existsSync('./whisper.cpp')) {
-    l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`)
-    try {
-      await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp')
-      l.wait(`\n - whisper.cpp clone and compilation complete.\n`)
-    } catch (cloneError) {
-      err(`Error cloning/building whisper.cpp: ${(cloneError as Error).message}`)
-      throw cloneError
+    // Check if the chosen model file is present
+    if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) {
+      l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`)
+      try {
+        await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`)
+        l.wait(' - Model download completed, running transcription...\n')
+      } catch (modelError) {
+        err(`Error downloading model: ${(modelError as Error).message}`)
+        throw modelError
+      }
    }
-  }

-  // Check if the chosen model file is present
-  if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) {
-    l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`)
+    // Run whisper.cpp on the WAV file
+    l.wait(`\n Invoking whisper.cpp on file:\n - ${finalPath}.wav`)
    try {
-      await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`)
-      l.wait(' - Model download completed, running transcription...\n')
-    } catch (modelError) {
-      err(`Error downloading model: ${(modelError as Error).message}`)
-      throw modelError
+      await execPromise(
+        `./whisper.cpp/build/bin/whisper-cli --no-gpu ` +
+        `-m "whisper.cpp/models/${modelGGMLName}" ` +
+        `-f "${finalPath}.wav" ` +
+        `-of "${finalPath}" ` + // Output file base name
+        `--output-lrc` // Output LRC file
+      )
+    } catch (whisperError) {
+      err(`Error running whisper.cpp: ${(whisperError as Error).message}`)
+      throw whisperError
    }
-  }
-
-  // Run whisper.cpp on the WAV file
-  l.wait(`\n Invoking whisper.cpp on file:\n - ${finalPath}.wav`)
-  try {
-    await execPromise(
-      `./whisper.cpp/build/bin/whisper-cli --no-gpu ` +
-      `-m "whisper.cpp/models/${modelGGMLName}" ` +
-      `-f "${finalPath}.wav" ` +
-      `-of "${finalPath}" ` + // Output file base name
-      `--output-lrc` // Output LRC file
-    )
-  } catch (whisperError) {
-    err(`Error running whisper.cpp: ${(whisperError as Error).message}`)
-    throw whisperError
-  }

-  // Convert .lrc -> .txt
-  l.wait(`\n Transcript LRC file successfully created, reading file for txt conversion:\n - ${finalPath}.lrc`)
-  const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8')
-  const txtContent = lrcToTxt(lrcContent)
-  await writeFile(`${finalPath}.txt`, txtContent)
-  l.wait(`\n Transcript transformation successfully completed:\n - ${finalPath}.txt\n`)
+    // Convert .lrc -> .txt
+    l.wait(`\n Transcript LRC file successfully created, reading file for txt conversion:\n - ${finalPath}.lrc`)
+    const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8')
+    const txtContent = lrcToTxt(lrcContent)
+    await unlink(`${finalPath}.lrc`)

-  // Return the plain text content
-  return txtContent
+    // Return the transcript text
+    l.wait(' Returning transcript text from callWhisper...')
+    return txtContent
+  } catch (error) {
+    err('Error in callWhisper:', (error as Error).message)
+    process.exit(1)
+  }
 }

src/types/process.ts (+21 -1)

@@ -3,6 +3,26 @@
 import type { TranscriptServices, WhisperModelType } from './transcription'
 import type { LLMServices } from './llms'

+/**
+ * @interface EpisodeMetadata
+ * @property {string} [showLink]
+ * @property {string} [channel]
+ * @property {string} [channelURL]
+ * @property {string} [title]
+ * @property {string} [description]
+ * @property {string} [publishDate]
+ * @property {string} [coverImage]
+ */
+export interface EpisodeMetadata {
+  showLink?: string
+  channel?: string
+  channelURL?: string
+  title?: string
+  description?: string
+  publishDate?: string
+  coverImage?: string
+}
+
 /**
  * @description Pre-handler to override environment variables from request body if provided.
  * This ensures that API keys can be passed in the request and used for the session,
@@ -184,7 +204,7 @@ export type HandlerFunction = (
   input: string,
   llmServices?: LLMServices,
   transcriptServices?: TranscriptServices
-) => Promise<void> | Promise<string>
+) => Promise<void> | Promise<Object> | Promise<string>

 // Content Types
 /**

src/utils/llm-models.ts → src/utils/llm-globals.ts (+25 -1)

@@ -1,4 +1,14 @@
-// src/utils/llm-models.ts
+// src/utils/llm-globals.ts
+
+import { callOllama } from '../llms/ollama'
+import { callChatGPT } from '../llms/chatgpt'
+import { callClaude } from '../llms/claude'
+import { callGemini } from '../llms/gemini'
+import { callCohere } from '../llms/cohere'
+import { callMistral } from '../llms/mistral'
+import { callFireworks } from '../llms/fireworks'
+import { callTogether } from '../llms/together'
+import { callGroq } from '../llms/groq'

 import type {
   ModelConfig,
@@ -11,8 +21,22 @@ import type {
   TogetherModelType,
   FireworksModelType,
   GroqModelType,
+  LLMFunctions
 } from '../types/llms'

+// Map of available LLM service handlers
+export const LLM_FUNCTIONS: LLMFunctions = {
+  ollama: callOllama,
+  chatgpt: callChatGPT,
+  claude: callClaude,
+  gemini: callGemini,
+  cohere: callCohere,
+  mistral: callMistral,
+  fireworks: callFireworks,
+  together: callTogether,
+  groq: callGroq,
+}
+
 /**
  * Configuration for Ollama models, mapping model types to their display names and identifiers.
  * Each model has a human-readable name and a corresponding model identifier used for API calls.
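With the model configs and the handler map now co-located, consumers can import both from one module. A small illustrative sketch, mirroring the lookup that runLLM performs (the chosen service key is a placeholder):

```ts
// Sketch: resolving a handler and its model config from the consolidated llm-globals module.
import { LLM_FUNCTIONS, OLLAMA_MODELS } from '../utils/llm-globals'
import type { LLMServices } from '../types/llms'

const service: LLMServices = 'ollama'   // placeholder choice
const handler = LLM_FUNCTIONS[service]  // e.g. callOllama
if (!handler) throw new Error(`Invalid LLM option: ${service}`)

// Model definitions live alongside the handlers now.
console.log(Object.keys(OLLAMA_MODELS))
```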

src/utils/logging.ts (+1 -1)

@@ -5,7 +5,7 @@ import type { ModelConfigValue } from '../types/llms'
 import type { TokenUsage, CostCalculation, APILogInfo, ChainableLogger } from '../types/logging'
 import {
   GPT_MODELS, CLAUDE_MODELS, GEMINI_MODELS, COHERE_MODELS, MISTRAL_MODELS, OLLAMA_MODELS, FIREWORKS_MODELS, TOGETHER_MODELS, GROQ_MODELS
-} from './llm-models'
+} from './llm-globals'
 import chalk from 'chalk'

 /**

src/utils/validate-option.ts (+109 -2)

@@ -1,17 +1,124 @@
 // src/utils/validate-option.ts

 import { exit } from 'node:process'
-import { err } from '../utils/logging'
+import { spawn } from 'node:child_process'
 import { processVideo } from '../process-commands/video'
 import { processPlaylist } from '../process-commands/playlist'
 import { processChannel } from '../process-commands/channel'
 import { processURLs } from '../process-commands/urls'
 import { processFile } from '../process-commands/file'
 import { processRSS } from '../process-commands/rss'
+import { l, err } from '../utils/logging'
 import { ACTION_OPTIONS, LLM_OPTIONS, TRANSCRIPT_OPTIONS, otherOptions } from '../utils/globals'
 import type { ProcessingOptions, ValidAction, HandlerFunction, ProcessRequestBody } from '../types/process'
 import type { TranscriptServices } from '../types/transcription'
-import type { LLMServices } from '../types/llms'
+import type { LLMServices, OllamaTagsResponse } from '../types/llms'
+
+/**
+ * checkServerAndModel()
+ * ---------------------
+ * Checks if the Ollama server is running, attempts to start it if not running,
+ * and ensures that the specified model is available. If not, it will pull the model.
+ *
+ * @param {string} ollamaHost - The Ollama host
+ * @param {string} ollamaPort - The Ollama port
+ * @param {string} ollamaModelName - The Ollama model name
+ * @returns {Promise<void>}
+ */
+export async function checkServerAndModel(
+  ollamaHost: string,
+  ollamaPort: string,
+  ollamaModelName: string
+): Promise<void> {
+  async function checkServer(): Promise<boolean> {
+    try {
+      const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`)
+      return serverResponse.ok
+    } catch (error) {
+      return false
+    }
+  }
+
+  if (await checkServer()) {
+    l.wait('\n Ollama server is already running...')
+  } else {
+    if (ollamaHost === 'ollama') {
+      throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.')
+    } else {
+      l.wait('\n Ollama server is not running. Attempting to start...')
+      const ollamaProcess = spawn('ollama', ['serve'], {
+        detached: true,
+        stdio: 'ignore',
+      })
+      ollamaProcess.unref()
+
+      let attempts = 0
+      while (attempts < 30) {
+        if (await checkServer()) {
+          l.wait(' - Ollama server is now ready.')
+          break
+        }
+        await new Promise((resolve) => setTimeout(resolve, 1000))
+        attempts++
+      }
+      if (attempts === 30) {
+        throw new Error('Ollama server failed to become ready in time.')
+      }
+    }
+  }
+
+  l.wait(`\n Checking if model is available: ${ollamaModelName}`)
+  try {
+    const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`)
+    if (!tagsResponse.ok) {
+      throw new Error(`HTTP error! status: ${tagsResponse.status}`)
+    }
+    const tagsData = (await tagsResponse.json()) as OllamaTagsResponse
+    const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName)
+
+    if (!isModelAvailable) {
+      l.wait(`\n Model ${ollamaModelName} is not available, pulling...`)
+      const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ name: ollamaModelName }),
+      })
+      if (!pullResponse.ok) {
+        throw new Error(`Failed to initiate pull for model ${ollamaModelName}`)
+      }
+      if (!pullResponse.body) {
+        throw new Error('Response body is null')
+      }
+
+      const reader = pullResponse.body.getReader()
+      const decoder = new TextDecoder()
+      while (true) {
+        const { done, value } = await reader.read()
+        if (done) break
+
+        const chunk = decoder.decode(value)
+        const lines = chunk.split('\n')
+        for (const line of lines) {
+          if (line.trim() === '') continue
+          try {
+            const parsedLine = JSON.parse(line)
+            if (parsedLine.status === 'success') {
+              l.wait(` - Model ${ollamaModelName} pulled successfully.\n`)
+              break
+            }
+          } catch (parseError) {
+            err(`Error parsing JSON while pulling model: ${parseError}`)
+          }
+        }
+      }
+    } else {
+      l.wait(`\n Model ${ollamaModelName} is already available.\n`)
+    }
+  } catch (error) {
+    err(`Error checking/pulling model: ${(error as Error).message}`)
+    throw error
+  }
+}

 // Map each action to its corresponding handler function
 export const PROCESS_HANDLERS: Record<ValidAction, HandlerFunction> = {
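Since `checkServerAndModel` is exported, it can also be exercised on its own as a preflight check before a batch run. A hedged usage sketch, with placeholder host, port, and model name (11434 is Ollama's default port):

```ts
// Sketch: standalone preflight check against a local Ollama instance.
// The arguments are placeholder values; adjust them to your environment.
import { checkServerAndModel } from '../utils/validate-option'

try {
  await checkServerAndModel('localhost', '11434', 'llama3.2:1b')
  console.log('Ollama is up and the model is available.')
} catch (error) {
  console.error('Ollama preflight failed:', (error as Error).message)
}
```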
