diff --git a/docs/examples.md b/docs/examples.md
index 7a47334..7b387c5 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -3,11 +3,11 @@
 ## Outline
 
 - [Content and Feed Inputs](#content-and-feed-inputs)
+  - [Process Single Audio or Video File](#process-single-audio-or-video-file)
   - [Process Single Video URLs](#process-single-video-urls)
+  - [Process Multiple Videos Specified in a URLs File](#process-multiple-videos-specified-in-a-urls-file)
   - [Process Multiple Videos in YouTube Playlist](#process-multiple-videos-in-youtube-playlist)
   - [Process All Videos from a YouTube Channel](#process-all-videos-from-a-youtube-channel)
-  - [Process Multiple Videos Specified in a URLs File](#process-multiple-videos-specified-in-a-urls-file)
-  - [Process Single Audio or Video File](#process-single-audio-or-video-file)
   - [Process Podcast RSS Feed](#process-podcast-rss-feed)
 - [Transcription Options](#transcription-options)
   - [Whisper](#whisper)
@@ -30,13 +30,34 @@
 
 ## Content and Feed Inputs
 
+### Process Single Audio or Video File
+
+Run on `audio.mp3` in the `content` directory:
+
+```bash
+npm run as -- --file "content/audio.mp3"
+```
+
 ### Process Single Video URLs
 
 Run on a single YouTube video.
 
 ```bash
-npm run as -- \
-  --video "https://www.youtube.com/watch?v=MORMZXEaONk"
+npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"
+```
+
+### Process Multiple Videos Specified in a URLs File
+
+Run on an arbitrary list of URLs in `example-urls.md`.
+
+```bash
+npm run as -- --urls "content/example-urls.md"
+```
+
+Run on URLs file and generate JSON info file with markdown metadata of each video:
+
+```bash
+npm run as -- --info --urls "content/example-urls.md"
 ```
 
 ### Process Multiple Videos in YouTube Playlist
@@ -44,16 +65,13 @@ npm run as -- \
 Run on multiple YouTube videos in a playlist.
 
 ```bash
-npm run as -- \
-  --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"
+npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"
 ```
 
 Run on playlist URL and generate JSON info file with markdown metadata of each video in the playlist:
 
 ```bash
-npm run as -- \
-  --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" \
-  --info
+npm run as -- --info --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"
 ```
 
 ### Process All Videos from a YouTube Channel
@@ -61,8 +79,7 @@ npm run as -- \
 Process all videos from a YouTube channel (both live and non-live):
 
 ```bash
-npm run as -- \
-  --channel "https://www.youtube.com/@ajcwebdev"
+npm run as -- --channel "https://www.youtube.com/@ajcwebdev"
 ```
 
 Process videos starting from the oldest instead of newest:
@@ -92,9 +109,7 @@ npm run as -- \
 Run on a YouTube channel and generate JSON info file with markdown metadata of each video:
 
 ```bash
-npm run as -- \
-  --channel "https://www.youtube.com/@ajcwebdev" \
-  --info
+npm run as -- --info --channel "https://www.youtube.com/@ajcwebdev"
 ```
 
 #### Advanced Channel Example
@@ -124,39 +139,12 @@ Here’s what’s happening in this single command:
 7. **Prompt**: Generates both a summary and short chapter descriptions (`--prompt summary shortChapters`).
 8. **No Clean Up**: Keeps any intermediary or downloaded files around (`--noCleanUp`) so you can inspect them after the run.
 
-### Process Multiple Videos Specified in a URLs File
-
-Run on an arbitrary list of URLs in `example-urls.md`.
-
-```bash
-npm run as -- \
-  --urls "content/example-urls.md"
-```
-
-Run on URLs file and generate JSON info file with markdown metadata of each video:
-
-```bash
-npm run as -- \
-  --urls "content/example-urls.md" \
-  --info
-```
-
-### Process Single Audio or Video File
-
-Run on `audio.mp3` on the `content` directory:
-
-```bash
-npm run as -- \
-  --file "content/audio.mp3"
-```
-
 ### Process Podcast RSS Feed
 
 Process RSS feed from newest to oldest (default behavior):
 
 ```bash
-npm run as -- \
-  --rss "https://ajcwebdev.substack.com/feed"
+npm run as -- --rss "https://ajcwebdev.substack.com/feed"
 ```
 
 Process RSS feed from oldest to newest:
diff --git a/package.json b/package.json
index 5fbeab5..00c1177 100644
--- a/package.json
+++ b/package.json
@@ -18,6 +18,7 @@
   "scripts": {
     "tsx:base": "tsx --env-file=.env --no-warnings --experimental-sqlite",
     "setup": "bash ./scripts/setup.sh",
+    "setup-docker": "docker build -t autoshow -f .github/Dockerfile .",
     "docker-setup": "docker build -t autoshow -f .github/Dockerfile .",
     "setup-all": "npm run setup && npm run docker-setup",
     "as": "npm run tsx:base -- src/cli/commander.ts",
@@ -51,7 +52,7 @@
     "test-services": "tsx --test test/services.test.ts",
     "test-all": "tsx --test test/all.test.ts",
     "ta": "tsx --test test/all.test.ts",
-    "clean": "tsx scripts/cleanContent.ts",
+    "clean": "npm run tsx:base scripts/cleanContent.ts",
     "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
    "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
diff --git a/src/llms/chatgpt.ts b/src/llms/chatgpt.ts
index 5f2191c..cde2097 100644
--- a/src/llms/chatgpt.ts
+++ b/src/llms/chatgpt.ts
@@ -1,54 +1,48 @@
 // src/llms/chatgpt.ts
 
-import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { OpenAI } from 'openai'
-import { GPT_MODELS } from '../utils/llm-models'
+import { GPT_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ChatGPTModelType } from '../types/llms'
 
 /**
  * Main function to call ChatGPT API.
- * @param promptAndTranscript - The combined prompt and transcript text to process.
- * @param tempPath - The temporary file path to write the LLM output.
- * @param model - The GPT model to use.
- * @returns A Promise that resolves when the API call is complete.
+ * @param {string} prompt - The prompt or instructions to process.
+ * @param {string} transcript - The transcript text.
+ * @param {string} tempPath - (unused) The temporary file path (no longer used).
+ * @param {string} [model] - The GPT model to use.
+ * @returns {Promise<string>} A Promise that resolves with the generated text.
  * @throws {Error} If an error occurs during API call.
  */
 export const callChatGPT: LLMFunction = async (
-  promptAndTranscript: string,
-  tempPath: string,
+  prompt: string,
+  transcript: string,
   model: string = 'GPT_4o_MINI'
-): Promise<void> => {
-  // Check for API key
+): Promise<string> => {
   if (!env['OPENAI_API_KEY']) {
     throw new Error('OPENAI_API_KEY environment variable is not set. 
Please set it to your OpenAI API key.') } - // Initialize the OpenAI client with the API key from environment variables const openai = new OpenAI({ apiKey: env['OPENAI_API_KEY'] }) - + try { - // Select the actual model to use, defaulting to GPT_4o_MINI if not specified const actualModel = (GPT_MODELS[model as ChatGPTModelType] || GPT_MODELS.GPT_4o_MINI).modelId - - // Call the OpenAI chat completions API + const combinedPrompt = `${prompt}\n${transcript}` + const response = await openai.chat.completions.create({ model: actualModel, max_completion_tokens: 4000, - messages: [{ role: 'user', content: promptAndTranscript }], + messages: [{ role: 'user', content: combinedPrompt }], }) - // Check if we have a valid response const firstChoice = response.choices[0] if (!firstChoice || !firstChoice.message?.content) { throw new Error('No valid response received from the API') } - // Write the generated content to the output file - await writeFile(tempPath, firstChoice.message.content) - - // Log API results using the standardized logging function + const content = firstChoice.message.content + logAPIResults({ modelName: actualModel, stopReason: firstChoice.finish_reason ?? 'unknown', @@ -58,6 +52,8 @@ export const callChatGPT: LLMFunction = async ( total: response.usage?.total_tokens } }) + + return content } catch (error) { err(`Error in callChatGPT: ${(error as Error).message}`) throw error diff --git a/src/llms/claude.ts b/src/llms/claude.ts index dd7370a..5968d21 100644 --- a/src/llms/claude.ts +++ b/src/llms/claude.ts @@ -1,55 +1,47 @@ // src/llms/claude.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' -import { CLAUDE_MODELS } from '../utils/llm-models' +import { CLAUDE_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, ClaudeModelType } from '../types/llms' /** * Main function to call Claude API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Claude model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string} [model] - The Claude model to use. + * @returns {Promise} A Promise that resolves with the generated text. * @throws {Error} If an error occurs during the API call. */ export const callClaude: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'CLAUDE_3_HAIKU' -): Promise => { - // Check if the ANTHROPIC_API_KEY environment variable is set +): Promise => { if (!env['ANTHROPIC_API_KEY']) { throw new Error('ANTHROPIC_API_KEY environment variable is not set. 
Please set it to your Anthropic API key.') } - // Initialize the Anthropic client with the API key from environment variables const anthropic = new Anthropic({ apiKey: env['ANTHROPIC_API_KEY'] }) try { - // Select the actual model to use, defaulting to CLAUDE_3_HAIKU if not specified const actualModel = (CLAUDE_MODELS[model as ClaudeModelType] || CLAUDE_MODELS.CLAUDE_3_HAIKU).modelId - - // Call the Anthropic messages API to create a chat completion + const combinedPrompt = `${prompt}\n${transcript}` + const response = await anthropic.messages.create({ model: actualModel, - max_tokens: 4000, // Maximum number of tokens in the response - messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content) + max_tokens: 4000, + messages: [{ role: 'user', content: combinedPrompt }] }) - - // Extract text content from the response + const textContent = extractTextContent(response.content) - - // Write the generated text to the output file - if (textContent) { - await writeFile(tempPath, textContent) - } else { + + if (!textContent) { throw new Error('No text content generated from the API') } - - // Log API results using the standardized logging function + logAPIResults({ modelName: actualModel, stopReason: response.stop_reason ?? 'unknown', @@ -59,9 +51,11 @@ export const callClaude: LLMFunction = async ( total: response.usage.input_tokens + response.usage.output_tokens } }) + + return textContent } catch (error) { err(`Error in callClaude: ${(error as Error).message}`) - throw error // Re-throw the error for handling in the calling function + throw error } } diff --git a/src/llms/cohere.ts b/src/llms/cohere.ts index a4769bf..691dea2 100644 --- a/src/llms/cohere.ts +++ b/src/llms/cohere.ts @@ -1,56 +1,48 @@ // src/llms/cohere.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' -import { COHERE_MODELS } from '../utils/llm-models' +import { COHERE_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, CohereModelType } from '../types/llms' /** * Main function to call Cohere API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Cohere model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string} [model] - The Cohere model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. * @throws {Error} If an error occurs during the API call. */ export const callCohere: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'COMMAND_R' -): Promise => { - // Check if the COHERE_API_KEY environment variable is set +): Promise => { if (!env['COHERE_API_KEY']) { throw new Error('COHERE_API_KEY environment variable is not set. 
Please set it to your Cohere API key.') } - // Initialize the Cohere client with the API key from environment variables const cohere = new CohereClient({ token: env['COHERE_API_KEY'] }) try { - // Select the actual model to use, defaulting to COMMAND_R if not specified const actualModel = (COHERE_MODELS[model as CohereModelType] || COHERE_MODELS.COMMAND_R).modelId - - // Call the Cohere chat API + const combinedPrompt = `${prompt}\n${transcript}` + const response = await cohere.chat({ model: actualModel, - message: promptAndTranscript // The input message (prompt and transcript content) + message: combinedPrompt }) - - // Destructure the response to get relevant information + const { - text, // The generated text - meta, // Metadata including token usage - finishReason // Reason why the generation stopped + text, + meta, + finishReason } = response const { inputTokens, outputTokens } = meta?.tokens ?? {} - - // Write the generated text to the output file - await writeFile(tempPath, text) - - // Log API results using the standardized logging function + logAPIResults({ modelName: actualModel, stopReason: finishReason ?? 'unknown', @@ -60,8 +52,10 @@ export const callCohere: LLMFunction = async ( total: inputTokens && outputTokens ? inputTokens + outputTokens : undefined } }) + + return text } catch (error) { err(`Error in callCohere: ${(error as Error).message}`) - throw error // Re-throw the error for handling in the calling function + throw error } } \ No newline at end of file diff --git a/src/llms/fireworks.ts b/src/llms/fireworks.ts index 2545603..ca3a728 100644 --- a/src/llms/fireworks.ts +++ b/src/llms/fireworks.ts @@ -1,47 +1,43 @@ // src/llms/fireworks.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import { FIREWORKS_MODELS } from '../utils/llm-models' +import { FIREWORKS_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types/llms' /** * Main function to call Fireworks AI API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Fireworks model to use. - * @returns A Promise that resolves when the API call is complete. - * @throws {Error} - If an error occurs during the API call. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string | FireworksModelType} [model] - The Fireworks model to use. + * @returns {Promise} A Promise that resolves with the generated text. + * @throws {Error} If an error occurs during the API call. */ export const callFireworks: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | FireworksModelType = 'LLAMA_3_2_3B' -): Promise => { - // Check if the FIREWORKS_API_KEY environment variable is set +): Promise => { if (!env['FIREWORKS_API_KEY']) { throw new Error('FIREWORKS_API_KEY environment variable is not set. Please set it to your Fireworks API key.') } try { - // Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found const modelKey = typeof model === 'string' ? 
model : 'LLAMA_3_2_3B' const modelConfig = FIREWORKS_MODELS[modelKey as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B const modelId = modelConfig.modelId - // Prepare the request body + const combinedPrompt = `${prompt}\n${transcript}` const requestBody = { model: modelId, messages: [ { role: 'user', - content: promptAndTranscript, + content: combinedPrompt, }, ], } - // Make API call to Fireworks AI const response = await fetch('https://api.fireworks.ai/inference/v1/chat/completions', { method: 'POST', headers: { @@ -51,25 +47,18 @@ export const callFireworks: LLMFunction = async ( body: JSON.stringify(requestBody), }) - // Check if the response is OK if (!response.ok) { const errorText = await response.text() throw new Error(`Fireworks API error: ${response.status} ${response.statusText} - ${errorText}`) } const data = await response.json() as FireworksResponse - - // Extract the generated content const content = data.choices[0]?.message?.content if (!content) { throw new Error('No content generated from the Fireworks API') } - // Write the generated content to the specified output file - await writeFile(tempPath, content) - - // Log API results using the model key logAPIResults({ modelName: modelKey, stopReason: data.choices[0]?.finish_reason ?? 'unknown', @@ -79,9 +68,10 @@ export const callFireworks: LLMFunction = async ( total: data.usage.total_tokens } }) + + return content } catch (error) { - // Log any errors that occur during the process err(`Error in callFireworks: ${(error as Error).message}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index 45a9563..d8ed69e 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -1,9 +1,8 @@ // src/llms/gemini.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" -import { GEMINI_MODELS } from '../utils/llm-models' +import { GEMINI_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, GeminiModelType } from '../types/llms' @@ -16,53 +15,37 @@ const delay = (ms: number): Promise => new Promise(resolve => setTimeout(r /** * Main function to call Gemini API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Gemini model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} [model] - The Gemini model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. * @throws {Error} If an error occurs during the API call. */ export const callGemini: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'GEMINI_1_5_FLASH' -): Promise => { - // Check if the GEMINI_API_KEY environment variable is set +): Promise => { if (!env['GEMINI_API_KEY']) { throw new Error('GEMINI_API_KEY environment variable is not set. 
Please set it to your Gemini API key.') } - // Initialize the Google Generative AI client const genAI = new GoogleGenerativeAI(env['GEMINI_API_KEY']) - - // Select the actual model to use, defaulting to GEMINI_1_5_FLASH if not specified const actualModel = (GEMINI_MODELS[model as GeminiModelType] || GEMINI_MODELS.GEMINI_1_5_FLASH).modelId - - // Create a GenerativeModel instance const geminiModel = genAI.getGenerativeModel({ model: actualModel }) - const maxRetries = 3 // Maximum number of retry attempts - - // Retry loop + const maxRetries = 3 + const combinedPrompt = `${prompt}\n${transcript}` + for (let attempt = 1; attempt <= maxRetries; attempt++) { try { - // Generate content using the selected model - const result = await geminiModel.generateContent(promptAndTranscript) - - // Get the response from the generated content + const result = await geminiModel.generateContent(combinedPrompt) const response = await result.response - - // Extract the text from the response const text = response.text() - - // Write the generated text to the output file - await writeFile(tempPath, text) - // Get token usage from the response metadata const { usageMetadata } = response const { promptTokenCount, candidatesTokenCount, totalTokenCount } = usageMetadata ?? {} - // Log API results using the standardized logging function logAPIResults({ modelName: actualModel, stopReason: 'complete', @@ -73,17 +56,18 @@ export const callGemini: LLMFunction = async ( } }) - return + return text } catch (error) { err(`Error in callGemini (attempt ${attempt}/${maxRetries}): ${error instanceof Error ? (error as Error).message : String(error)}`) - - // If this is the last attempt, throw the error + if (attempt === maxRetries) { throw error } - - // Wait before retrying, with exponential backoff + await delay(Math.pow(2, attempt) * 1000) } } + + // In case something unexpected happens + throw new Error('All attempts to call Gemini API have failed.') } \ No newline at end of file diff --git a/src/llms/groq.ts b/src/llms/groq.ts index af4f673..de6a1b3 100644 --- a/src/llms/groq.ts +++ b/src/llms/groq.ts @@ -1,50 +1,44 @@ // src/llms/groq.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import { GROQ_MODELS } from '../utils/llm-models' +import { GROQ_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, GroqModelType, GroqChatCompletionResponse } from '../types/llms' -// Define the Groq API URL -const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions' - /** * Function to call the Groq chat completion API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {string} model - The model to use, e.g., 'LLAMA_3_2_1B_PREVIEW'. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string | GroqModelType} [model] - The model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. + * @throws {Error} If an error occurs during the API call. 
*/ export const callGroq: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | GroqModelType = 'LLAMA_3_2_1B_PREVIEW' -): Promise => { - // Ensure that the API key is set +): Promise => { if (!env['GROQ_API_KEY']) { throw new Error('GROQ_API_KEY environment variable is not set. Please set it to your Groq API key.') } try { - // Get the model configuration and ID, defaulting to LLAMA_3_2_1B_PREVIEW if not found const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B_PREVIEW' const modelConfig = GROQ_MODELS[modelKey as GroqModelType] || GROQ_MODELS.LLAMA_3_2_1B_PREVIEW const modelId = modelConfig.modelId - // Prepare the request body + const combinedPrompt = `${prompt}\n${transcript}` const requestBody = { model: modelId, messages: [ { role: 'user', - content: promptAndTranscript, + content: combinedPrompt, }, ], - // max_tokens: 4000, } - // Send the POST request - const response = await fetch(GROQ_API_URL, { + const response = await fetch(`https://api.groq.com/openai/v1/chat/completions`, { method: 'POST', headers: { Authorization: `Bearer ${env['GROQ_API_KEY']}`, @@ -53,25 +47,17 @@ export const callGroq: LLMFunction = async ( body: JSON.stringify(requestBody), }) - // Check if the response is OK if (!response.ok) { const errorText = await response.text() throw new Error(`Groq API error: ${response.status} ${response.statusText} - ${errorText}`) } - // Parse the JSON response const data = await response.json() as GroqChatCompletionResponse - - // Extract the generated content const content = data.choices[0]?.message?.content if (!content) { throw new Error('No content generated from the Groq API') } - // Write the generated content to the specified output file - await writeFile(tempPath, content) - - // Log API results using the standardized logging function logAPIResults({ modelName: modelKey, stopReason: data.choices[0]?.finish_reason ?? 'unknown', @@ -81,9 +67,10 @@ export const callGroq: LLMFunction = async ( total: data.usage?.total_tokens } }) + + return content } catch (error) { - // Log any errors that occur during the process err(`Error in callGroq: ${(error as Error).message}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/llms/mistral.ts b/src/llms/mistral.ts index c43fbf1..1a17ddb 100644 --- a/src/llms/mistral.ts +++ b/src/llms/mistral.ts @@ -1,44 +1,39 @@ // src/llms/mistral.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' -import { MISTRAL_MODELS } from '../utils/llm-models' +import { MISTRAL_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, MistralModelType } from '../types/llms' /** * Main function to call Mistral AI API. - * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Mistral model to use. - * @returns A Promise that resolves when the API call is complete. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} [model] - The Mistral model to use. + * @returns {Promise} A Promise that resolves when the API call is complete. * @throws {Error} If an error occurs during the API call. 
*/ export const callMistral: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string = 'MISTRAL_NEMO' -): Promise => { - // Check if the MISTRAL_API_KEY environment variable is set +): Promise => { if (!env['MISTRAL_API_KEY']) { throw new Error('MISTRAL_API_KEY environment variable is not set. Please set it to your Mistral API key.') } - // Initialize Mistral client with API key from environment variables const mistral = new Mistral({ apiKey: env['MISTRAL_API_KEY'] }) try { - // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found const actualModel = (MISTRAL_MODELS[model as MistralModelType] || MISTRAL_MODELS.MISTRAL_NEMO).modelId - - // Make API call to Mistral AI for chat completion + const combinedPrompt = `${prompt}\n${transcript}` + const response = await mistral.chat.complete({ model: actualModel, - messages: [{ role: 'user', content: promptAndTranscript }], + messages: [{ role: 'user', content: combinedPrompt }], }) - // Safely access the response properties with proper null checks if (!response.choices || response.choices.length === 0) { throw new Error("No choices returned from Mistral API") } @@ -50,11 +45,7 @@ export const callMistral: LLMFunction = async ( const content = firstChoice.message.content const contentString = Array.isArray(content) ? content.join('') : content - - // Write the generated content to the specified output file - await writeFile(tempPath, contentString) - // Log API results using the standardized logging function logAPIResults({ modelName: actualModel, stopReason: firstChoice.finishReason ?? 'unknown', @@ -65,9 +56,9 @@ export const callMistral: LLMFunction = async ( } }) + return contentString } catch (error) { - // Log any errors that occur during the process err(`Error in callMistral: ${error instanceof Error ? error.message : String(error)}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index 63e5581..c3f6566 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -1,209 +1,78 @@ // src/llms/ollama.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import { spawn } from 'node:child_process' -import { OLLAMA_MODELS } from '../utils/llm-models' +import { OLLAMA_MODELS } from '../utils/llm-globals' import { l, err, logAPIResults } from '../utils/logging' -import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } from '../types/llms' +import { checkServerAndModel } from '../utils/validate-option' +import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types/llms' /** * callOllama() * ----------- * Main function to call the Llama-based model using the Ollama server API. * - * In a single-container approach: - * - We assume 'ollama' binary is installed inside the container. - * - We'll try to connect to 'localhost:11434' or a custom port from env, - * and if it's not running, we'll spawn `ollama serve`. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string} tempPath - (unused) The temporary file path (no longer used). + * @param {string | OllamaModelType} [model='LLAMA_3_2_1B'] - The Ollama model to use. + * @returns {Promise} A Promise resolving with the generated text. 
*/ export const callOllama: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | OllamaModelType = 'LLAMA_3_2_1B' -) => { +): Promise => { + l.wait('\n callOllama called with arguments:') + l.wait(` - model: ${model}`) + try { - // Get the model configuration and ID const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_1B' const modelConfig = OLLAMA_MODELS[modelKey as OllamaModelType] || OLLAMA_MODELS.LLAMA_3_2_1B const ollamaModelName = modelConfig.modelId l.wait(` - modelName: ${modelKey}\n - ollamaModelName: ${ollamaModelName}`) - - // Host & port for Ollama + const ollamaHost = env['OLLAMA_HOST'] || 'localhost' const ollamaPort = env['OLLAMA_PORT'] || '11434' + l.wait(`\n Using Ollama host: ${ollamaHost}, port: ${ollamaPort}`) - // Check if Ollama server is up - async function checkServer(): Promise { - try { - const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`) - return serverResponse.ok - } catch (error) { - return false - } - } - - if (await checkServer()) { - l.wait('\n Ollama server is already running...') - } else { - if (ollamaHost === 'ollama') { - throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.') - } else { - l.wait('\n Ollama server is not running. Attempting to start...') - const ollamaProcess = spawn('ollama', ['serve'], { - detached: true, - stdio: 'ignore' - }) - ollamaProcess.unref() - - // Wait for server to start - let attempts = 0 - while (attempts < 30) { - if (await checkServer()) { - l.wait(' - Ollama server is now ready.') - break - } - await new Promise(resolve => setTimeout(resolve, 1000)) - attempts++ - } - if (attempts === 30) { - throw new Error('Ollama server failed to become ready in time.') - } - } - } - - // Check and pull model if needed - try { - const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) - if (!tagsResponse.ok) { - throw new Error(`HTTP error! status: ${tagsResponse.status}`) - } - const tagsData = await tagsResponse.json() as OllamaTagsResponse - const isModelAvailable = tagsData.models.some(model => model.name === ollamaModelName) + const combinedPrompt = `${prompt}\n${transcript}` - if (!isModelAvailable) { - l.wait(`\n Model ${ollamaModelName} is not available, pulling the model...`) - const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ name: ollamaModelName }), - }) - if (!pullResponse.ok) { - throw new Error(`Failed to initiate pull for model ${ollamaModelName}`) - } - if (!pullResponse.body) { - throw new Error('Response body is null') - } + await checkServerAndModel(ollamaHost, ollamaPort, ollamaModelName) - const reader = pullResponse.body.getReader() - const decoder = new TextDecoder() - while (true) { - const { done, value } = await reader.read() - if (done) break - const chunk = decoder.decode(value) - const lines = chunk.split('\n') - for (const line of lines) { - if (line.trim() === '') continue - try { - const response = JSON.parse(line) - if (response.status === 'success') { - l.wait(` - Model ${ollamaModelName} has been pulled successfully...\n`) - break - } - } catch (parseError) { - err(`Error parsing JSON: ${parseError}`) - } - } - } - } else { - l.wait(`\n Model ${ollamaModelName} is already available...\n`) - } - } catch (error) { - err(`Error checking/pulling model: ${error instanceof Error ? 
error.message : String(error)}`) - throw error - } + l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`) - l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using ${ollamaModelName} model`) - - // Call Ollama's /api/chat endpoint in streaming mode const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, + headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: ollamaModelName, - messages: [{ role: 'user', content: promptAndTranscript }], - stream: true, + messages: [{ role: 'user', content: combinedPrompt }], + stream: false, }), }) if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`) } - if (!response.body) { - throw new Error('Response body is null') - } - - const reader = response.body.getReader() - const decoder = new TextDecoder() - let fullContent = '' - let isFirstChunk = true - let totalPromptTokens = 0 - let totalCompletionTokens = 0 - - while (true) { - const { done, value } = await reader.read() - if (done) break - - const chunk = decoder.decode(value) - const lines = chunk.split('\n') - - for (const line of lines) { - if (line.trim() === '') continue - try { - const parsedResponse = JSON.parse(line) as OllamaResponse - if (parsedResponse.message?.content) { - if (isFirstChunk) { - l.wait(` - Receiving streaming response from Ollama...`) - isFirstChunk = false - } - fullContent += parsedResponse.message.content - } + const data = await response.json() as OllamaResponse + const fullContent = data?.message?.content || '' - // Accumulate token counts if available - if (parsedResponse.prompt_eval_count) { - totalPromptTokens = parsedResponse.prompt_eval_count - } - if (parsedResponse.eval_count) { - totalCompletionTokens = parsedResponse.eval_count - } + const totalPromptTokens = data.prompt_eval_count ?? 0 + const totalCompletionTokens = data.eval_count ?? 0 - if (parsedResponse.done) { - // Log final results using standardized logging function - logAPIResults({ - modelName: modelKey, - stopReason: 'stop', - tokenUsage: { - input: totalPromptTokens || undefined, - output: totalCompletionTokens || undefined, - total: totalPromptTokens + totalCompletionTokens || undefined - } - }) - } - } catch (parseError) { - err(`Error parsing JSON: ${parseError}`) - } - } - } - - // Write final content to the specified temp file - await writeFile(tempPath, fullContent) + logAPIResults({ + modelName: modelKey, + stopReason: 'stop', + tokenUsage: { + input: totalPromptTokens || undefined, + output: totalCompletionTokens || undefined, + total: totalPromptTokens + totalCompletionTokens || undefined, + }, + }) + return fullContent } catch (error) { err(`Error in callOllama: ${error instanceof Error ? error.message : String(error)}`) err(`Stack Trace: ${error instanceof Error ? error.stack : 'No stack trace available'}`) diff --git a/src/llms/together.ts b/src/llms/together.ts index 2abc78a..5593484 100644 --- a/src/llms/together.ts +++ b/src/llms/together.ts @@ -1,49 +1,43 @@ // src/llms/together.ts -import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import { TOGETHER_MODELS } from '../utils/llm-models' +import { TOGETHER_MODELS } from '../utils/llm-globals' import { err, logAPIResults } from '../utils/logging' import type { LLMFunction, TogetherModelType, TogetherResponse } from '../types/llms' /** * Main function to call Together AI API. 
- * @param promptAndTranscript - The combined prompt and transcript text to process. - * @param tempPath - The temporary file path to write the LLM output. - * @param model - The Together AI model to use. - * @returns A Promise that resolves when the API call is complete. - * @throws {Error} - If an error occurs during the API call. + * @param {string} prompt - The prompt or instructions to process. + * @param {string} transcript - The transcript text. + * @param {string | TogetherModelType} [model] - The Together AI model to use. + * @returns {Promise} A Promise that resolves with the generated text. + * @throws {Error} If an error occurs during the API call. */ export const callTogether: LLMFunction = async ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, model: string | TogetherModelType = 'LLAMA_3_2_3B' -): Promise => { - // Check if the TOGETHER_API_KEY environment variable is set +): Promise => { if (!env['TOGETHER_API_KEY']) { throw new Error('TOGETHER_API_KEY environment variable is not set. Please set it to your Together AI API key.') } try { - // Get the model configuration and ID, defaulting to LLAMA_3_2_3B if not found const modelKey = typeof model === 'string' ? model : 'LLAMA_3_2_3B' const modelConfig = TOGETHER_MODELS[modelKey as TogetherModelType] || TOGETHER_MODELS.LLAMA_3_2_3B const modelId = modelConfig.modelId - // Prepare the request body + const combinedPrompt = `${prompt}\n${transcript}` const requestBody = { model: modelId, messages: [ { role: 'user', - content: promptAndTranscript, + content: combinedPrompt, }, ], - // max_tokens: 2000, - // temperature: 0.7, } - // Make API call to Together AI const response = await fetch('https://api.together.xyz/v1/chat/completions', { method: 'POST', headers: { @@ -54,24 +48,17 @@ export const callTogether: LLMFunction = async ( body: JSON.stringify(requestBody), }) - // Check if the response is OK if (!response.ok) { const errorText = await response.text() throw new Error(`Together AI API error: ${response.status} ${response.statusText} - ${errorText}`) } const data = await response.json() as TogetherResponse - - // Extract the generated content const content = data.choices[0]?.message?.content if (!content) { throw new Error('No content generated from the Together AI API') } - // Write the generated content to the specified output file - await writeFile(tempPath, content) - - // Log API results using the standardized logging function logAPIResults({ modelName: modelKey, stopReason: data.choices[0]?.finish_reason ?? 
'unknown', @@ -81,9 +68,10 @@ export const callTogether: LLMFunction = async ( total: data.usage.total_tokens } }) + + return content } catch (error) { - // Log any errors that occur during the process err(`Error in callTogether: ${(error as Error).message}`) - throw error // Re-throw the error for handling by the caller + throw error } } \ No newline at end of file diff --git a/src/process-commands/file.ts b/src/process-commands/file.ts index a96fdde..0566ec8 100644 --- a/src/process-commands/file.ts +++ b/src/process-commands/file.ts @@ -12,7 +12,6 @@ import { runLLM } from '../process-steps/05-run-llm' import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { l, err } from '../utils/logging' import { readFile } from 'fs/promises' -import { insertShowNote } from '../server/db' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' import type { LLMServices } from '../types/llms' @@ -23,8 +22,7 @@ import type { LLMServices } from '../types/llms' * 2. Converts the file to the required audio format * 3. Transcribes the audio content * 4. Processes the transcript with a language model (if specified) - * 5. Saves the show notes into the database - * 6. Cleans up temporary files (unless disabled) + * 5. Cleans up temporary files (unless disabled) * * Unlike processVideo, this function handles local files and doesn't need * to check for external dependencies like yt-dlp. @@ -41,9 +39,17 @@ export async function processFile( filePath: string, llmServices?: LLMServices, transcriptServices?: TranscriptServices -): Promise { - l.opts('Parameters passed to processFile:\n') - l.opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`) +): Promise<{ + frontMatter: string + prompt: string + llmOutput: string + transcript: string +}> { + // Log function inputs + l.info('processFile called with the following arguments:') + l.opts(` - filePath: ${filePath}`) + l.opts(` - llmServices: ${llmServices}`) + l.opts(` - transcriptServices: ${transcriptServices}\n`) try { // Step 1 - Generate markdown @@ -53,40 +59,51 @@ export async function processFile( await downloadAudio(options, filePath, filename) // Step 3 - Transcribe audio and read transcript - await runTranscription(options, finalPath, transcriptServices) - const transcript = await readFile(`${finalPath}.txt`, 'utf-8') + const transcript = await runTranscription(options, finalPath, transcriptServices) - // Step 4 - Select Prompt - const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') - - // Step 5 - Run LLM (optional) - const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) + // Step 4 - Selecting prompt + if (options.customPrompt) { + l.wait(`\n Reading custom prompt file:\n - ${options.customPrompt}`) + } + const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => { + return '' + }) + // Prepare the final prompt let generatedPrompt = '' if (!promptText) { + l.wait('\n No custom prompt text found, importing default prompt generator...') const defaultPrompt = await import('../process-steps/04-select-prompt') generatedPrompt = await defaultPrompt.generatePrompt(options.prompt, undefined) + l.wait(`\n Default prompt generated (length: ${generatedPrompt.length})`) } else { generatedPrompt = promptText } - // Insert into DB - insertShowNote( - metadata.showLink ?? '', - metadata.channel ?? '', - metadata.channelURL ?? '', - metadata.title, - metadata.description ?? 
'', - metadata.publishDate, - metadata.coverImage ?? '', + // Step 5 - Run LLM (if applicable) + const llmOutput = await runLLM( + options, + finalPath, frontMatter, generatedPrompt, transcript, - llmOutput + metadata, + llmServices ) + // Step 6 - Cleanup if (!options.noCleanUp) { await cleanUpFiles(finalPath) + l.wait('\n Cleanup completed.\n') + } + + l.wait(' processFile command completed successfully.') + + return { + frontMatter, + prompt: generatedPrompt, + llmOutput: llmOutput || '', + transcript, } } catch (error) { err(`Error processing file: ${(error as Error).message}`) diff --git a/src/process-commands/rss.ts b/src/process-commands/rss.ts index 23b2867..f7ee941 100644 --- a/src/process-commands/rss.ts +++ b/src/process-commands/rss.ts @@ -14,7 +14,6 @@ import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { validateRSSOptions } from '../utils/validate-option' import { l, err, logRSSProcessingAction, logRSSProcessingStatus, logRSSSeparator } from '../utils/logging' import { parser } from '../utils/globals' -import { insertShowNote } from '../server/db' import type { ProcessingOptions, RSSItem } from '../types/process' import type { TranscriptServices } from '../types/transcription' import type { LLMServices } from '../types/llms' @@ -75,14 +74,11 @@ function extractFeedItems(feed: any): { items: RSSItem[], channelTitle: string } return audioVideoTypes.some((type) => item.enclosure.type.startsWith(type)) }) .map((item) => { - // Ensure publishDate is always a valid string let publishDate: string try { - // Try to parse the date, fall back to current date if invalid const date = item.pubDate ? new Date(item.pubDate) : new Date() publishDate = date.toISOString().substring(0, 10) } catch { - // If date parsing fails, use current date publishDate = defaultDate } @@ -187,15 +183,12 @@ async function processItem( await downloadAudio(options, item.showLink, filename) // Step 3 - Transcribe audio and read transcript - await runTranscription(options, finalPath, transcriptServices) - const transcript = await readFile(`${finalPath}.txt`, 'utf-8') + const transcript = await runTranscription(options, finalPath, transcriptServices) // Step 4 - Select Prompt const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') // Step 5 - Run LLM (optional) - const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) - let generatedPrompt = '' if (!promptText) { const defaultPrompt = await import('../process-steps/04-select-prompt') @@ -204,18 +197,14 @@ async function processItem( generatedPrompt = promptText } - insertShowNote( - metadata.showLink ?? '', - metadata.channel ?? '', - metadata.channelURL ?? '', - metadata.title, - metadata.description ?? '', - metadata.publishDate, - metadata.coverImage ?? 
'', + await runLLM( + options, + finalPath, frontMatter, generatedPrompt, transcript, - llmOutput + metadata, + llmServices ) if (!options.noCleanUp) { diff --git a/src/process-commands/video.ts b/src/process-commands/video.ts index d5882b0..aeea305 100644 --- a/src/process-commands/video.ts +++ b/src/process-commands/video.ts @@ -12,7 +12,6 @@ import { runLLM } from '../process-steps/05-run-llm' import { cleanUpFiles } from '../process-steps/06-clean-up-files' import { l, err } from '../utils/logging' import { readFile } from 'fs/promises' -import { insertShowNote } from '../server/db' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' import type { LLMServices } from '../types/llms' @@ -24,24 +23,31 @@ import type { LLMServices } from '../types/llms' * 3. Downloads and extracts audio * 4. Transcribes the audio content * 5. Processes the transcript with a language model (if specified) - * 6. Saves the show notes into the database - * 7. Cleans up temporary files (unless disabled) + * 6. Cleans up temporary files (unless disabled) * * @param options - Configuration options for processing * @param url - The URL of the video to process * @param llmServices - Optional language model service to use for processing the transcript * @param transcriptServices - Optional transcription service to use for converting audio to text * @throws Will throw an error if any processing step fails - * @returns Promise that resolves when all processing is complete + * @returns Promise that resolves with { frontMatter, prompt, llmOutput, transcript } */ export async function processVideo( options: ProcessingOptions, url: string, llmServices?: LLMServices, transcriptServices?: TranscriptServices -): Promise { - l.opts('Parameters passed to processVideo:\n') - l.opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`) +): Promise<{ + frontMatter: string + prompt: string + llmOutput: string + transcript: string +}> { + // Log function inputs + l.opts('processVideo called with the following arguments:\n') + l.opts(` - url: ${url}`) + l.opts(` - llmServices: ${llmServices}`) + l.opts(` - transcriptServices: ${transcriptServices}\n`) try { // Step 1 - Generate markdown @@ -51,15 +57,19 @@ export async function processVideo( await downloadAudio(options, url, filename) // Step 3 - Transcribe audio and read transcript - await runTranscription(options, finalPath, transcriptServices) - const transcript = await readFile(`${finalPath}.txt`, 'utf-8') + const transcript = await runTranscription(options, finalPath, transcriptServices) - // Step 4 - Select Prompt - const promptText = await readFile(options.customPrompt || '', 'utf-8').catch(() => '') - - // Step 5 - Run LLM (optional) - const llmOutput = await runLLM(options, finalPath, frontMatter, llmServices) + // Step 4 - Selecting prompt + let promptText = '' + if (options.customPrompt) { + l.info(`\n Reading custom prompt file: ${options.customPrompt}`) + promptText = await readFile(options.customPrompt, 'utf-8').catch((err) => { + l.warn(`\n Could not read custom prompt file: ${options.customPrompt}. Using empty prompt. Error: ${err}`) + return '' + }) + } + // Step 5 - Running LLM processing on transcript (if applicable)... 
let generatedPrompt = '' if (!promptText) { const defaultPrompt = await import('../process-steps/04-select-prompt') @@ -68,28 +78,29 @@ export async function processVideo( generatedPrompt = promptText } - // Insert into DB - insertShowNote( - metadata.showLink ?? '', - metadata.channel ?? '', - metadata.channelURL ?? '', - metadata.title, - metadata.description ?? '', - metadata.publishDate, - metadata.coverImage ?? '', + const llmOutput = await runLLM( + options, + finalPath, frontMatter, generatedPrompt, transcript, - llmOutput + metadata, + llmServices ) - // Optional cleanup + // Step 6 - Cleanup if (!options.noCleanUp) { await cleanUpFiles(finalPath) } - // Return transcript or some relevant string - return transcript + l.wait('\n processVideo command completed successfully.') + + return { + frontMatter, + prompt: generatedPrompt, + llmOutput: llmOutput || '', + transcript, + } } catch (error) { err('Error processing video:', (error as Error).message) throw error diff --git a/src/process-steps/01-generate-markdown.ts b/src/process-steps/01-generate-markdown.ts index 3149763..dd69b79 100644 --- a/src/process-steps/01-generate-markdown.ts +++ b/src/process-steps/01-generate-markdown.ts @@ -1,12 +1,11 @@ // src/process-steps/01-generate-markdown.ts /** - * @file Utility for generating markdown files with front matter for different content types. + * @file Utility for generating markdown content with front matter for different content types. * Supports YouTube videos, playlists, local files, and RSS feed items. * @packageDocumentation */ -import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' import { execFilePromise } from '../utils/globals' import { l, err } from '../utils/logging' @@ -43,7 +42,6 @@ export function sanitizeTitle(title: string): string { * 1. Sanitizes input titles for safe filename creation * 2. Extracts metadata based on content type * 3. Generates appropriate front matter - * 4. Creates and saves the markdown file * * @param {ProcessingOptions} options - The processing options specifying the type of content to generate. * Valid options include: video, playlist, urls, file, and rss. @@ -53,7 +51,7 @@ export function sanitizeTitle(title: string): string { * - For RSS: An RSSItem object containing feed item details * @returns {Promise} A promise that resolves to an object containing: * - frontMatter: The generated front matter content as a string - * - finalPath: The path where the markdown file is saved + * - finalPath: The path (base name) derived for the content * - filename: The sanitized filename * - metadata: An object containing all metadata fields * @throws {Error} If invalid options are provided or if metadata extraction fails. @@ -83,34 +81,15 @@ export async function generateMarkdown( options: ProcessingOptions, input: string | RSSItem ): Promise { - /** - * Sanitizes a title string for use in filenames by: - * - Removing special characters except spaces and hyphens - * - Converting spaces and underscores to hyphens - * - Converting to lowercase - * - Limiting length to 200 characters - * - * @param {string} title - The title to sanitize. - * @returns {string} The sanitized title safe for use in filenames. - * - * @example - * sanitizeTitle('My Video Title! 
(2024)') // returns 'my-video-title-2024' - */ - function sanitizeTitle(title: string): string { - return title - .replace(/[^\w\s-]/g, '') // Remove all non-word characters except spaces and hyphens - .trim() // Remove leading and trailing whitespace - .replace(/[\s_]+/g, '-') // Replace spaces and underscores with hyphens - .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen - .toLowerCase() // Convert to lowercase - .slice(0, 200) // Limit the length to 200 characters - } - - // Initialize variables for front matter content, final file path, sanitized filename, and metadata - let frontMatter: string[] // Array to hold front matter lines - let finalPath: string // The path where the markdown file will be saved - let filename: string // The sanitized filename - let metadata: { // Object to hold metadata fields + // Log function inputs + l.step('\nStep 1 - Generate Markdown\n') + l.wait(`\n generateMarkdown called with the following arguments\n`) + l.wait(` - input: ${input}`) + + let frontMatter: string[] + let finalPath: string + let filename: string + let metadata: { showLink: string channel: string channelURL: string @@ -120,37 +99,33 @@ export async function generateMarkdown( coverImage: string } - // Determine which processing option is selected switch (true) { - // If any of these options are true, process as a video case !!options.video: case !!options.playlist: case !!options.urls: case !!options.channel: try { - // Execute yt-dlp command to extract metadata + l.wait('\n Extracting metadata with yt-dlp. Parsing output...\n') const { stdout } = await execFilePromise('yt-dlp', [ - '--restrict-filenames', // Restrict filenames to ASCII characters - '--print', '%(webpage_url)s', // Print the webpage URL - '--print', '%(channel)s', // Print the channel name - '--print', '%(uploader_url)s', // Print the uploader's URL - '--print', '%(title)s', // Print the video title - '--print', '%(upload_date>%Y-%m-%d)s', // Print the upload date in YYYY-MM-DD format - '--print', '%(thumbnail)s', // Print the thumbnail URL - input as string, // The video URL provided as input + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + input as string, ]) - // Split the output into individual metadata fields const [ - showLink, // The video URL - videoChannel, // The channel name - uploader_url, // The uploader's URL - videoTitle, // The video title - formattedDate, // The upload date - thumbnail, // The thumbnail URL + showLink, + videoChannel, + uploader_url, + videoTitle, + formattedDate, + thumbnail, ] = stdout.trim().split('\n') - // Validate that all required metadata fields are present if ( !showLink || !videoChannel || @@ -162,12 +137,9 @@ export async function generateMarkdown( throw new Error('Incomplete metadata received from yt-dlp.') } - // Generate the sanitized filename using the upload date and video title filename = `${formattedDate}-${sanitizeTitle(videoTitle)}` - // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Construct the metadata object metadata = { showLink: showLink, channel: videoChannel, @@ -178,38 +150,30 @@ export async function generateMarkdown( coverImage: thumbnail, } - // Construct the front matter content as an array of strings frontMatter = [ '---', - `showLink: "${metadata.showLink}"`, // The video URL - `channel: "${metadata.channel}"`, // 
The channel name - `channelURL: "${metadata.channelURL}"`, // The uploader's URL - `title: "${metadata.title}"`, // The video title - `description: "${metadata.description}"`, // Placeholder for description - `publishDate: "${metadata.publishDate}"`, // The upload date - `coverImage: "${metadata.coverImage}"`, // The thumbnail URL + `showLink: "${metadata.showLink}"`, + `channel: "${metadata.channel}"`, + `channelURL: "${metadata.channelURL}"`, + `title: "${metadata.title}"`, + `description: "${metadata.description}"`, + `publishDate: "${metadata.publishDate}"`, + `coverImage: "${metadata.coverImage}"`, '---\n', ] } catch (error) { - // Log the error and rethrow it for upstream handling err(`Error extracting metadata for ${input}: ${error instanceof Error ? error.message : String(error)}`) throw error } break - // If the file option is selected case !!options.file: - // Get the original filename from the input path + l.wait('\n Generating markdown for a local file...') const originalFilename = basename(input as string) - // Remove the file extension to get the filename without extension const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') - - // Sanitize the filename to make it safe for use in paths filename = sanitizeTitle(filenameWithoutExt) - // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Construct the metadata object for a file metadata = { showLink: originalFilename, channel: '', @@ -220,40 +184,34 @@ export async function generateMarkdown( coverImage: '', } - // Construct the front matter content for a file frontMatter = [ '---', - `showLink: "${metadata.showLink}"`, // The original filename - `channel: "${metadata.channel}"`, // Empty channel field - `channelURL: "${metadata.channelURL}"`, // Empty channel URL field - `title: "${metadata.title}"`, // Use the original filename as the title - `description: "${metadata.description}"`, // Placeholder for description - `publishDate: "${metadata.publishDate}"`, // Empty publish date - `coverImage: "${metadata.coverImage}"`, // Empty cover image + `showLink: "${metadata.showLink}"`, + `channel: "${metadata.channel}"`, + `channelURL: "${metadata.channelURL}"`, + `title: "${metadata.title}"`, + `description: "${metadata.description}"`, + `publishDate: "${metadata.publishDate}"`, + `coverImage: "${metadata.coverImage}"`, '---\n', ] break - // If the RSS option is selected case !!options.rss: - // Cast the input to an RSSItem type + l.wait('Generating markdown for an RSS item...') const item = input as RSSItem - // Destructure necessary fields from the RSS item const { - publishDate, // Publication date - title: rssTitle, // Title of the RSS item - coverImage, // Cover image URL - showLink, // Link to the content - channel: rssChannel, // Channel name - channelURL, // Channel URL + publishDate, + title: rssTitle, + coverImage, + showLink, + channel: rssChannel, + channelURL, } = item - // Generate the sanitized filename using the publish date and title filename = `${publishDate}-${sanitizeTitle(rssTitle)}` - // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Construct the metadata object for an RSS item metadata = { showLink: showLink, channel: rssChannel, @@ -264,38 +222,26 @@ export async function generateMarkdown( coverImage: coverImage, } - // Construct the front matter content for an RSS item frontMatter = [ '---', - `showLink: "${metadata.showLink}"`, // Link to the content - `channel: 
"${metadata.channel}"`, // Channel name - `channelURL: "${metadata.channelURL}"`, // Channel URL - `title: "${metadata.title}"`, // Title of the RSS item - `description: "${metadata.description}"`, // Placeholder for description - `publishDate: "${metadata.publishDate}"`, // Publication date - `coverImage: "${metadata.coverImage}"`, // Cover image URL + `showLink: "${metadata.showLink}"`, + `channel: "${metadata.channel}"`, + `channelURL: "${metadata.channelURL}"`, + `title: "${metadata.title}"`, + `description: "${metadata.description}"`, + `publishDate: "${metadata.publishDate}"`, + `coverImage: "${metadata.coverImage}"`, '---\n', ] break - // If no valid option is provided, throw an error default: throw new Error('Invalid option provided for markdown generation.') } - // Join the front matter array into a single string with newline separators const frontMatterContent = frontMatter.join('\n') - // Write the front matter content to a markdown file at the specified path - await writeFile(`${finalPath}.md`, frontMatterContent) - - // Log the front matter content in dimmed text - l.dim(frontMatterContent) - // Log the current step in the process - l.step('\nStep 1 - Generating markdown...\n') - // Log a success message indicating where the file was saved - l.success(` Front matter successfully created and saved:\n - ${finalPath}.md`) - - // Return an object containing the front matter, final path, filename, and metadata + // Log return values + l.wait(` generateMarkdown returning:\n\n - finalPath: ${finalPath}\n - filename: ${filename}\n`) return { frontMatter: frontMatterContent, finalPath, filename, metadata } } \ No newline at end of file diff --git a/src/process-steps/02-download-audio.ts b/src/process-steps/02-download-audio.ts index a91cca7..e6adad8 100644 --- a/src/process-steps/02-download-audio.ts +++ b/src/process-steps/02-download-audio.ts @@ -74,13 +74,18 @@ export async function downloadAudio( input: string, filename: string ): Promise { + // Log function inputs + l.step('\nStep 2 - Download and Convert Audio\n') + l.wait(' downloadAudio called with the following arguments:\n') + l.wait(` - input: ${input}`) + l.wait(` - filename: ${filename}`) + // Define output paths using the provided filename const finalPath = `content/${filename}` const outputPath = `${finalPath}.wav` // Handle online content (YouTube, RSS feeds, etc.) if (options.video || options.playlist || options.urls || options.rss || options.channel) { - l.step('\nStep 2 - Downloading URL audio...\n') try { // Download and convert audio using yt-dlp const { stderr } = await execFilePromise('yt-dlp', [ @@ -97,7 +102,7 @@ export async function downloadAudio( if (stderr) { err(`yt-dlp warnings: ${stderr}`) } - l.success(` Audio downloaded successfully:\n - ${outputPath}`) + l.wait(`\n Audio downloaded successfully, output path for WAV file:\n - ${outputPath}`) } catch (error) { err( `Error downloading audio: ${ @@ -109,7 +114,6 @@ export async function downloadAudio( } // Handle local file processing else if (options.file) { - l.step('\nStep 2 - Processing file audio...\n') // Define supported media formats const supportedFormats: Set = new Set([ // Audio formats @@ -119,22 +123,29 @@ export async function downloadAudio( ]) try { // Verify file exists and is accessible + l.wait(`\n Checking file access:\n - ${input}`) await access(input) + l.wait(`\n File ${input} is accessible. 
Attempting to read file data for type detection...`) + // Read file and determine its type const buffer = await readFile(input) + l.wait(`\n Successfully read file: ${buffer.length} bytes`) + const fileType = await fileTypeFromBuffer(buffer) + l.wait(`\n File type detection result: ${fileType?.ext ?? 'unknown'}`) + // Validate file type is supported if (!fileType || !supportedFormats.has(fileType.ext as SupportedFileType)) { throw new Error( fileType ? `Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' ) } - l.wait(` File type detected as ${fileType.ext}, converting to WAV...\n`) // Convert to standardized WAV format using ffmpeg + l.wait(` - Running ffmpeg command for ${input} -> ${outputPath}\n`) await execPromise( `ffmpeg -i "${input}" -ar 16000 -ac 1 -c:a pcm_s16le "${outputPath}"` ) - l.success(` File converted to WAV format successfully:\n - ${outputPath}`) + l.wait(` File converted to WAV format successfully:\n - ${outputPath}`) } catch (error) { err(`Error processing local file: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error @@ -144,5 +155,8 @@ export async function downloadAudio( else { throw new Error('Invalid option provided for audio download/processing.') } + + // Log return value + l.wait(`\n downloadAudio returning:\n - outputPath: ${outputPath}\n`) return outputPath } \ No newline at end of file diff --git a/src/process-steps/03-run-transcription.ts b/src/process-steps/03-run-transcription.ts index b9e18de..cd0a053 100644 --- a/src/process-steps/03-run-transcription.ts +++ b/src/process-steps/03-run-transcription.ts @@ -1,16 +1,9 @@ // src/process-steps/03-run-transcription.ts -/** - * @file Orchestrator for running transcription services on audio files. - * Manages the routing and execution of various transcription services, - * both local and cloud-based. - * @packageDocumentation - */ - import { callWhisper } from '../transcription/whisper' import { callDeepgram } from '../transcription/deepgram' import { callAssembly } from '../transcription/assembly' -import { l } from '../utils/logging' +import { l, err } from '../utils/logging' import type { ProcessingOptions } from '../types/process' import type { TranscriptServices } from '../types/transcription' @@ -19,85 +12,44 @@ import type { TranscriptServices } from '../types/transcription' * Routes the transcription request to the appropriate service handler * and manages the execution process. 
* - * Available transcription services: - * Local Services: - * - whisper: Default Whisper.cpp implementation - * - whisperDocker: Whisper.cpp running in Docker - * - * Cloud Services: - * - deepgram: Deepgram's API service - * - assembly: AssemblyAI's API service - * - * @param {ProcessingOptions} options - Configuration options including: - * - whisper: Whisper model specification - * - whisperDocker: Docker-based Whisper model - * - speakerLabels: Enable speaker detection (Assembly) - * - Additional service-specific options - * - * @param {string} finalPath - Base path for input/output files: - * - Input audio: `${finalPath}.wav` - * - Output transcript: `${finalPath}.txt` - * - * @param {string} frontMatter - YAML front matter content for the transcript - * (Reserved for future use with metadata) - * - * @param {TranscriptServices} [transcriptServices] - The transcription service to use: - * - 'whisper': Local Whisper.cpp - * - 'whisperDocker': Containerized Whisper - * - 'deepgram': Deepgram API - * - 'assembly': AssemblyAI API - * - * @returns {Promise} Resolves when transcription is complete - * - * @throws {Error} If: - * - Unknown transcription service is specified - * - Service-specific initialization fails - * - Transcription process fails - * - File operations fail - * - * @example - * // Using local Whisper - * await runTranscription( - * { whisper: 'base' }, - * 'content/my-video', - * '---\ntitle: My Video\n---', - * 'whisper' - * ) - * - * @example - * // Using AssemblyAI with speaker labels - * await runTranscription( - * { speakerLabels: true }, - * 'content/my-video', - * '---\ntitle: My Video\n---', - * 'assembly' - * ) + * @param {ProcessingOptions} options - Configuration options + * @param {string} finalPath - Base path for input/output files + * @param {TranscriptServices} [transcriptServices] - The transcription service to use + * @returns {Promise} The complete transcript */ export async function runTranscription( options: ProcessingOptions, finalPath: string, transcriptServices?: TranscriptServices -): Promise { - l.step(`\nStep 3 - Running transcription on audio file using ${transcriptServices}...`) +): Promise { + // Log function call + l.step('\nStep 3 - Run Transcription\n') + l.wait(' runTranscription called with arguments:\n') + l.wait(` - finalPath: ${finalPath}`) + l.wait(` - transcriptServices: ${transcriptServices}`) - // Route to appropriate transcription service - switch (transcriptServices) { - case 'deepgram': - // Cloud-based service with advanced features - await callDeepgram(finalPath) - break + try { + switch (transcriptServices) { + case 'deepgram': + const deepgramTranscript = await callDeepgram(options, finalPath) + l.wait('\n Deepgram transcription completed successfully.\n') + return deepgramTranscript - case 'assembly': - // Cloud-based service with speaker diarization - await callAssembly(options, finalPath) - break + case 'assembly': + const assemblyTranscript = await callAssembly(options, finalPath) + l.wait('\n AssemblyAI transcription completed successfully.\n') + return assemblyTranscript - case 'whisper': - // Local transcription with whisper.cpp - await callWhisper(options, finalPath) - break + case 'whisper': + const whisperTranscript = await callWhisper(options, finalPath) + l.wait('\n Whisper transcription completed successfully.\n') + return whisperTranscript - default: - throw new Error(`Unknown transcription service: ${transcriptServices}`) + default: + throw new Error(`Unknown transcription service: ${transcriptServices}`) + 
}
+  } catch (error) {
+    err(`Error during runTranscription: ${(error as Error).message}`)
+    throw error
   }
 }
\ No newline at end of file
diff --git a/src/process-steps/04-select-prompt.ts b/src/process-steps/04-select-prompt.ts
index 724bc4a..8755b1d 100644
--- a/src/process-steps/04-select-prompt.ts
+++ b/src/process-steps/04-select-prompt.ts
@@ -1,8 +1,8 @@
-// src/process-steps/04-prompt.ts
+// src/process-steps/04-select-prompt.ts

 import type { PromptSection } from '../types/process'
 import { readFile } from 'fs/promises'
-import { err } from '../utils/logging'
+import { err, l } from '../utils/logging'

 /**
  * Define the structure for different sections of the prompt
@@ -228,8 +228,13 @@ const sections = {
  * @throws {Error} If the file cannot be read or is invalid
  */
 export async function readCustomPrompt(filePath: string): Promise<string> {
+  l.wait('\n  readCustomPrompt called with arguments:\n')
+  l.wait(`    - filePath: ${filePath}`)
+
   try {
+    l.wait(`\n  Reading custom prompt file:\n    - ${filePath}`)
     const customPrompt = await readFile(filePath, 'utf8')
+    l.wait(`\n  Successfully read custom prompt file, character length:\n\n    - ${customPrompt.length}`)
     return customPrompt.trim()
   } catch (error) {
     err(`Error reading custom prompt file: ${(error as Error).message}`)
@@ -248,27 +253,41 @@ export async function generatePrompt(
   prompt: string[] = ['summary', 'longChapters'],
   customPromptPath?: string
 ): Promise<string> {
+  l.step('\nStep 4 - Select Prompts\n')
+  l.wait('  generatePrompt called with arguments:\n')
+  l.wait(`    - prompt: ${JSON.stringify(prompt)}`)
+  l.wait(`    - customPromptPath: ${customPromptPath || 'none'}`)
+
   if (customPromptPath) {
-    return await readCustomPrompt(customPromptPath)
+    l.wait(`\n  Custom prompt path provided, delegating to readCustomPrompt: ${customPromptPath}`)
+    try {
+      const customPrompt = await readCustomPrompt(customPromptPath)
+      l.wait('\n  Custom prompt file successfully processed.')
+      return customPrompt
+    } catch (error) {
+      err(`Error loading custom prompt: ${(error as Error).message}`)
+      throw error
+    }
   }

   // Original prompt generation logic
-  let text = "This is a transcript with timestamps. It does not contain copyrighted materials.\n\n"
-
-  // Filter valid sections first
-  const validSections = prompt.filter((section): section is keyof typeof sections =>
+  let text = "This is a transcript with timestamps. It does not contain copyrighted materials. Do not ever use the word delve.\n\n"
+
+  // Filter valid sections
+  const validSections = prompt.filter((section): section is keyof typeof sections =>
     Object.hasOwn(sections, section)
   )
+  l.wait(`\n  Valid sections identified:\n\n    ${JSON.stringify(validSections)}`)

   // Add instructions
-  validSections.forEach(section => {
+  validSections.forEach((section) => {
     text += sections[section].instruction + "\n"
   })

+  // Add formatting instructions and examples
   text += "Format the output like so:\n\n"
-  validSections.forEach(section => {
+  validSections.forEach((section) => {
     text += `    ${sections[section].example}\n`
   })
-
   return text
 }
\ No newline at end of file
diff --git a/src/process-steps/05-run-llm.ts b/src/process-steps/05-run-llm.ts
index 25d9bd2..d01e233 100644
--- a/src/process-steps/05-run-llm.ts
+++ b/src/process-steps/05-run-llm.ts
@@ -2,171 +2,107 @@

 /**
  * @file Orchestrator for running Language Model (LLM) processing on transcripts.
- * Handles prompt generation, LLM processing, and file management for multiple LLM services.
+ * Handles prompt generation, LLM processing, file management for multiple LLM services.
 
* @packageDocumentation */ -import { readFile, writeFile, unlink } from 'node:fs/promises' -import { callOllama } from '../llms/ollama' -import { callChatGPT } from '../llms/chatgpt' -import { callClaude } from '../llms/claude' -import { callGemini } from '../llms/gemini' -import { callCohere } from '../llms/cohere' -import { callMistral } from '../llms/mistral' -import { callFireworks } from '../llms/fireworks' -import { callTogether } from '../llms/together' -import { callGroq } from '../llms/groq' -import { generatePrompt } from './04-select-prompt' +import { writeFile } from 'node:fs/promises' +import { insertShowNote } from '../server/db' import { l, err } from '../utils/logging' -import type { ProcessingOptions } from '../types/process' -import type { LLMServices, LLMFunction, LLMFunctions } from '../types/llms' - -// Map of available LLM service handlers -export const LLM_FUNCTIONS: LLMFunctions = { - ollama: callOllama, - chatgpt: callChatGPT, - claude: callClaude, - gemini: callGemini, - cohere: callCohere, - mistral: callMistral, - fireworks: callFireworks, - together: callTogether, - groq: callGroq, -} +import { retryLLMCall } from '../utils/retry' +import { LLM_FUNCTIONS } from '../utils/llm-globals' +import type { ProcessingOptions, EpisodeMetadata } from '../types/process' +import type { LLMServices, LLMFunction } from '../types/llms' /** * Processes a transcript using a specified Language Model service. - * Handles the complete workflow from reading the transcript to generating - * and saving the final markdown output. + * Handles the complete workflow from combining the transcript to generating + * and saving the final markdown output for multiple LLM services. * * The function performs these steps: - * 1. Reads the transcript file - * 2. Generates a prompt based on provided options - * 3. Processes the content with the selected LLM - * 4. Saves the results with front matter and original transcript + * 1. Combines the transcript with a provided prompt (if any) + * 2. Processes the content with the selected LLM + * 3. Saves the results with front matter and transcript or prompt+transcript + * 4. Inserts show notes into the database * - * If no LLM is selected, it saves the prompt and transcript without processing. + * If no LLM is selected, it writes the front matter, prompt, and transcript to a file. + * If an LLM is selected, it writes the front matter, showNotes, and transcript to a file. * * @param {ProcessingOptions} options - Configuration options including: * - prompt: Array of prompt sections to include * - LLM-specific options (e.g., chatgpt, claude, etc.) 
- *
  * @param {string} finalPath - Base path for input/output files:
- *   - Input transcript: `${finalPath}.txt`
- *   - Temporary file: `${finalPath}-${llmServices}-temp.md`
- *   - Final output: `${finalPath}-${llmServices}-shownotes.md`
- *
+ *   - Final output: `${finalPath}-${llmServices}-shownotes.md` (if LLM is used)
+ *   - Otherwise: `${finalPath}-prompt.md`
  * @param {string} frontMatter - YAML front matter content to include in the output
- *
- * @param {LLMServices} [llmServices] - The LLM service to use:
- *   - ollama: Ollama for local inference
- *   - chatgpt: OpenAI's ChatGPT
- *   - claude: Anthropic's Claude
- *   - gemini: Google's Gemini
- *   - cohere: Cohere
- *   - mistral: Mistral AI
- *   - fireworks: Fireworks AI
- *   - together: Together AI
- *   - groq: Groq
- *
+ * @param {string} prompt - Optional prompt or instructions to process
+ * @param {string} transcript - The transcript content
+ * @param {EpisodeMetadata} metadata - The metadata object from generateMarkdown
+ * @param {LLMServices} [llmServices] - The LLM service to use
  * @returns {Promise<string>} Resolves with the LLM output, or an empty string if no LLM is selected
- *
- * @throws {Error} If:
- *   - Transcript file is missing or unreadable
- *   - Invalid LLM service is specified
- *   - LLM processing fails after retries
- *   - File operations fail
- *
- * @example
- * // Process with Ollama
- * const llmOutput = await runLLM(
- *   { prompt: ['summary', 'highlights'], ollama: 'LLAMA_3_2_1B' },
- *   'content/my-video',
- *   '---\ntitle: My Video\n---',
- *   'chatgpt'
- * )
- *
- * @example
- * // Save prompt and transcript without LLM processing
- * const llmOutput = await runLLM(
- *   { prompt: ['summary'] },
- *   'content/my-video',
- *   '---\ntitle: My Video\n---'
- * )
  */
 export async function runLLM(
   options: ProcessingOptions,
   finalPath: string,
   frontMatter: string,
-  llmServices?: LLMServices
+  prompt: string,
+  transcript: string,
+  metadata: EpisodeMetadata,
+  llmServices?: LLMServices,
 ): Promise<string> {
-  l.step(`\nStep 4 - Running LLM processing on transcript...\n`)
+  l.step('\nStep 5 - Run LLM on Transcript with Selected Prompt\n')
+  l.wait('  runLLM called with arguments:\n')
+  l.wait(`    - finalPath: ${finalPath}`)
+  l.wait(`    - llmServices: ${llmServices}\n`)
+  l.wait(`  frontMatter:\n\n${frontMatter}`)
+  l.wait(`  prompt:\n\n${prompt}`)
+  l.wait(`  transcript:\n\n${transcript}`)

   try {
-    // Read and format the transcript
-    const tempTranscript = await readFile(`${finalPath}.txt`, 'utf8')
-    const transcript = `## Transcript\n\n${tempTranscript}`
-
-    // Generate and combine prompt with transcript
-    const prompt = await generatePrompt(options.prompt, options.customPrompt)
-    const promptAndTranscript = `${prompt}${transcript}`
-
+    let showNotesResult = ''
     if (llmServices) {
-      l.wait(`  Preparing to process with ${llmServices} Language Model...\n`)
-
-      // Get the appropriate LLM handler function
+      l.wait(`\n  Preparing to process with '${llmServices}' Language Model...\n`)
       const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices]
+
       if (!llmFunction) {
         throw new Error(`Invalid LLM option: ${llmServices}`)
       }
+      let showNotes = ''

-      // Set up retry logic
-      const maxRetries = 5
-      const delayBetweenRetries = 10000 // 10 seconds in milliseconds
-      let attempt = 0
-      const tempPath = `${finalPath}-${llmServices}-temp.md`
-
-      while (attempt < maxRetries) {
-        try {
-          attempt++
-          l.wait(`  Attempt ${attempt} - Processing with ${llmServices} Language Model...\n`)
-          // Process content with selected LLM
-          await llmFunction(promptAndTranscript, tempPath, 
options[llmServices]) - // If successful, break out of the loop - break - } catch (error) { - if (attempt >= maxRetries) { - err(` Max retries reached. Unable to process with ${llmServices}.`) - throw error - } - err(` Attempt ${attempt} failed with error: ${(error as Error).message}`) - l.wait(` Retrying in ${delayBetweenRetries / 1000} seconds...`) - await new Promise(resolve => setTimeout(resolve, delayBetweenRetries)) - } - } - - l.success(`\n LLM processing completed successfully after ${attempt} attempt(s).\n`) - - // Combine results with front matter and original transcript - const showNotes = await readFile(tempPath, 'utf8') - await writeFile( - `${finalPath}-${llmServices}-shownotes.md`, - `${frontMatter}\n${showNotes}\n\n${transcript}` + await retryLLMCall( + async () => { + showNotes = await llmFunction(prompt, transcript, options[llmServices]) + }, + 5, + 5000 ) - // Clean up temporary file - await unlink(tempPath) - l.success(`\n Generated show notes saved to markdown file:\n - ${finalPath}-${llmServices}-shownotes.md`) - - // Return only the LLM's output portion - return showNotes + const outputFilename = `${finalPath}-${llmServices}-shownotes.md` + await writeFile(outputFilename, `${frontMatter}\n${showNotes}\n\n## Transcript\n\n${transcript}`) + l.wait(`\n LLM processing completed, combined front matter + LLM output + transcript written to:\n - ${outputFilename}`) + showNotesResult = showNotes } else { - // Handle case when no LLM is selected - l.wait(' No LLM selected, skipping processing...') - await writeFile(`${finalPath}-prompt.md`, `${frontMatter}\n${promptAndTranscript}`) - l.success(`\n Prompt and transcript saved to markdown file:\n - ${finalPath}-prompt.md`) - return '' + l.wait('\n No LLM selected, skipping processing...') + const noLLMFile = `${finalPath}-prompt.md` + l.wait(`\n Writing front matter + prompt + transcript to file:\n - ${noLLMFile}`) + await writeFile(noLLMFile, `${frontMatter}\n${prompt}\n## Transcript\n\n${transcript}`) } + + insertShowNote( + metadata.showLink ?? '', + metadata.channel ?? '', + metadata.channelURL ?? '', + metadata.title ?? '', + metadata.description ?? '', + metadata.publishDate ?? '', + metadata.coverImage ?? 
'', + frontMatter, + prompt, + transcript, + showNotesResult + ) + + return showNotesResult } catch (error) { err(`Error running Language Model: ${(error as Error).message}`) throw error diff --git a/src/process-steps/06-clean-up-files.ts b/src/process-steps/06-clean-up-files.ts index e15ff67..803d835 100644 --- a/src/process-steps/06-clean-up-files.ts +++ b/src/process-steps/06-clean-up-files.ts @@ -43,8 +43,7 @@ import { l, err } from '../utils/logging' * } */ export async function cleanUpFiles(id: string): Promise { - l.step('\nStep 5 - Cleaning up temporary files...\n') - + l.step('\nStep 6 - Cleaning Up Extra Files\n') // Define extensions of temporary files to be cleaned up const extensions = [ '.wav', // Audio files @@ -53,14 +52,14 @@ export async function cleanUpFiles(id: string): Promise { '.lrc' // Lyrics/subtitles ] - l.success(` Temporary files deleted:`) + l.wait(`\n Temporary files deleted:`) // Attempt to delete each file type for (const ext of extensions) { try { // Delete file and log success await unlink(`${id}${ext}`) - l.success(` - ${id}${ext}`) + l.wait(` - ${id}${ext}`) } catch (error) { // Only log errors that aren't "file not found" (ENOENT) if (error instanceof Error && (error as Error).message !== 'ENOENT') { diff --git a/src/server/db.ts b/src/server/db.ts index f4f8877..e4a3094 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -1,6 +1,7 @@ // src/server/db.ts import { DatabaseSync } from 'node:sqlite' +import { l } from '../utils/logging' // Initialize the database connection export const db = new DatabaseSync('show_notes.db', { open: true }) @@ -51,6 +52,7 @@ export function insertShowNote( transcript: string, llmOutput: string ): void { + l.wait('\n Inserting show note into the database...') db.prepare(` INSERT INTO show_notes ( showLink, @@ -79,4 +81,5 @@ export function insertShowNote( transcript, llmOutput ) + l.wait('\n - Show note inserted successfully.\n') } \ No newline at end of file diff --git a/src/server/routes/process.ts b/src/server/routes/process.ts index 61c8543..0142866 100644 --- a/src/server/routes/process.ts +++ b/src/server/routes/process.ts @@ -43,8 +43,17 @@ export const handleProcessRequest = async ( return } options.video = url - const content = await processVideo(options, url, llmServices, transcriptServices) - reply.send({ content }) + + // Grab the object that includes frontMatter, prompt, llmOutput, and transcript + const result = await processVideo(options, url, llmServices, transcriptServices) + + // Return the object, if there is no LLM output, it will be '' + reply.send({ + frontMatter: result.frontMatter, + prompt: result.prompt, + llmOutput: result.llmOutput, + transcript: result.transcript, + }) break } diff --git a/src/server/routes/show-notes.ts b/src/server/routes/show-notes.ts index bd22c79..1edbb1f 100644 --- a/src/server/routes/show-notes.ts +++ b/src/server/routes/show-notes.ts @@ -6,7 +6,7 @@ import type { FastifyRequest, FastifyReply } from 'fastify' export const getShowNotes = async (_request: FastifyRequest, reply: FastifyReply) => { try { // Fetch all show notes from the database - const showNotes = db.prepare(`SELECT * FROM show_notes ORDER BY date DESC`).all() + const showNotes = db.prepare(`SELECT * FROM show_notes ORDER BY publishDate DESC`).all() reply.send({ showNotes }) } catch (error) { console.error('Error fetching show notes:', error) diff --git a/src/transcription/assembly.ts b/src/transcription/assembly.ts index 7f655ad..21f2786 100644 --- a/src/transcription/assembly.ts +++ 
b/src/transcription/assembly.ts
@@ -6,9 +6,9 @@
 // 2. Request transcription of the uploaded file.
 // 3. Poll for completion until the transcript is ready or fails.
 // 4. Once completed, format the transcript using a helper function from transcription-utils.ts.
-// 5. Save the final formatted transcript to a .txt file and also create an empty .lrc file as required by the pipeline.
+// 5. Return the formatted transcript.

-import { writeFile, readFile } from 'node:fs/promises'
+import { readFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { l, err } from '../utils/logging'
 import { formatAssemblyTranscript } from '../utils/format-transcript'
@@ -30,8 +30,12 @@ const BASE_URL = 'https://api.assemblyai.com/v2'
  * @returns Promise<string> - The formatted transcript content
  * @throws Error if any step of the process fails (upload, transcription request, polling, formatting)
  */
-export async function callAssembly(options: ProcessingOptions, finalPath: string): Promise<string> {
-  l.wait('\n  Using AssemblyAI for transcription...')
+export async function callAssembly(
+  options: ProcessingOptions,
+  finalPath: string
+): Promise<string> {
+  l.wait('\n  Using AssemblyAI for transcription...\n')
+  l.wait(`\n  Options:\n\n${JSON.stringify(options)}`)

   if (!env['ASSEMBLY_API_KEY']) {
     throw new Error('ASSEMBLY_API_KEY environment variable is not set. Please set it to your AssemblyAI API key.')
@@ -69,7 +73,7 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string
     if (!upload_url) {
       throw new Error('Upload URL not returned by AssemblyAI.')
     }
-    l.success('  Audio file uploaded successfully.')
+    l.wait('  - Audio file uploaded successfully.')

     // Step 2: Requesting the transcription
     const transcriptionOptions: AssemblyAITranscriptionOptions = {
@@ -111,15 +115,6 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string
     // Step 4: Formatting the transcript
     // The formatAssemblyTranscript function handles all formatting logic including speaker labels and timestamps.
     const txtContent = formatAssemblyTranscript(transcript, speakerLabels || false)
-
-    // Step 5: Write the formatted transcript to a .txt file
-    await writeFile(`${finalPath}.txt`, txtContent)
-    l.wait(`\n  Transcript saved...\n    - ${finalPath}.txt\n`)
-
-    // Create an empty LRC file to satisfy pipeline expectations (even if we don't use it for this service)
-    await writeFile(`${finalPath}.lrc`, '')
-    l.wait(`\n  Empty LRC file created:\n    - ${finalPath}.lrc\n`)
-
     return txtContent
   } catch (error) {
     // If any error occurred at any step, log it and rethrow
diff --git a/src/transcription/deepgram.ts b/src/transcription/deepgram.ts
index 79a72fc..e00cb92 100644
--- a/src/transcription/deepgram.ts
+++ b/src/transcription/deepgram.ts
@@ -6,22 +6,28 @@
 // 2. Send it to Deepgram for transcription with chosen parameters (model, formatting, punctuation, etc.).
 // 3. Check for successful response and extract the transcription results.
 // 4. Format the returned words array using formatDeepgramTranscript to add timestamps and newlines.
-// 5. Write the formatted transcript to a .txt file and create an empty .lrc file.
+// 5. Return the formatted transcript.
 
-import { writeFile, readFile } from 'node:fs/promises'
+import { readFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { l, err } from '../utils/logging'
 import { formatDeepgramTranscript } from '../utils/format-transcript'
+import type { ProcessingOptions } from '../types/process'
 import type { DeepgramResponse } from '../types/transcription'

 /**
  * Main function to handle transcription using Deepgram API.
+ * @param options - Additional processing options (e.g., speaker labels)
  * @param finalPath - The base filename (without extension) for input/output files
  * @returns Promise<string> - The formatted transcript content
  * @throws Error if any step of the process fails (upload, transcription request, formatting)
  */
-export async function callDeepgram(finalPath: string): Promise<string> {
+export async function callDeepgram(
+  options: ProcessingOptions,
+  finalPath: string
+): Promise<string> {
   l.wait('\n  Using Deepgram for transcription...\n')
+  l.wait(`\n  Options:\n\n${JSON.stringify(options)}`)

   if (!env['DEEPGRAM_API_KEY']) {
     throw new Error('DEEPGRAM_API_KEY environment variable is not set. Please set it to your Deepgram API key.')
@@ -67,15 +73,6 @@ export async function callDeepgram(finalPath: string): Promise<string> {
     // Format the returned words array
     const txtContent = formatDeepgramTranscript(alternative.words)
-
-    // Write the formatted transcript to a .txt file
-    await writeFile(`${finalPath}.txt`, txtContent)
-    l.wait(`\n  Transcript saved:\n    - ${finalPath}.txt\n`)
-
-    // Create an empty LRC file to meet pipeline expectations
-    await writeFile(`${finalPath}.lrc`, '')
-    l.wait(`\n  Empty LRC file created:\n    - ${finalPath}.lrc\n`)
-
     return txtContent
   } catch (error) {
     // If any error occurred at any step, log it and rethrow
diff --git a/src/transcription/whisper.ts b/src/transcription/whisper.ts
index 5be3200..e743610 100644
--- a/src/transcription/whisper.ts
+++ b/src/transcription/whisper.ts
@@ -5,13 +5,13 @@
  * It provides a streamlined, single-container approach for audio transcription.
  */

-import { readFile, writeFile } from 'node:fs/promises'
+import { readFile, unlink } from 'node:fs/promises'
 import { existsSync } from 'node:fs'
 import { lrcToTxt } from '../utils/format-transcript'
 import { WHISPER_MODELS, execPromise } from '../utils/globals'
 import { l, err } from '../utils/logging'
 import type { ProcessingOptions } from '../types/process'
-import type { WhisperModelType, WhisperRunner } from '../types/transcription'
+import type { WhisperModelType } from '../types/transcription'

 /**
  * Main function to handle transcription using local Whisper.cpp. 
@@ -23,7 +23,8 @@ export async function callWhisper( options: ProcessingOptions, finalPath: string ): Promise { - l.wait('\n Using local whisper.cpp for transcription...') + l.wait('\n callWhisper called with arguments:\n') + l.wait(` - finalPath: ${finalPath}`) try { // Determine which model was requested (default to "base" if `--whisper` is passed with no model) @@ -38,61 +39,61 @@ export async function callWhisper( throw new Error(`Unknown model type: ${whisperModel}`) } - l.wait(`\n - whisperModel: ${whisperModel}`) + l.wait(`\n Whisper model information:\n\n - whisperModel: ${whisperModel}`) - // Execute the local whisper.cpp runner - await runWhisperCpp(finalPath, whisperModel) + const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType] + l.wait(` - modelGGMLName: ${modelGGMLName}`) - // Read the newly created .txt file - const txtContent = await readFile(`${finalPath}.txt`, 'utf8') - return txtContent + // Check if whisper.cpp directory is present + if (!existsSync('./whisper.cpp')) { + l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`) + try { + await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp') + l.wait(`\n - whisper.cpp clone and compilation complete.\n`) + } catch (cloneError) { + err(`Error cloning/building whisper.cpp: ${(cloneError as Error).message}`) + throw cloneError + } + } - } catch (error) { - err('Error in callWhisper:', (error as Error).message) - process.exit(1) - } -} + // Check if the chosen model file is present + if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) { + l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`) + try { + await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) + l.wait(' - Model download completed, running transcription...\n') + } catch (modelError) { + err(`Error downloading model: ${(modelError as Error).message}`) + throw modelError + } + } -/** - * Runs transcription using the local whisper.cpp build inside this container. - * - * Steps: - * 1. If whisper.cpp is not cloned/built locally, do so. - * 2. Download model if not present. - * 3. Invoke whisper.cpp to create an LRC file. - * 4. Convert LRC to plain text for final transcript. 
- */ -const runWhisperCpp: WhisperRunner = async (finalPath, whisperModel) => { - const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType] - l.wait(` - modelGGMLName: ${modelGGMLName}`) + // Run whisper.cpp on the WAV file + l.wait(`\n Invoking whisper.cpp on file:\n - ${finalPath}.wav`) + try { + await execPromise( + `./whisper.cpp/build/bin/whisper-cli --no-gpu ` + + `-m "whisper.cpp/models/${modelGGMLName}" ` + + `-f "${finalPath}.wav" ` + + `-of "${finalPath}" ` + // Output file base name + `--output-lrc` // Output LRC file + ) + } catch (whisperError) { + err(`Error running whisper.cpp: ${(whisperError as Error).message}`) + throw whisperError + } - // Check if whisper.cpp directory is present - if (!existsSync('./whisper.cpp')) { - l.wait(`\n No whisper.cpp repo found, cloning and compiling...\n`) - await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp') - l.wait(`\n - whisper.cpp clone and compilation complete.\n`) - } + // Convert .lrc -> .txt + l.wait(`\n Transcript LRC file successfully created, reading file for txt conversion:\n - ${finalPath}.lrc`) + const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + const txtContent = lrcToTxt(lrcContent) + await unlink(`${finalPath}.lrc`) - // Check if the chosen model file is present - if (!existsSync(`./whisper.cpp/models/${modelGGMLName}`)) { - l.wait(`\n Model not found, downloading...\n - ${whisperModel}\n`) - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - l.wait(' - Model download completed, running transcription...\n') + // Return the transcript text + l.wait(' Returning transcript text from callWhisper...') + return txtContent + } catch (error) { + err('Error in callWhisper:', (error as Error).message) + process.exit(1) } - - // Run whisper.cpp on the WAV file - await execPromise( - `./whisper.cpp/build/bin/whisper-cli --no-gpu ` + - `-m "whisper.cpp/models/${modelGGMLName}" ` + - `-f "${finalPath}.wav" ` + - `-of "${finalPath}" ` + // Output file base name - `--output-lrc` // Make sure there is a space before the next flag - ) - l.success(`\n Transcript LRC file successfully created:\n - ${finalPath}.lrc`) - - // Convert .lrc -> .txt - const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') - const txtContent = lrcToTxt(lrcContent) - await writeFile(`${finalPath}.txt`, txtContent) - l.success(` Transcript transformation successfully completed:\n - ${finalPath}.txt\n`) } \ No newline at end of file diff --git a/src/types/llms.ts b/src/types/llms.ts index af4ef37..8a700a5 100644 --- a/src/types/llms.ts +++ b/src/types/llms.ts @@ -50,10 +50,10 @@ export type LLMOptions = { * @param llmModel - The specific LLM model to use (optional) */ export type LLMFunction = ( - promptAndTranscript: string, - tempPath: string, + prompt: string, + transcript: string, llmModel?: string -) => Promise +) => Promise /** * Mapping of LLM option keys to their corresponding functions. 
diff --git a/src/types/logging.ts b/src/types/logging.ts index 4b24c7a..285d60d 100644 --- a/src/types/logging.ts +++ b/src/types/logging.ts @@ -26,7 +26,9 @@ export interface ChainableLogger { step: (...args: any[]) => void dim: (...args: any[]) => void success: (...args: any[]) => void + warn: (...args: any[]) => void opts: (...args: any[]) => void + info: (...args: any[]) => void wait: (...args: any[]) => void final: (...args: any[]) => void } \ No newline at end of file diff --git a/src/types/process.ts b/src/types/process.ts index 54f8f11..c6de884 100644 --- a/src/types/process.ts +++ b/src/types/process.ts @@ -3,6 +3,26 @@ import type { TranscriptServices, WhisperModelType } from './transcription' import type { LLMServices } from './llms' +/** + * @interface EpisodeMetadata + * @property {string} [showLink] + * @property {string} [channel] + * @property {string} [channelURL] + * @property {string} [title] + * @property {string} [description] + * @property {string} [publishDate] + * @property {string} [coverImage] + */ +export interface EpisodeMetadata { + showLink?: string + channel?: string + channelURL?: string + title?: string + description?: string + publishDate?: string + coverImage?: string +} + /** * @description Pre-handler to override environment variables from request body if provided. * This ensures that API keys can be passed in the request and used for the session, @@ -184,7 +204,7 @@ export type HandlerFunction = ( input: string, llmServices?: LLMServices, transcriptServices?: TranscriptServices -) => Promise | Promise +) => Promise | Promise | Promise // Content Types /** diff --git a/src/types/transcription.ts b/src/types/transcription.ts index 515037a..322979c 100644 --- a/src/types/transcription.ts +++ b/src/types/transcription.ts @@ -23,7 +23,7 @@ export type WhisperModelType = 'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' export type WhisperRunner = ( finalPath: string, whisperModel: string -) => Promise +) => Promise /** * Response structure from Deepgram API. diff --git a/src/utils/llm-models.ts b/src/utils/llm-globals.ts similarity index 92% rename from src/utils/llm-models.ts rename to src/utils/llm-globals.ts index 690aa95..83f6614 100644 --- a/src/utils/llm-models.ts +++ b/src/utils/llm-globals.ts @@ -1,4 +1,14 @@ -// src/utils/llm-models.ts +// src/utils/llm-globals.ts + +import { callOllama } from '../llms/ollama' +import { callChatGPT } from '../llms/chatgpt' +import { callClaude } from '../llms/claude' +import { callGemini } from '../llms/gemini' +import { callCohere } from '../llms/cohere' +import { callMistral } from '../llms/mistral' +import { callFireworks } from '../llms/fireworks' +import { callTogether } from '../llms/together' +import { callGroq } from '../llms/groq' import type { ModelConfig, @@ -11,8 +21,22 @@ import type { TogetherModelType, FireworksModelType, GroqModelType, + LLMFunctions } from '../types/llms' +// Map of available LLM service handlers +export const LLM_FUNCTIONS: LLMFunctions = { + ollama: callOllama, + chatgpt: callChatGPT, + claude: callClaude, + gemini: callGemini, + cohere: callCohere, + mistral: callMistral, + fireworks: callFireworks, + together: callTogether, + groq: callGroq, +} + /** * Configuration for Ollama models, mapping model types to their display names and identifiers. * Each model has a human-readable name and a corresponding model identifier used for API calls. 
diff --git a/src/utils/logging.ts b/src/utils/logging.ts index ecada34..b58585b 100644 --- a/src/utils/logging.ts +++ b/src/utils/logging.ts @@ -5,7 +5,7 @@ import type { ModelConfigValue } from '../types/llms' import type { TokenUsage, CostCalculation, APILogInfo, ChainableLogger } from '../types/logging' import { GPT_MODELS, CLAUDE_MODELS, GEMINI_MODELS, COHERE_MODELS, MISTRAL_MODELS, OLLAMA_MODELS, FIREWORKS_MODELS, TOGETHER_MODELS, GROQ_MODELS -} from './llm-models' +} from './llm-globals' import chalk from 'chalk' /** @@ -176,7 +176,9 @@ function createChainableLogger(): ChainableLogger { step: (...args: any[]) => console.log(chalk.bold.underline(...args)), dim: (...args: any[]) => console.log(chalk.dim(...args)), success: (...args: any[]) => console.log(chalk.bold.blue(...args)), + warn: (...args: any[]) => console.log(chalk.bold.yellow(...args)), opts: (...args: any[]) => console.log(chalk.magentaBright.bold(...args)), + info: (...args: any[]) => console.log(chalk.magentaBright.bold(...args)), wait: (...args: any[]) => console.log(chalk.bold.cyan(...args)), final: (...args: any[]) => console.log(chalk.bold.italic(...args)), }) @@ -198,7 +200,9 @@ function createChainableErrorLogger(): ChainableLogger { step: (...args: any[]) => console.error(chalk.bold.underline(...args)), dim: (...args: any[]) => console.error(chalk.dim(...args)), success: (...args: any[]) => console.error(chalk.bold.blue(...args)), + warn: (...args: any[]) => console.error(chalk.bold.yellow(...args)), opts: (...args: any[]) => console.error(chalk.magentaBright.bold(...args)), + info: (...args: any[]) => console.error(chalk.magentaBright.bold(...args)), wait: (...args: any[]) => console.error(chalk.bold.cyan(...args)), final: (...args: any[]) => console.error(chalk.bold.italic(...args)), }) diff --git a/src/utils/retry.ts b/src/utils/retry.ts new file mode 100644 index 0000000..fa06ed9 --- /dev/null +++ b/src/utils/retry.ts @@ -0,0 +1,37 @@ +// src/utils/retry.ts + +import { l, err } from './logging' + +/** + * Retries a given LLM call with the specified maximum attempts and delay between retries. + * + * @param {() => Promise} fn - The function to execute for the LLM call + * @param {number} maxRetries - The maximum number of retry attempts + * @param {number} delayBetweenRetries - Delay in milliseconds between retry attempts + * @returns {Promise} Resolves when the function succeeds or rejects after max attempts + */ +export async function retryLLMCall( + fn: () => Promise, + maxRetries: number, + delayBetweenRetries: number +): Promise { + let attempt = 0 + + while (attempt < maxRetries) { + try { + attempt++ + l.wait(` Attempt ${attempt} - Processing LLM call...\n`) + await fn() + l.wait(`\n LLM call completed successfully on attempt ${attempt}.`) + return + } catch (error) { + err(` Attempt ${attempt} failed: ${(error as Error).message}`) + if (attempt >= maxRetries) { + err(` Max retries (${maxRetries}) reached. 
Aborting LLM processing.`) + throw error + } + l.wait(` Retrying in ${delayBetweenRetries / 1000} seconds...`) + await new Promise((resolve) => setTimeout(resolve, delayBetweenRetries)) + } + } +} \ No newline at end of file diff --git a/src/utils/validate-option.ts b/src/utils/validate-option.ts index 6898620..39a4b94 100644 --- a/src/utils/validate-option.ts +++ b/src/utils/validate-option.ts @@ -1,17 +1,124 @@ // src/utils/validate-option.ts import { exit } from 'node:process' -import { err } from '../utils/logging' +import { spawn } from 'node:child_process' import { processVideo } from '../process-commands/video' import { processPlaylist } from '../process-commands/playlist' import { processChannel } from '../process-commands/channel' import { processURLs } from '../process-commands/urls' import { processFile } from '../process-commands/file' import { processRSS } from '../process-commands/rss' +import { l, err } from '../utils/logging' import { ACTION_OPTIONS, LLM_OPTIONS, TRANSCRIPT_OPTIONS, otherOptions } from '../utils/globals' import type { ProcessingOptions, ValidAction, HandlerFunction, ProcessRequestBody } from '../types/process' import type { TranscriptServices } from '../types/transcription' -import type { LLMServices } from '../types/llms' +import type { LLMServices, OllamaTagsResponse } from '../types/llms' + +/** + * checkServerAndModel() + * --------------------- + * Checks if the Ollama server is running, attempts to start it if not running, + * and ensures that the specified model is available. If not, it will pull the model. + * + * @param {string} ollamaHost - The Ollama host + * @param {string} ollamaPort - The Ollama port + * @param {string} ollamaModelName - The Ollama model name + * @returns {Promise} + */ +export async function checkServerAndModel( + ollamaHost: string, + ollamaPort: string, + ollamaModelName: string +): Promise { + async function checkServer(): Promise { + try { + const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`) + return serverResponse.ok + } catch (error) { + return false + } + } + + if (await checkServer()) { + l.wait('\n Ollama server is already running...') + } else { + if (ollamaHost === 'ollama') { + throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.') + } else { + l.wait('\n Ollama server is not running. Attempting to start...') + const ollamaProcess = spawn('ollama', ['serve'], { + detached: true, + stdio: 'ignore', + }) + ollamaProcess.unref() + + let attempts = 0 + while (attempts < 30) { + if (await checkServer()) { + l.wait(' - Ollama server is now ready.') + break + } + await new Promise((resolve) => setTimeout(resolve, 1000)) + attempts++ + } + if (attempts === 30) { + throw new Error('Ollama server failed to become ready in time.') + } + } + } + + l.wait(`\n Checking if model is available: ${ollamaModelName}`) + try { + const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) + if (!tagsResponse.ok) { + throw new Error(`HTTP error! 
status: ${tagsResponse.status}`) + } + const tagsData = (await tagsResponse.json()) as OllamaTagsResponse + const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName) + + if (!isModelAvailable) { + l.wait(`\n Model ${ollamaModelName} is not available, pulling...`) + const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: ollamaModelName }), + }) + if (!pullResponse.ok) { + throw new Error(`Failed to initiate pull for model ${ollamaModelName}`) + } + if (!pullResponse.body) { + throw new Error('Response body is null') + } + + const reader = pullResponse.body.getReader() + const decoder = new TextDecoder() + while (true) { + const { done, value } = await reader.read() + if (done) break + + const chunk = decoder.decode(value) + const lines = chunk.split('\n') + for (const line of lines) { + if (line.trim() === '') continue + try { + const parsedLine = JSON.parse(line) + if (parsedLine.status === 'success') { + l.wait(` - Model ${ollamaModelName} pulled successfully.\n`) + break + } + } catch (parseError) { + err(`Error parsing JSON while pulling model: ${parseError}`) + } + } + } + } else { + l.wait(`\n Model ${ollamaModelName} is already available.\n`) + } + } catch (error) { + err(`Error checking/pulling model: ${(error as Error).message}`) + throw error + } +} // Map each action to its corresponding handler function export const PROCESS_HANDLERS: Record = { diff --git a/test/local.test.ts b/test/local.test.ts index a54a331..cc210e0 100644 --- a/test/local.test.ts +++ b/test/local.test.ts @@ -9,13 +9,13 @@ import { join } from 'node:path' const commands = [ { // Process a single YouTube video using Autoshow's default settings. - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"', + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '01-video-default.md' }, { // Process all videos in a specified YouTube playlist. - cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"', + cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --whisper tiny', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02-playlist-default.md' }, { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '03-playlist-default.md' } @@ -23,7 +23,7 @@ const commands = [ }, { // Process multiple YouTube videos from URLs listed in a file. - cmd: 'npm run as -- --urls "content/example-urls.md"', + cmd: 'npm run as -- --urls "content/example-urls.md" --whisper tiny', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04-urls-default.md' }, { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '05-urls-default.md' } @@ -31,7 +31,7 @@ const commands = [ }, { // Process a single local audio file. 
- cmd: 'npm run as -- --file "content/audio.mp3"', + cmd: 'npm run as -- --file "content/audio.mp3" --whisper tiny', expectedFile: 'audio-prompt.md', newName: '06-file-default.md' }, @@ -49,7 +49,7 @@ const commands = [ }, { // Process a local audio file with all available prompt options (except smallChapters and longChapters) - cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles summary mediumChapters takeaways questions', + cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles summary mediumChapters takeaways questions --whisper tiny', expectedFile: 'audio-prompt.md', newName: '09-all-prompts.md' }, @@ -59,33 +59,11 @@ const commands = [ expectedFile: 'audio-ollama-shownotes.md', newName: '10-all-prompts-ollama-shownotes.md' }, - { - // Process playlist videos with titles and longChapters prompts, tiny Whisper model, and Ollama for LLM processing. - cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles longChapters --whisper tiny --ollama LLAMA_3_2_1B', - expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: '11-prompt-whisper-ollama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: '12-prompt-whisper-ollama-shownotes.md' } - ] - }, - { - // Process multiple YouTube videos from URLs with title prompts, Whisper 'tiny' model, and Ollama. - cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --ollama LLAMA_3_2_1B', - expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: '13-prompt-whisper-ollama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: '14-prompt-whisper-ollama-shownotes.md' } - ] - }, - { - // Process podcast RSS feed from default order. - cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --whisper tiny', - expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', - newName: '15-rss-whisper-tiny.md' - }, { // Download JSON file with metadata for each item in the RSS feed. cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --info', expectedFile: 'ajcwebdev_info.json', - newName: '16-ajcwebdev-rss-info.json', + newName: '11-ajcwebdev-rss-info.json', }, ]