
Merge pull request #97 from ajcwebdev/io
Refactor, remove unnecessary read and write file operations
ajcwebdev authored Jan 4, 2025
2 parents da5b550 + 729d3f7 commit f45f08c
Showing 35 changed files with 784 additions and 942 deletions.
72 changes: 30 additions & 42 deletions docs/examples.md
@@ -3,11 +3,11 @@
 ## Outline
 
 - [Content and Feed Inputs](#content-and-feed-inputs)
+- [Process Single Audio or Video File](#process-single-audio-or-video-file)
 - [Process Single Video URLs](#process-single-video-urls)
+- [Process Multiple Videos Specified in a URLs File](#process-multiple-videos-specified-in-a-urls-file)
 - [Process Multiple Videos in YouTube Playlist](#process-multiple-videos-in-youtube-playlist)
 - [Process All Videos from a YouTube Channel](#process-all-videos-from-a-youtube-channel)
-- [Process Multiple Videos Specified in a URLs File](#process-multiple-videos-specified-in-a-urls-file)
-- [Process Single Audio or Video File](#process-single-audio-or-video-file)
 - [Process Podcast RSS Feed](#process-podcast-rss-feed)
 - [Transcription Options](#transcription-options)
 - [Whisper](#whisper)
@@ -30,39 +30,56 @@
 
 ## Content and Feed Inputs
 
+### Process Single Audio or Video File
+
+Run on `audio.mp3` on the `content` directory:
+
+```bash
+npm run as -- --file "content/audio.mp3"
+```
+
 ### Process Single Video URLs
 
 Run on a single YouTube video.
 
 ```bash
-npm run as -- \
-  --video "https://www.youtube.com/watch?v=MORMZXEaONk"
+npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"
 ```
 
+### Process Multiple Videos Specified in a URLs File
+
+Run on an arbitrary list of URLs in `example-urls.md`.
+
+```bash
+npm run as -- --urls "content/example-urls.md"
+```
+
+Run on URLs file and generate JSON info file with markdown metadata of each video:
+
+```bash
+npm run as -- --info --urls "content/example-urls.md"
+```
+
 ### Process Multiple Videos in YouTube Playlist
 
 Run on multiple YouTube videos in a playlist.
 
 ```bash
-npm run as -- \
-  --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"
+npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"
 ```
 
 Run on playlist URL and generate JSON info file with markdown metadata of each video in the playlist:
 
 ```bash
-npm run as -- \
-  --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" \
-  --info
+npm run as -- --info --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"
 ```
 
 ### Process All Videos from a YouTube Channel
 
 Process all videos from a YouTube channel (both live and non-live):
 
 ```bash
-npm run as -- \
-  --channel "https://www.youtube.com/@ajcwebdev"
+npm run as -- --channel "https://www.youtube.com/@ajcwebdev"
 ```
 
 Process videos starting from the oldest instead of newest:
@@ -92,9 +109,7 @@ npm run as -- \
 Run on a YouTube channel and generate JSON info file with markdown metadata of each video:
 
 ```bash
-npm run as -- \
-  --channel "https://www.youtube.com/@ajcwebdev" \
-  --info
+npm run as -- --info --channel "https://www.youtube.com/@ajcwebdev"
 ```
 
 #### Advanced Channel Example
@@ -124,39 +139,12 @@ Here’s what’s happening in this single command:
 7. **Prompt**: Generates both a summary and short chapter descriptions (`--prompt summary shortChapters`).
 8. **No Clean Up**: Keeps any intermediary or downloaded files around (`--noCleanUp`) so you can inspect them after the run.
 
-### Process Multiple Videos Specified in a URLs File
-
-Run on an arbitrary list of URLs in `example-urls.md`.
-
-```bash
-npm run as -- \
-  --urls "content/example-urls.md"
-```
-
-Run on URLs file and generate JSON info file with markdown metadata of each video:
-
-```bash
-npm run as -- \
-  --urls "content/example-urls.md" \
-  --info
-```
-
-### Process Single Audio or Video File
-
-Run on `audio.mp3` on the `content` directory:
-
-```bash
-npm run as -- \
-  --file "content/audio.mp3"
-```
-
 ### Process Podcast RSS Feed
 
 Process RSS feed from newest to oldest (default behavior):
 
 ```bash
-npm run as -- \
-  --rss "https://ajcwebdev.substack.com/feed"
+npm run as -- --rss "https://ajcwebdev.substack.com/feed"
 ```
 
 Process RSS feed from oldest to newest:
3 changes: 2 additions & 1 deletion package.json
@@ -18,6 +18,7 @@
   "scripts": {
     "tsx:base": "tsx --env-file=.env --no-warnings --experimental-sqlite",
     "setup": "bash ./scripts/setup.sh",
+    "setup-docker": "docker build -t autoshow -f .github/Dockerfile .",
     "docker-setup": "docker build -t autoshow -f .github/Dockerfile .",
     "setup-all": "npm run setup && npm run docker-setup",
     "as": "npm run tsx:base -- src/cli/commander.ts",
@@ -51,7 +52,7 @@
     "test-services": "tsx --test test/services.test.ts",
     "test-all": "tsx --test test/all.test.ts",
     "ta": "tsx --test test/all.test.ts",
-    "clean": "tsx scripts/cleanContent.ts",
+    "clean": "npm run tsx:base scripts/cleanContent.ts",
     "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
     "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
38 changes: 17 additions & 21 deletions src/llms/chatgpt.ts
@@ -1,54 +1,48 @@
 // src/llms/chatgpt.ts
 
-import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { OpenAI } from 'openai'
-import { GPT_MODELS } from '../utils/llm-models'
+import { GPT_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ChatGPTModelType } from '../types/llms'
 
 /**
  * Main function to call ChatGPT API.
- * @param promptAndTranscript - The combined prompt and transcript text to process.
- * @param tempPath - The temporary file path to write the LLM output.
- * @param model - The GPT model to use.
- * @returns A Promise that resolves when the API call is complete.
+ * @param {string} prompt - The prompt or instructions to process.
+ * @param {string} transcript - The transcript text.
+ * @param {string} tempPath - (unused) The temporary file path (no longer used).
+ * @param {string} [model] - The GPT model to use.
+ * @returns {Promise<string>} A Promise that resolves with the generated text.
+ * @throws {Error} If an error occurs during API call.
  */
 export const callChatGPT: LLMFunction = async (
-  promptAndTranscript: string,
-  tempPath: string,
+  prompt: string,
+  transcript: string,
   model: string = 'GPT_4o_MINI'
-): Promise<void> => {
-  // Check for API key
+): Promise<string> => {
   if (!env['OPENAI_API_KEY']) {
     throw new Error('OPENAI_API_KEY environment variable is not set. Please set it to your OpenAI API key.')
   }
 
-  // Initialize the OpenAI client with the API key from environment variables
   const openai = new OpenAI({ apiKey: env['OPENAI_API_KEY'] })
 
   try {
-    // Select the actual model to use, defaulting to GPT_4o_MINI if not specified
     const actualModel = (GPT_MODELS[model as ChatGPTModelType] || GPT_MODELS.GPT_4o_MINI).modelId
 
-    // Call the OpenAI chat completions API
+    const combinedPrompt = `${prompt}\n${transcript}`
+
     const response = await openai.chat.completions.create({
       model: actualModel,
       max_completion_tokens: 4000,
-      messages: [{ role: 'user', content: promptAndTranscript }],
+      messages: [{ role: 'user', content: combinedPrompt }],
     })
 
-    // Check if we have a valid response
     const firstChoice = response.choices[0]
     if (!firstChoice || !firstChoice.message?.content) {
       throw new Error('No valid response received from the API')
     }
 
-    // Write the generated content to the output file
-    await writeFile(tempPath, firstChoice.message.content)
-
-    // Log API results using the standardized logging function
+    const content = firstChoice.message.content
 
     logAPIResults({
       modelName: actualModel,
       stopReason: firstChoice.finish_reason ?? 'unknown',
@@ -58,6 +52,8 @@ export const callChatGPT: LLMFunction = async (
         total: response.usage?.total_tokens
       }
     })
+
+    return content
   } catch (error) {
     err(`Error in callChatGPT: ${(error as Error).message}`)
     throw error
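The refactored `callChatGPT` now takes `prompt` and `transcript` as separate arguments, concatenates them into a single user message, and returns the generated text rather than writing it to `tempPath`. A minimal sketch of the two pure pieces of that flow follows; the model-table entries here are illustrative stand-ins, not the actual contents of `src/utils/llm-globals`:

```typescript
// Illustrative stand-in for GPT_MODELS from src/utils/llm-globals;
// the real table maps friendly keys to provider model ids.
const GPT_MODELS: Record<string, { modelId: string }> = {
  GPT_4o_MINI: { modelId: 'gpt-4o-mini' },
  GPT_4o: { modelId: 'gpt-4o' },
}

// Mirrors the fallback in the diff: unknown keys resolve to GPT_4o_MINI
function resolveModelId(model: string): string {
  return (GPT_MODELS[model] || GPT_MODELS['GPT_4o_MINI']).modelId
}

// The refactor joins prompt and transcript into one user message
function buildUserContent(prompt: string, transcript: string): string {
  return `${prompt}\n${transcript}`
}
```

Keeping the fallback and message construction in pure functions like these makes them testable without touching the OpenAI API.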
46 changes: 20 additions & 26 deletions src/llms/claude.ts
@@ -1,55 +1,47 @@
 // src/llms/claude.ts
 
-import { writeFile } from 'node:fs/promises'
 import { env } from 'node:process'
 import { Anthropic } from '@anthropic-ai/sdk'
-import { CLAUDE_MODELS } from '../utils/llm-models'
+import { CLAUDE_MODELS } from '../utils/llm-globals'
 import { err, logAPIResults } from '../utils/logging'
 import type { LLMFunction, ClaudeModelType } from '../types/llms'
 
 /**
  * Main function to call Claude API.
- * @param promptAndTranscript - The combined prompt and transcript text to process.
- * @param tempPath - The temporary file path to write the LLM output.
- * @param model - The Claude model to use.
- * @returns A Promise that resolves when the API call is complete.
+ * @param {string} prompt - The prompt or instructions to process.
+ * @param {string} transcript - The transcript text.
+ * @param {string} tempPath - (unused) The temporary file path (no longer used).
+ * @param {string} [model] - The Claude model to use.
+ * @returns {Promise<string>} A Promise that resolves with the generated text.
+ * @throws {Error} If an error occurs during the API call.
 */
 export const callClaude: LLMFunction = async (
-  promptAndTranscript: string,
-  tempPath: string,
+  prompt: string,
+  transcript: string,
   model: string = 'CLAUDE_3_HAIKU'
-): Promise<void> => {
-  // Check if the ANTHROPIC_API_KEY environment variable is set
+): Promise<string> => {
   if (!env['ANTHROPIC_API_KEY']) {
     throw new Error('ANTHROPIC_API_KEY environment variable is not set. Please set it to your Anthropic API key.')
   }
 
-  // Initialize the Anthropic client with the API key from environment variables
   const anthropic = new Anthropic({ apiKey: env['ANTHROPIC_API_KEY'] })
 
   try {
-    // Select the actual model to use, defaulting to CLAUDE_3_HAIKU if not specified
    const actualModel = (CLAUDE_MODELS[model as ClaudeModelType] || CLAUDE_MODELS.CLAUDE_3_HAIKU).modelId
 
-    // Call the Anthropic messages API to create a chat completion
+    const combinedPrompt = `${prompt}\n${transcript}`
+
     const response = await anthropic.messages.create({
       model: actualModel,
-      max_tokens: 4000, // Maximum number of tokens in the response
-      messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content)
+      max_tokens: 4000,
+      messages: [{ role: 'user', content: combinedPrompt }]
     })
 
-    // Extract text content from the response
     const textContent = extractTextContent(response.content)
 
-    // Write the generated text to the output file
-    if (textContent) {
-      await writeFile(tempPath, textContent)
-    } else {
+    if (!textContent) {
       throw new Error('No text content generated from the API')
     }
 
-    // Log API results using the standardized logging function
     logAPIResults({
       modelName: actualModel,
       stopReason: response.stop_reason ?? 'unknown',
@@ -59,9 +51,11 @@ export const callClaude: LLMFunction = async (
         total: response.usage.input_tokens + response.usage.output_tokens
       }
     })
+
+    return textContent
   } catch (error) {
     err(`Error in callClaude: ${(error as Error).message}`)
-    throw error // Re-throw the error for handling in the calling function
+    throw error
   }
 }
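The Claude diff calls an `extractTextContent` helper whose definition falls outside this excerpt. Anthropic's Messages API returns an array of content blocks, so a plausible sketch of the helper (an assumption about its behavior, not the repository's actual implementation) is:

```typescript
// Simplified stand-in for the Anthropic SDK's response content blocks
interface ContentBlock {
  type: string
  text?: string
}

// Hypothetical sketch of the extractTextContent helper referenced in the
// diff: keep only 'text' blocks and join their contents with newlines
function extractTextContent(content: ContentBlock[]): string {
  return content
    .filter((block) => block.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text as string)
    .join('\n')
}
```

Returning an empty string for a response with no text blocks is what lets the caller's `if (!textContent)` guard in the diff reject such responses.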
