-
Notifications
You must be signed in to change notification settings - Fork 222
/
Copy pathyoutube_agent_utils.py
183 lines (162 loc) · 6.81 KB
/
youtube_agent_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# imports
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
import os
import google.oauth2.credentials
from googleapiclient.http import MediaFileUpload
import urllib.request
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timezone, timedelta
import pytz
import pandas as pd
from time import sleep
import seaborn as sns
from openai import OpenAI
import pandas as pd
import re
from datetime import datetime
# API key handed to the OpenAI client built by get_openai_client().
# The value below is a placeholder and must be replaced before use.
OPEN_AI_API_KEY = "SET_OPEN_AI_API_KEY_HERE"
# Channel whose uploads get_last_n_videos_with_views() searches
# (passed as `channelId` to the YouTube search request). Placeholder value.
CHANNEL_ID = 'SET_YOUR_YOUTUBE_CHANNEL_ID_HERE'
def get_authenticated_youtube_api(
    client_secrets_path='/path/to/your/client_secret.json',
    credentials_path='credentials.json',
):
    """Build an authenticated YouTube Data API v3 client.

    If ``credentials_path`` already exists, the stored user credentials are
    loaded from it. Otherwise an installed-app OAuth flow is started from
    ``client_secrets_path`` (this runs a local server to complete the
    authorization), and the resulting credentials are written to
    ``credentials_path`` as JSON so later calls can reuse them.

    Args:
        client_secrets_path: Path to the OAuth client secrets JSON file.
            Only read when no cached credentials file exists.
        credentials_path: Path of the cached user-credentials JSON file.

    Returns:
        The client object returned by ``build("youtube", "v3", ...)``.
    """
    if os.path.exists(credentials_path):
        credentials = google.oauth2.credentials.Credentials.from_authorized_user_file(
            credentials_path
        )
    else:
        # Build the flow only on this branch: constructing it reads the client
        # secrets file, which is not needed (and may not be present) when
        # cached credentials are available.
        flow = InstalledAppFlow.from_client_secrets_file(
            client_secrets_path,
            scopes=['https://www.googleapis.com/auth/youtube'],
        )
        credentials = flow.run_local_server(port=0)
        with open(credentials_path, 'w') as credentials_file:
            credentials_file.write(credentials.to_json())

    return build("youtube", "v3", credentials=credentials)
def get_views_snippet(youtube, video_id):
    """Return the view count and snippet of a single video.

    Args:
        youtube: YouTube API client exposing ``videos().list(...).execute()``.
        video_id: ID of the video to look up.

    Returns:
        A ``(views, snippet)`` tuple: the ``viewCount`` statistic converted to
        ``int`` and the ``snippet`` mapping of the first returned item.
    """
    request = youtube.videos().list(id=video_id, part='snippet,statistics')
    response = request.execute()

    # Only the first item of the response is used.
    first_item = response['items'][0]
    raw_view_count = first_item['statistics']['viewCount']
    video_snippet = first_item['snippet']
    return int(raw_view_count), video_snippet
def update_video_title(youtube, video_id, new_title):
    """Replace the title of a video, keeping the rest of its snippet.

    The video's current snippet is fetched with ``get_views_snippet``, its
    ``title`` entry is overwritten, and the whole snippet is sent back through
    ``videos().update``.

    Args:
        youtube: Authenticated YouTube API client.
        video_id: ID of the video to modify.
        new_title: Title to set on the video.
    """
    # The view count returned alongside the snippet is not needed here.
    _, current_snippet = get_views_snippet(youtube, video_id)
    current_snippet['title'] = new_title

    request_body = {
        "id": f"{video_id}",
        "snippet": current_snippet,
    }
    update_request = youtube.videos().update(part="snippet", body=request_body)
    update_request.execute()
def get_last_n_videos_with_views(youtube, n):
    """
    Fetch the most recent videos of the channel ``CHANNEL_ID`` with their stats.

    Args:
        youtube: Authenticated YouTube API client.
        n: Number of most recent videos to request (sent as ``maxResults``
            to ``search.list``).

    Returns:
        A list of dictionaries, one per video, with the keys ``title``,
        ``publishedAt`` (timezone-aware datetime in America/Los_Angeles),
        ``publishedDaysAgo``, ``url``, ``views``, ``views_per_day``, ``likes``,
        ``dislikes``, ``like_dislike_ratio`` and ``comments``.
        ``like_dislike_ratio`` is ``None`` when the video reports zero (or no)
        dislikes, since the ratio is undefined in that case.
        An empty list is returned when the search yields no videos or when any
        error occurs (the error is printed).
    """
    try:
        # Step 1: Fetch the last n videos using `search.list`
        search_response = youtube.search().list(
            part="snippet",
            channelId=CHANNEL_ID,
            maxResults=n,
            order="date",
            type="video",
        ).execute()

        # Step 2: Extract video IDs
        video_ids = [item["id"]["videoId"] for item in search_response.get("items", [])]
        if not video_ids:
            print("No videos found for the specified channel.")
            return []

        # Step 3: Fetch video statistics using `videos.list`
        videos_response = youtube.videos().list(
            part="snippet,statistics",
            id=",".join(video_ids),
        ).execute()

        # Step 4: Process the response
        # The timezone and "now" are the same for every video, so compute them
        # once; this also measures every video's age against the same instant.
        pacific = pytz.timezone('America/Los_Angeles')
        now = datetime.now(pacific)
        seconds_per_day = 3600 * 24

        videos = []
        for item in videos_response.get("items", []):
            snippet = item["snippet"]
            statistics = item["statistics"]

            published_utc = datetime.strptime(
                snippet["publishedAt"], '%Y-%m-%dT%H:%M:%SZ'
            ).replace(tzinfo=timezone.utc)
            published_pacific = published_utc.astimezone(pacific)
            days_since_publish = (now - published_pacific).total_seconds() / seconds_per_day

            video_id = item["id"]
            view_count = int(statistics.get("viewCount", "0"))
            like_count = int(statistics.get("likeCount", "0"))
            dislike_count = int(statistics.get("dislikeCount", "0"))
            comment_count = int(statistics.get("commentCount", "0"))

            # Guard both divisions: a zero denominator used to raise
            # ZeroDivisionError, which the handler below turned into an empty
            # result for the whole channel.
            if days_since_publish > 0:
                views_per_day = view_count / days_since_publish
            else:
                views_per_day = 0.0
            if dislike_count:
                like_dislike_ratio = like_count / dislike_count
            else:
                like_dislike_ratio = None

            videos.append({
                "title": snippet["title"],
                "publishedAt": published_pacific,
                "publishedDaysAgo": days_since_publish,
                "url": f"https://www.youtube.com/watch?v={video_id}",
                "views": view_count,
                "views_per_day": views_per_day,
                "likes": like_count,
                "dislikes": dislike_count,
                "like_dislike_ratio": like_dislike_ratio,
                "comments": comment_count,
            })
        return videos
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall back to
        # an empty result instead of propagating to the caller.
        print(f"An error occurred: {e}")
        return []
def get_openai_client():
    """Create an OpenAI client configured with the module-level ``OPEN_AI_API_KEY``."""
    return OpenAI(api_key=OPEN_AI_API_KEY)
def chat(client, messages):
    """Send ``messages`` to the chat completions endpoint and return the reply text.

    Args:
        client: Client object exposing ``chat.completions.create(...)``.
        messages: Sequence of chat messages forwarded unchanged to the API.

    Returns:
        The ``content`` of the message in the first choice of the completion.
    """
    request_options = {
        "model": "gpt-4o",
        "store": True,
        "messages": messages,
        "temperature": 0,
    }
    response = client.chat.completions.create(**request_options)
    first_choice = response.choices[0]
    return first_choice.message.content
def get_messages(last_n_videos, user_input):
    """Build the chat messages asking the model for a new video title.

    The first message is a system prompt that explains the task, describes the
    fields of each video record, specifies the JSON output format and embeds
    ``last_n_videos`` (rendered with ``str``) in a ``VIDEO_DATA`` section.
    The second message carries ``user_input`` verbatim as the user turn.

    Args:
        last_n_videos: Sized collection of video records to show the model;
            its length is quoted in the prompt.
        user_input: The user's description of the new video.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dictionaries:
        the system message followed by the user message.
    """
    last_n = len(last_n_videos)
    # Prompt text is kept flush-left so no indentation leaks into the string.
    # The `comments` field description previously read "number of dislikes",
    # which told the model the wrong meaning for that field.
    system_prompt = f"""
You are an assistant who is an expert in generating video titles for YouTube videos which are likely to get lots of engagement.
You will be provided below with information about the last {last_n} videos posted by the YouTube channel ritvikmath.
These videos will be in the VIDEO_DATA section at the end of this prompt.
This channel focusses on data science, statistics, and mathematics educational videos.
Each item in the provided list below has the following schema:
- title: the title of the video
- publishedAt: the datetime when this video was first published
- url: the url of the video
- views: the current number of views of the video
- views_per_day: the number of views this video got per day so far
- likes: the number of likes the video got
- dislikes: the number of dislikes the video got
- like_dislike_ratio: the ratio of number of likes to number of dislikes
- comments: the number of comments the video got
The user will provide a description of what the new video is about.
Your job is to use the strongly-performing videos from the provided data to suggest a strong title for this new video.
By "strong", we mean a video title that is more likely to get engagement.
Please output the new title as well as your reasoning in the following json format:
{{
new_title: the suggested new title,
reasoning: the reasoning for this new title
}}
The reasoning should reference one or more videos provided in the data above.
The reasoning should be 75 words or fewer.
Return the output as raw JSON without any Markdown formatting or additional text.
VIDEO_DATA:
{last_n_videos}
"""
    return [
        {'role': 'system', 'content': system_prompt},
        {"role": "user", "content": user_input},
    ]