-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsoundcloud_rss_import.js
188 lines (167 loc) · 6.48 KB
/
soundcloud_rss_import.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// ---------------------------------------------------------------------------
// Script settings
//
// Declares one text input (the feed URL), one table picker and nine field
// pickers, then unpacks the chosen values into script-level bindings that the
// functions below read directly.
// NOTE(review): `parentTable: "table"` presumably ties each field picker to
// the table chosen under the "table" key — confirm against the Airtable docs.
// ---------------------------------------------------------------------------
const settings = input.config({
  title: "Fetch SoundCloud RSS Feed",
  description: "This script fetches the SoundCloud RSS feed and updates your Airtable table with the latest tracks.",
  items: [
    // Where the data comes from.
    input.config.text("rssUrl", {
      label: "🔗 SoundCloud RSS Feed URL",
      description: "Enter the RSS feed URL for the SoundCloud account",
    }),

    // Where the data goes.
    input.config.table("table", {
      label: "📄 Table",
      description: "Select the table where the data will be stored",
    }),

    // One destination field per value written for each track.
    input.config.field("linkField", {
      parentTable: "table",
      label: "🔗 Link Field",
      description: "Select the field to store the track URL",
    }),
    input.config.field("titleField", {
      parentTable: "table",
      label: "🎵 Title Field",
      description: "Select the field to store the track title",
    }),
    input.config.field("durationField", {
      parentTable: "table",
      label: "⏳ Duration Field",
      description: "Select the field to store the track duration",
    }),
    input.config.field("playsField", {
      parentTable: "table",
      label: "▶️ Plays Field",
      description: "Select the field to store the play count",
    }),
    input.config.field("commentsField", {
      parentTable: "table",
      label: "💬 Comments Field",
      description: "Select the field to store the comment count",
    }),
    input.config.field("pubDateField", {
      parentTable: "table",
      label: "📅 Publication Date Field",
      description: "Select the field to store the publication date",
    }),
    input.config.field("contentField", {
      parentTable: "table",
      label: "📝 Content Field",
      description: "Select the field to store the full description",
    }),
    input.config.field("contentSnippetField", {
      parentTable: "table",
      label: "✂️ Content Snippet Field",
      description: "Select the field to store the shortened description",
    }),
    input.config.field("guidField", {
      parentTable: "table",
      label: "🆔 GUID Field",
      description: "Select the field to store the unique identifier",
    }),
  ],
});

// Source and destination first, then the per-track destination fields.
const { rssUrl, table } = settings;
const {
  linkField,
  titleField,
  durationField,
  playsField,
  commentsField,
  pubDateField,
  contentField,
  contentSnippetField,
  guidField,
} = settings;
/**
 * Downloads the RSS feed and resolves with its body as text.
 *
 * @param {string} url - Address of the feed. Leading/trailing whitespace on a
 *   string URL is ignored, since pasted values often carry a stray space or
 *   newline.
 * @returns {Promise<string>} The raw response body.
 * @throws {Error} When no URL is supplied or the response is not OK.
 */
async function fetchRSSFeed(url) {
  const feedUrl = typeof url === 'string' ? url.trim() : url;
  if (!feedUrl) {
    // Fail with a readable message instead of whatever the fetch layer
    // reports for an empty address.
    throw new Error('Failed to fetch the RSS feed: no feed URL was provided.');
  }

  const response = await remoteFetchAsync(feedUrl);
  if (!response.ok) {
    // Include the status when available so a 404 can be told from a 403/5xx.
    const statusNote = response.status ? ` (HTTP ${response.status})` : '';
    throw new Error(`Failed to fetch the RSS feed${statusNote}.`);
  }

  return response.text();
}
/**
 * Returns the text content of the first `<tagName>` element in an XML string.
 *
 * Handles what real feeds contain and a bare `<tag>(.*?)</tag>` pattern misses:
 * opening tags with attributes (e.g. `<guid isPermaLink="false">`), content
 * spanning several lines, CDATA-wrapped content, and XML character entities
 * (e.g. `&amp;` inside a link). Self-closing elements are skipped.
 *
 * @param {string} xmlString - XML fragment to search.
 * @param {string} tagName - Element name, including any prefix
 *   (e.g. "itunes:duration").
 * @returns {string} The element's text, or '' when the element is absent.
 */
function extractValue(xmlString, tagName) {
  // The tag name goes into a pattern, so neutralise regex metacharacters.
  const tag = String(tagName).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // `<tag`, optional attributes (a "/" is allowed unless it closes the tag,
  // which would make the element self-closing), `>`, then the shortest body
  // up to the matching close tag. [\s\S] lets the body cross line breaks.
  const pattern = new RegExp(
    `<${tag}(?:\\s(?:[^>/]|/(?!>))*)?>([\\s\\S]*?)</${tag}\\s*>`,
  );
  const match = pattern.exec(xmlString);
  if (!match) {
    return '';
  }

  const rawText = match[1];
  // CDATA content is literal: unwrap it and do NOT decode entities inside.
  const cdata = /^\s*<!\[CDATA\[((?:(?!\]\]>)[\s\S])*)\]\]>\s*$/.exec(rawText);
  return cdata ? cdata[1] : decodeXmlEntities(rawText);
}

// Replaces the five predefined XML entities and numeric character references
// in a single pass, so "&amp;lt;" becomes "&lt;" rather than "<".
function decodeXmlEntities(text) {
  const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  return text.replace(
    /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g,
    (entity, body) => {
      if (body[0] !== '#') {
        return named[body];
      }
      const codePoint =
        body[1] === 'x'
          ? Number.parseInt(body.slice(2), 16)
          : Number.parseInt(body.slice(1), 10);
      // Leave out-of-range references untouched instead of throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    },
  );
}
/**
 * Loads the table's records and indexes them by the value of their link field.
 *
 * Only the link field is requested, because that is the only cell read here;
 * records whose link cell is empty are left out. If several records share a
 * link, the last one returned by the query wins.
 *
 * @param {object} table - Table exposing `selectRecordsAsync`.
 * @param {object} linkField - Field whose cell value is used as the lookup key.
 * @returns {Promise<Object<string, object>>} Prototype-less map from link to
 *   record, so a link such as "constructor" can never resolve to an inherited
 *   member.
 */
async function fetchExistingRecords(table, linkField) {
  const query = await table.selectRecordsAsync({ fields: [linkField] });

  const recordsByLink = Object.create(null);
  for (const record of query.records) {
    const link = record.getCellValue(linkField);
    if (link) {
      recordsByLink[link] = record;
    }
  }
  return recordsByLink;
}
/**
 * Fetches a track page and reads the play and comment counts embedded in it.
 *
 * The counts are looked up as `"playback_count":<digits>` and
 * `"comment_count":<digits>` anywhere in the page text; a count that is not
 * found is reported as 0.
 *
 * @param {string} trackUrl - Address of the track page.
 * @returns {Promise<{plays: number, comments: number}>} The two counts.
 * @throws {Error} When the page request does not succeed.
 */
async function scrapeSoundCloudStats(trackUrl) {
  const response = await remoteFetchAsync(trackUrl);
  if (!response.ok) {
    // Name the status and the track so a failure in a long run is traceable.
    throw new Error(
      `Failed to fetch track page (HTTP ${response.status}) for ${trackUrl}.`,
    );
  }

  const htmlText = await response.text();
  return {
    plays: readEmbeddedCount(htmlText, 'playback_count'),
    comments: readEmbeddedCount(htmlText, 'comment_count'),
  };
}

// Reads the integer following `"key":` in the page text, or 0 when absent.
function readEmbeddedCount(pageText, key) {
  // Whitespace around the colon is tolerated in case the data is pretty-printed.
  const match = new RegExp(`"${key}"\\s*:\\s*(\\d+)`).exec(pageText);
  return match ? Number.parseInt(match[1], 10) : 0;
}
/**
 * Redraws the progress table: clears the script output, then renders the
 * rows collected so far.
 *
 * @param {Array<object>} progressData - Rows to display.
 */
function updateProgress(progressData) {
  // Clear first; otherwise each call would append another table to the output.
  output.clear();

  output.table(progressData);
}
/**
 * Imports every `<item>` of the configured RSS feed into the configured table.
 *
 * For each feed item the track page is scraped for play/comment counts and
 * the record whose link matches is updated, or a new record is created when
 * there is none. The progress table is redrawn after every item.
 *
 * Failure handling:
 * - an item without a link is reported as skipped;
 * - an error while processing one item (page fetch, record write) is reported
 *   on that item's row and the remaining items are still processed;
 * - an error before the loop (loading records, fetching the feed) stops the
 *   run and is printed as `Error: …`.
 *
 * @returns {Promise<void>} Resolves when the run is over; never rejects for
 *   the failures listed above.
 */
async function main() {
  try {
    // Existing rows keyed by track link decide between update and create.
    const existingRecords = await fetchExistingRecords(table, linkField);

    const rssText = await fetchRSSFeed(rssUrl);
    const items = rssText.match(/<item(?:\s[^>]*)?>[\s\S]*?<\/item>/g) || [];

    const progressData = [];
    let processedCount = 0;

    for (const item of items) {
      const link = extractValue(item, "link");
      const title = extractValue(item, "title");

      let status;
      if (!link) {
        // Without a link there is no page to scrape and no key to match on.
        status = "Skipped (no link)";
      } else {
        try {
          status = await saveFeedItem(item, link, title, existingRecords);
          processedCount += 1;
        } catch (itemError) {
          // Keep going: one unreachable track should not lose the rest.
          status = `Failed: ${itemError.message}`;
        }
      }

      progressData.push({ Status: status, Link: link, Title: title });
      updateProgress(progressData);
    }

    output.text(`Script completed. Processed ${processedCount} records.`);
    const problemCount = progressData.length - processedCount;
    if (problemCount > 0) {
      output.text(`${problemCount} feed item(s) were skipped or failed; see the Status column above.`);
    }
  } catch (error) {
    output.text(`Error: ${error.message}`);
  }
}

// Writes one feed item to the table and returns "Updated" or "New Record".
async function saveFeedItem(item, link, title, existingRecords) {
  const description = extractValue(item, "description");
  const pubDate = extractValue(item, "pubDate");
  const guid = extractValue(item, "guid");
  const duration = extractValue(item, "itunes:duration") || 0; // Adjust if duration is present elsewhere

  const { plays, comments } = await scrapeSoundCloudStats(link);

  // A missing or unparseable date yields an Invalid Date; store null instead.
  const publishedAt = new Date(pubDate);
  const pubDateValue = Number.isNaN(publishedAt.getTime()) ? null : publishedAt;

  const fields = {
    [titleField.name]: title,
    [linkField.name]: link,
    [guidField.name]: guid,
    [durationField.name]: duration,
    [contentField.name]: description,
    [contentSnippetField.name]: description.substring(0, 100), // Example snippet logic
    [pubDateField.name]: pubDateValue,
    [playsField.name]: plays,
    [commentsField.name]: comments,
  };

  const existing = existingRecords[link];
  if (existing) {
    await table.updateRecordAsync(existing.id, fields);
    return "Updated";
  }

  // createRecordAsync takes the field map itself; the `{ fields }` wrapper
  // belongs to the plural createRecordsAsync.
  const newRecordId = await table.createRecordAsync(fields);
  // Remember the new row so the same link later in this feed updates it
  // instead of creating a duplicate.
  existingRecords[link] = { id: newRecordId };
  return "New Record";
}
// Entry point: run the import; top-level await waits for main() to settle.
await main();