-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ffa0c08
Showing
3 changed files
with
315 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
#----------------------------------------------------------------------------------------------------------------------- | ||
|
||
# AS A DEADHEAD, ARCHIVE.ORG IS AN INVALUABLE RESOURCE. WE ARE ALL FAMILIAR WITH SHOWS THAT ALLOW YOU TO DOWNLOAD FLACS OR MP3S, | ||
# AND FOR 'STREAM ONLY' SHOWS THERE IS OUR BELOVED GRATEFUL GRABBER. BUT WHAT IF YOU WANT EVERY SHOW? YES, EVERY SHOW. OR EVERY SHOW OF A PARTICULAR YEAR. | ||
# PARANOID BY NATURE, I BEGAN IMAGINING A DISASTER SCENARIO WHERE THE ZOMBIE (OR OTHER) APOCALYPSE TAKES DOWN THE INTERNET, | ||
# AND WITH IT, OUR BELOVED ARCHIVE. IN THAT BRAVE NEW LANDSCAPE I STILL WANT TO DISCOVER NEW SHOWS AND LISTEN TO THEM AS I PLEASE, | ||
# ON A GENERATOR POWERED STEREO, AS I WAIT FOR THE UNKNOWN. | ||
|
||
# AS OF 11/20/2019 THERE ARE EXACTLY 14000 SHOWS IN THE ARCHIVE'S GRATEFUL DEAD COLLECTION. | ||
# IF YOU HAVE A GOOD INTERNET CONNECTION AND PLENTY OF STORAGE SPACE, YOU CAN DOWNLOAD ALL 14000 SHOWS WITH 7 LINES OF CODE. | ||
# THOUGH SOME MAY FIND IT MORE MANAGEABLE TO SEGMENT AND DOWNLOAD SHOWS BY YEAR. | ||
|
||
# AFTER EXTENSIVE RESEARCH AND MUCH TRIAL AND ERROR I STUMBLED UPON THESE RESOURCES WHICH LED TO THE SOLUTION: | ||
|
||
# ARCHIVE HAS ITS OWN PYTHON API FOR DOWNLOADING STUFF??? THIS IS WAY EASIER THAN THE BEAUTIFUL SOUP STUFF I WAS TRYING: | ||
# https://gareth.halfacree.co.uk/2013/04/bulk-downloading-collections-from-archive-org | ||
|
||
# HOW TO OBTAIN LISTS OF IDENTIFIERS USED TO DOWNLOAD SHOWS: | ||
# https://blog.archive.org/2012/04/26/downloading-in-bulk-using-wget/ | ||
|
||
# internetarchive API QUICK START GUIDE: | ||
# https://archive.org/services/docs/api/internetarchive/quickstart.html | ||
|
||
# PRE-REQUISITES: | ||
|
||
# DOWNLOAD AND INSTALL PYTHON: https://www.python.org/downloads/ | ||
|
||
# DOWNLOAD A PYTHON IDE (OPTIONAL): https://www.jetbrains.com/pycharm/ | ||
|
||
# GET internetarchive PACKAGE: (from command line) pip install internetarchive | ||
|
||
# CREATE CONFIG FILE WITH ARCHIVE.ORG CREDENTIALS: | ||
|
||
# (from command line) ia configure | ||
|
||
# Enter your archive.org credentials below to configure 'ia'. | ||
# | ||
# Email address: user@example.com | ||
# Password: | ||
# | ||
# Config saved to: /home/user/.config/ia.ini | ||
|
||
# OBTAIN CSV LIST OF ALL SHOWS: | ||
|
||
# GO TO ARCHIVE.ORG: UNDER THE SEARCH BAR CLICK 'Advanced Search'; IN THE UPPER 'ADVANCED SEARCH' BLOCK: SEARCH: | ||
|
||
# AND Collection: is GratefulDead <--must type exactly this | ||
|
||
# HIT SEARCH; THIS REDIRECTS TO SEARCH RESULTS WITH SYNTAX IN SEARCH FIELD: collection:(GratefulDead) | ||
# COPY SEARCH SYNTAX AND GO BACK TO ADVANCED SEARCH PAGE | ||
# | ||
# IN LOWER 'Advanced Search returning JSON, XML, and more' BLOCK: PASTE: collection:(GratefulDead) INTO 'Query' FIELD | ||
# SELECT 'identifier' IN 'Fields to return' list | ||
# CHANGE 'NUMBER OF RESULTS' TO 15000 | ||
# SELECT 'CSV FORMAT' | ||
# HIT SEARCH | ||
# HIT 'OK' ON POP-UP NOTE | ||
# OPEN CSV FILE, DELETE 'IDENTIFIER' COLUMN HEADER | ||
# SAVE, CLOSE | ||
# SET PATH TO CSV FILE (HARDCODE IN .PY FILE) | ||
# SET PATH TO LOCAL DIRECTORY TO SAVE FILES (HARDCODE IN .PY FILE) | ||
# RUN SELECTED .PY FILE FROM COMMAND LINE | ||
# OPTION 1: DOWNLOAD ALL 14000 GRATEFUL DEAD SHOWS AT ONCE: download_all_gd.py | ||
# OPTION 2: SEGMENT AND DOWNLOAD SHOWS BY YEAR: download_gd_by_year.py | ||
|
||
# NOTE TO SELF: COLLECTION NAME FOR DEAD & COMPANY: collection:(DeadAndCompany) | ||
# NOTE TO ALL: YOU CAN FIND THESE 'COLLECTION NAMES' ON ARCHIVE, YOU JUST GOTTA POKE AROUND | ||
|
||
# STATS FROM SEGMENTATION AND DOWNLOAD BY YEAR BASED ON 1975: | ||
|
||
# INTERNET CONNECTION DETAILS: | ||
# DOWNLOAD Mbps: 887.41 | ||
# UPLOAD Mbps: 839.64 | ||
# https://www.speedtest.net/ | ||
|
||
# 1975: 55 shows | ||
# time to download all 1975: 81.11 minutes | ||
# download time per show: 1.47 minutes | ||
# total size of 1975: 6.35 GB | ||
# average file size per show: 115.45 MB | ||
|
||
# Estimate regarding all 14000 shows based on 1975 stats: | ||
# Approx: 1.62 TB TOTAL | ||
# Approx: 344 Total hours to download (14 days!) | ||
# MILEAGE WILL VARY | ||
|
||
# BREAKDOWN: NUMBER OF SHOWS BY YEAR (as of 11/20/2019): | ||
# 65: 3 shows | ||
# 66: 69 shows | ||
# 67: 48 shows | ||
# 68: 113 shows | ||
# 69: 291 shows | ||
# 70: 340 shows | ||
# 71: 307 shows | ||
# 72: 325 shows | ||
# 73: 379 shows | ||
# 74: 270 shows | ||
# 75: 55 shows | ||
# 76: 251 shows | ||
# 77: 352 shows | ||
# 78: 462 shows | ||
# 79: 453 shows | ||
# 80: 613 shows | ||
# 81: 638 shows | ||
# 82: 524 shows | ||
# 83: 812 shows | ||
# 84: 688 shows | ||
# 85: 874 shows | ||
# 86: 497 shows | ||
# 87: 772 shows | ||
# 88: 684 shows | ||
# 89: 854 shows | ||
# 90: 933 shows | ||
# 91: 682 shows | ||
# 92: 406 shows | ||
# 93: 576 shows | ||
# 94: 412 shows | ||
# 95: 315 shows | ||
# gdnrps: 2 shows | ||
# Total: 14000 shows | ||
|
||
# TESTING SOME FUNCTIONALITY IN THE internetarchive API | ||
|
||
#----------------------------------------------------------------------------------------------------------------------- | ||
|
||
# TESTING: | ||
|
||
# DOWNLOADABLE SHOW | ||
|
||
#THIS IS THE IDENTIFIER | ||
#download('gd1977-11-05.aud.zimmerman.minches.81180.sbeok.flac16', verbose=True, glob_pattern='*.mp3') # SUCCESS | ||
|
||
#----------------------------------------------------------------------------------------------------------------------- | ||
|
||
# TESTING: | ||
|
||
# STREAM-ONLY SHOW | ||
|
||
#THIS IS THE IDENTIFIER | ||
#download('gd73-06-10.sbd.hollister.174.sbeok.shnf', verbose=True, glob_pattern='*.mp3', destdir=r"H:\gd") # SUCCESS | ||
|
||
#----------------------------------------------------------------------------------------------------------------------- | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from internetarchive import get_item | ||
from internetarchive import download | ||
from internetarchive import search_items | ||
import time as t | ||
|
||
#-----------------------------------------------------------------------------------------------------------------------

# SETTINGS

CSV_PATH = r"H:\14000.csv"  # PATH TO CSV FILE
DEST_DIR = r"C:\Users\username\Desktop\gd"  # LOCAL DIRECTORY TO SAVE FILES

#-----------------------------------------------------------------------------------------------------------------------

# READ CSV FILE INTO LIST


def clean_identifiers(lines):
    """Return the identifiers contained in *lines*, one per input line.

    Surrounding whitespace and double quotes are removed from every line.
    Blank lines and a leftover 'identifier' column header are dropped so
    that they are never handed to download().
    """
    identifiers = []
    for line in lines:
        identifier = line.strip().strip('"').strip()
        if not identifier or identifier.lower() == 'identifier':
            continue
        identifiers.append(identifier)
    return identifiers


def read_identifiers(path):
    """Read the CSV file at *path* and return its identifiers as a list."""
    # The file is read exactly once and closed by the context manager.
    with open(path) as f:
        return clean_identifiers(f)


#-----------------------------------------------------------------------------------------------------------------------

# DOWNLOAD ALL 14000 GRATEFUL DEAD SHOWS AT ONCE


def download_shows(identifiers, destdir, fetch=None):
    """Download the mp3 files of every show in *identifiers* into *destdir*.

    *fetch* is the callable used to download one show; it defaults to
    internetarchive's download() and is called as
    fetch(identifier, verbose=True, glob_pattern='*.mp3', destdir=destdir).

    Returns the list of identifiers for which *fetch* raised an exception.
    """
    if fetch is None:
        fetch = download
    failed = []
    for identifier in identifiers:
        try:
            fetch(identifier, verbose=True, glob_pattern='*.mp3', destdir=destdir)
        except Exception as exc:
            # One bad show must not abort a job that can run for days;
            # record it and carry on. Ctrl-C still stops the run because
            # KeyboardInterrupt is not an Exception subclass.
            print('FAILED: ' + identifier + ' (' + repr(exc) + ')')
            failed.append(identifier)
    return failed


#-----------------------------------------------------------------------------------------------------------------------

# RUN THE JOB AND REPORT THE TIME IT TOOK


def main():
    """Download every show listed in CSV_PATH and print the elapsed minutes."""
    start = t.time()  # START TIME OF JOB

    failed = download_shows(read_identifiers(CSV_PATH), DEST_DIR)
    if failed:
        print(str(len(failed)) + ' shows failed to download:')
        for identifier in failed:
            print('  ' + identifier)

    end = t.time()  # END TIME OF JOB
    print('time to complete: ' + str((end - start) / 60) + ' minutes')


if __name__ == "__main__":
    main()

#-----------------------------------------------------------------------------------------------------------------------
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
from internetarchive import get_item | ||
from internetarchive import download | ||
from internetarchive import search_items | ||
import time as t | ||
|
||
#-----------------------------------------------------------------------------------------------------------------------

# SETTINGS

CSV_PATH = r"H:\14000.csv"  # PATH TO CSV FILE
DEST_DIR = r"C:\Users\username\Desktop\gd"  # LOCAL DIRECTORY TO SAVE FILES
YEAR_TO_DOWNLOAD = 'gd75'  ## YEAR TO DOWNLOAD; CHANGE AS DESIRED ('gd65' ... 'gd95', OR 'gdnrps')

#-----------------------------------------------------------------------------------------------------------------------

# READ CSV FILE INTO LIST


def clean_identifiers(lines):
    """Return the identifiers contained in *lines*, one per input line.

    Surrounding whitespace and double quotes are removed from every line.
    Blank lines and a leftover 'identifier' column header are dropped so
    that they are never handed to download().
    """
    identifiers = []
    for line in lines:
        identifier = line.strip().strip('"').strip()
        if not identifier or identifier.lower() == 'identifier':
            continue
        identifiers.append(identifier)
    return identifiers


def read_identifiers(path):
    """Read the CSV file at *path* and return its identifiers as a list."""
    # The file is read exactly once and closed by the context manager.
    with open(path) as f:
        return clean_identifiers(f)


#-----------------------------------------------------------------------------------------------------------------------

# SEGMENT BY YEAR


def segment_by_year(identifiers):
    """Group *identifiers* into per-year lists.

    Returns a dict with the keys 'gd65' through 'gd95' plus 'gdnrps'.
    An identifier belongs to year YY when it contains 'gdYY' or 'gd19YY',
    and to 'gdnrps' when it contains 'gd_nrps'. Identifiers matching none
    of these appear in no list. Input order is kept within each list.
    """
    segments = {}
    for year in range(65, 96):
        # Both the two-digit and the four-digit year spelling are matched.
        matchers = ('gd%d' % year, 'gd19%d' % year)
        segments['gd%d' % year] = [s for s in identifiers if any(m in s for m in matchers)]
    segments['gdnrps'] = [s for s in identifiers if 'gd_nrps' in s]
    return segments


#-----------------------------------------------------------------------------------------------------------------------

# DOWNLOAD BY YEAR


def download_shows(identifiers, destdir, fetch=None):
    """Download the mp3 files of every show in *identifiers* into *destdir*.

    *fetch* is the callable used to download one show; it defaults to
    internetarchive's download() and is called as
    fetch(identifier, verbose=True, glob_pattern='*.mp3', destdir=destdir).

    Returns the list of identifiers for which *fetch* raised an exception.
    """
    if fetch is None:
        fetch = download
    failed = []
    for identifier in identifiers:
        try:
            fetch(identifier, verbose=True, glob_pattern='*.mp3', destdir=destdir)
        except Exception as exc:
            # One bad show must not abort the rest of the year; record it
            # and carry on. Ctrl-C still stops the run because
            # KeyboardInterrupt is not an Exception subclass.
            print('FAILED: ' + identifier + ' (' + repr(exc) + ')')
            failed.append(identifier)
    return failed


#-----------------------------------------------------------------------------------------------------------------------

# RUN THE JOB AND REPORT THE TIME IT TOOK


def main():
    """Download the YEAR_TO_DOWNLOAD segment of CSV_PATH and print the elapsed minutes."""
    start = t.time()  # START TIME OF JOB

    segments = segment_by_year(read_identifiers(CSV_PATH))
    # Raises KeyError when YEAR_TO_DOWNLOAD is not one of the segment keys.
    failed = download_shows(segments[YEAR_TO_DOWNLOAD], DEST_DIR)
    if failed:
        print(str(len(failed)) + ' shows failed to download:')
        for identifier in failed:
            print('  ' + identifier)

    end = t.time()  # END TIME OF JOB
    print('time to complete: ' + str((end - start) / 60) + ' minutes')


if __name__ == "__main__":
    main()

#-----------------------------------------------------------------------------------------------------------------------