Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
JackStrawFromWichita authored Nov 20, 2019
0 parents commit ffa0c08
Show file tree
Hide file tree
Showing 3 changed files with 315 additions and 0 deletions.
143 changes: 143 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#-----------------------------------------------------------------------------------------------------------------------

# AS A DEADHEAD, ARCHIVE.ORG IS AN INVALUABLE RESOURCE. WE ARE ALL FAMILIAR WITH SHOWS THAT ALLOW YOU TO DOWNLOAD FLACS OR MP3S,
# AND FOR 'STREAM ONLY' SHOWS THERE IS OUR BELOVED GRATEFUL GRABBER. BUT WHAT IF YOU WANT EVERY SHOW? YES, EVERY SHOW. OR EVERY SHOW OF A PARTICULAR YEAR.
# PARANOID BY NATURE, I BEGAN IMAGINING A DISASTER SCENARIO WHERE THE ZOMBIE (OR OTHER) APOCALYPSE TAKES DOWN THE INTERNET,
# AND WITH IT, OUR BELOVED ARCHIVE. IN THAT BRAVE NEW LANDSCAPE I STILL WANT TO DISCOVER NEW SHOWS AND LISTEN TO THEM AS I PLEASE,
# ON A GENERATOR POWERED STEREO, AS I WAIT FOR THE UNKNOWN.

# AS OF 11/20/2019 THERE ARE EXACTLY 14000 SHOWS IN THE ARCHIVE'S GRATEFUL DEAD COLLECTION.
# IF YOU HAVE A GOOD INTERNET CONNECTION AND PLENTY OF STORAGE SPACE, YOU CAN DOWNLOAD ALL 14000 SHOWS WITH 7 LINES OF CODE.
# THOUGH SOME MAY FIND IT MORE MANAGEABLE TO SEGMENT AND DOWNLOAD SHOWS BY YEAR.

# AFTER EXTENSIVE RESEARCH AND MUCH TRIAL AND ERROR I STUMBLED UPON THESE RESOURCES WHICH LED TO THE SOLUTION:

# ARCHIVE HAS ITS OWN PYTHON API FOR DOWNLOADING STUFF??? THIS IS WAY EASIER THAN THE BEAUTIFUL SOUP STUFF I WAS TRYING:
# https://gareth.halfacree.co.uk/2013/04/bulk-downloading-collections-from-archive-org

# HOW TO OBTAIN LISTS OF IDENTIFIERS USED TO DOWNLOAD SHOWS:
# https://blog.archive.org/2012/04/26/downloading-in-bulk-using-wget/

# internetarchive API QUICK START GUIDE:
# https://archive.org/services/docs/api/internetarchive/quickstart.html

# PRE-REQUISITES:

# DOWNLOAD AND INSTALL PYTHON: https://www.python.org/downloads/

# DOWNLOAD A PYTHON IDE (OPTIONAL): https://www.jetbrains.com/pycharm/

# GET internetarchive PACKAGE: (from command line) pip install internetarchive

# CREATE CONFIG FILE WITH ARCHIVE.ORG CREDENTIALS:

# (from command line) ia configure

# Enter your archive.org credentials below to configure 'ia'.
#
# Email address: [email protected]
# Password:
#
# Config saved to: /home/user/.config/ia.ini

# OBTAIN CSV LIST OF ALL SHOWS:

# GO TO ARCHIVE.ORG: UNDER THE SEARCH BAR CLICK 'Advanced Search'; IN THE UPPER 'ADVANCED SEARCH' BLOCK: SEARCH:

# AND Collection: is GratefulDead <--must type exactly this

# HIT SEARCH; THIS REDIRECTS TO SEARCH RESULTS WITH SYNTAX IN SEARCH FIELD: collection:(GratefulDead)
# COPY SEARCH SYNTAX AND GO BACK TO ADVANCED SEARCH PAGE
#
# IN LOWER 'Advanced Search returning JSON, XML, and more' BLOCK: PASTE: collection:(GratefulDead) INTO 'Query' FIELD
# SELECT 'identifier' IN 'Fields to return' list
# CHANGE 'NUMBER OF RESULTS' TO 15000
# SELECT 'CSV FORMAT'
# HIT SEARCH
# HIT 'OK' ON POP-UP NOTE
# OPEN CSV FILE, DELETE 'IDENTIFIER' COLUMN HEADER
# SAVE, CLOSE
# SET PATH TO CSV FILE (HARDCODE IN .PY FILE)
# SET PATH TO LOCAL DIRECTORY TO SAVE FILES (HARDCODE IN .PY FILE)
# RUN SELECTED .PY FILE FROM COMMAND LINE
# OPTION 1: DOWNLOAD ALL 14000 GRATEFUL DEAD SHOWS AT ONCE: download_all_gd.py
# OPTION 2: SEGMENT AND DOWNLOAD SHOWS BY YEAR: download_gd_by_year.py

# NOTE TO SELF: COLLECTION NAME FOR DEAD & COMPANY: collection:(DeadAndCompany)
# NOTE TO ALL: YOU CAN FIND THESE 'COLLECTION NAMES' ON ARCHIVE, YOU JUST GOTTA POKE AROUND

# STATS FROM SEGMENTATION AND DOWNLOAD BY YEAR BASED ON 1975:

# INTERNET CONNECTION DETAILS:
# DOWNLOAD Mbps: 887.41
# UPLOAD Mbps: 839.64
# https://www.speedtest.net/

# 1975: 55 shows
# time to download all 1975: 81.11 minutes
# download time per show: 1.47 minutes
# total size of 1975: 6.35 GB
# average file size per show: 115.45 MB

# Estimate regarding all 14000 shows based on 1975 stats:
# Approx: 1.62 TB TOTAL
# Approx: 344 Total hours to download (14 days!)
# MILEAGE WILL VARY

# BREAKDOWN: NUMBER OF SHOWS BY YEAR (as of 11/20/2019):
# 65: 3 shows
# 66: 69 shows
# 67: 48 shows
# 68: 113 shows
# 69: 291 shows
# 70: 340 shows
# 71: 307 shows
# 72: 325 shows
# 73: 379 shows
# 74: 270 shows
# 75: 55 shows
# 76: 251 shows
# 77: 352 shows
# 78: 462 shows
# 79: 453 shows
# 80: 613 shows
# 81: 638 shows
# 82: 524 shows
# 83: 812 shows
# 84: 688 shows
# 85: 874 shows
# 86: 497 shows
# 87: 772 shows
# 88: 684 shows
# 89: 854 shows
# 90: 933 shows
# 91: 682 shows
# 92: 406 shows
# 93: 576 shows
# 94: 412 shows
# 95: 315 shows
# gdnrps: 2 shows
# Total: 14000 shows

# TESTING SOME FUNCTIONALITY IN THE internetarchive API

#-----------------------------------------------------------------------------------------------------------------------

# TESTING:

# DOWNLOADABLE SHOW

#THIS IS THE IDENTIFIER
#download('gd1977-11-05.aud.zimmerman.minches.81180.sbeok.flac16', verbose=True, glob_pattern='*.mp3') # SUCCESS

#-----------------------------------------------------------------------------------------------------------------------

# TESTING:

# STREAM-ONLY SHOW

#THIS IS THE IDENTIFIER
#download('gd73-06-10.sbd.hollister.174.sbeok.shnf', verbose=True, glob_pattern='*.mp3', destdir=r"H:\gd") # SUCCESS

#-----------------------------------------------------------------------------------------------------------------------

37 changes: 37 additions & 0 deletions download_all_gd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Download every Grateful Dead show listed in a CSV of archive.org identifiers.

Reads one identifier per line from CSV_PATH and downloads each item's *.mp3
files into DEST_DIR using the `internetarchive` package
(`pip install internetarchive`; credentials configured via `ia configure`).
"""

import time as t

#-----------------------------------------------------------------------------------------------------------------------

# PATH TO CSV FILE (one archive.org identifier per line, no header row)
CSV_PATH = r"H:\14000.csv"

# LOCAL DIRECTORY TO SAVE FILES
DEST_DIR = r"C:\Users\username\Desktop\gd"

#-----------------------------------------------------------------------------------------------------------------------


def read_identifiers(path):
    """Return the non-blank, whitespace-stripped lines of *path* as a list.

    The original script read the file twice (the first read, done without a
    context manager, was immediately discarded); a single `with` block is
    sufficient and guarantees the handle is closed. Blank lines are skipped
    so they are never passed to download().
    """
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]


def main():
    """Time and run the bulk download of all shows listed in CSV_PATH."""
    # Third-party import kept function-local so read_identifiers() can be
    # imported (and tested) without the internetarchive package installed.
    from internetarchive import download

    # START TIME OF JOB
    start = t.time()

    # DOWNLOAD ALL 14000 GRATEFUL DEAD SHOWS AT ONCE
    for identifier in read_identifiers(CSV_PATH):
        download(identifier, verbose=True, glob_pattern='*.mp3', destdir=DEST_DIR)

    # END TIME OF JOB
    end = t.time()
    print('time to complete: ' + str((end - start) / 60) + ' minutes')


if __name__ == "__main__":
    main()
135 changes: 135 additions & 0 deletions download_gd_by_year.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Download Grateful Dead shows from archive.org, segmented by year.

Reads archive.org identifiers (one per line) from CSV_PATH, groups them by
year using the identifier naming convention of the GratefulDead collection
('gdYY...' / 'gd19YY...'), then downloads one chosen year's shows as *.mp3
into DEST_DIR. Requires the `internetarchive` package and `ia configure`.
"""

import time as t

#-----------------------------------------------------------------------------------------------------------------------

# PATH TO CSV FILE (one archive.org identifier per line, no header row)
CSV_PATH = r"H:\14000.csv"

# LOCAL DIRECTORY TO SAVE FILES
DEST_DIR = r"C:\Users\username\Desktop\gd"

# YEAR TO DOWNLOAD; CHANGE AS DESIRED: '65' through '95', or 'gdnrps'
YEAR_TO_DOWNLOAD = '75'

#-----------------------------------------------------------------------------------------------------------------------


def read_identifiers(path):
    """Return the non-blank, whitespace-stripped lines of *path* as a list.

    The original script read the file twice (the first read, done without a
    context manager, was immediately discarded); a single `with` block is
    sufficient and guarantees the handle is closed.
    """
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]


def segment_by_year(identifiers):
    """Group *identifiers* by two-digit year.

    Returns a dict mapping '65'..'95' (plus the special key 'gdnrps') to the
    list of identifiers containing that year's markers, preserving input
    order. An identifier for year YY matches if it contains 'gdYY' or
    'gd19YY' anywhere — exactly the substring test the original performed in
    31 near-identical copy/pasted blocks, now expressed once.
    """
    matchers = {str(y): ('gd%d' % y, 'gd19%d' % y) for y in range(65, 96)}
    matchers['gdnrps'] = ('gd_nrps',)
    return {
        key: [s for s in identifiers if any(pat in s for pat in patterns)]
        for key, patterns in matchers.items()
    }


def main():
    """Time and run the download of YEAR_TO_DOWNLOAD's shows."""
    # Third-party import kept function-local so the helpers above can be
    # imported (and tested) without the internetarchive package installed.
    from internetarchive import download

    # START TIME OF JOB
    start = t.time()

    # SEGMENT BY YEAR, THEN DOWNLOAD THE SELECTED YEAR
    by_year = segment_by_year(read_identifiers(CSV_PATH))
    for identifier in by_year[YEAR_TO_DOWNLOAD]:
        download(identifier, verbose=True, glob_pattern='*.mp3', destdir=DEST_DIR)

    # END TIME OF JOB
    end = t.time()
    print('time to complete: ' + str((end - start) / 60) + ' minutes')


if __name__ == "__main__":
    main()

0 comments on commit ffa0c08

Please sign in to comment.