-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch.py
172 lines (151 loc) · 7.63 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Standard library
import argparse
import json
import logging
import shutil
import sys
from random import choice
from urllib.parse import unquote

# Third-party
import internetarchive as ia
import redis
import rq
from spotipy.oauth2 import SpotifyClientCredentials

# Local
import youtube.match as ytmatch
from archiving.youtube_archiving import archiver_submit
from metadata_update import update_metadata
from spotify.match import SpotifyMatcher
from ytsearch.exceptions import *
from ytsearch.iametadata import *
# Console logging: the logger itself captures everything (DEBUG and up),
# but the stream handler only emits WARNING and above to stderr.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.WARNING)
logger.addHandler(ch)
def merge_results(main, sub, urn):
    """Fold per-track results from *sub* into *main* under the key *urn*.

    *main* maps track names to dicts of {source_urn: match}; *sub* maps
    track names to a match for a single source.  Tracks missing from
    *main* are created; existing entries gain (or overwrite) the *urn*
    sub-key.  Mutates *main* in place and returns None.
    """
    for track, match in sub.items():
        main.setdefault(track, {})[urn] = match
if __name__ == '__main__':
    # CLI: match Internet Archive audio items against YouTube videos and/or
    # Spotify tracks, then write the discovered external identifiers back to
    # the items' metadata (unless --dry_run).
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('entries', metavar='ENTRIES', type=str, nargs='+',
                        help='One or more Internet Archive identifiers, or \
                        identifier/filename if --searchbyfilename is specified')
    parser.add_argument('-Y', '--search_youtube', dest='search_youtube',
                        action='store_true', default=False)
    parser.add_argument('-S', '--search_spotify', dest='search_spotify',
                        action='store_true', default=False)
    parser.add_argument('-c', '--config_file', dest='config_file',
                        metavar='CONFIG_FILE', default=None,
                        help='Path to an internetarchive library config file')
    parser.add_argument('-f', '--searchfullalbum', dest='search_full_album',
                        action='store_true', default=False,
                        help='Match full-album Youtube videos')
    parser.add_argument('-sf', '--searchbyfilename', dest='search_by_filename',
                        action='store_true', default=False,
                        help='Search using an input CSV of identifier/file URL pairs')
    parser.add_argument('-cac', '--clear_audio_cache', dest='clear_audio_cache',
                        action='store_true', default=False,
                        help='Remove downloaded audio files after fingerprint comparison')
    parser.add_argument('-i', '--ignore_matched', dest='ignore_matched',
                        action='store_true', default=False,
                        help='Skip matches (track or full-album) that have the \
                        corresponding external identifiers in their metadata')
    parser.add_argument('-q', '--query_format', dest='query_format',
                        metavar='QUERY_FORMAT', default=None)
    parser.add_argument('-d', '--dry_run', dest='dry_run',
                        action='store_true', default=False,
                        help='Bypass writing metadata to the Archive')
    parser.add_argument('-rq', '--use_redis_queue', dest='use_redis_queue',
                        action='store_true', default=False,
                        help='Upload metadata asynchronously via redis Queue.\
                        Requires a running redis server.')
    args = parser.parse_args()

    # Pull search settings from the internetarchive config file; every key
    # lives under a 'ytsearch' section and has a sane default.
    config = ia.config.get_config(config_file=args.config_file)
    GOOGLE_API_KEYS = [key.strip() for key in
                       config.get('ytsearch', {}).get('google_api_keys', '').split(',')]
    YOUTUBE_DL_DIR = config.get('ytsearch', {}).get('youtube_dl_dir', 'tmp/ytdl')
    IA_DL_DIR = config.get('ytsearch', {}).get('ia_dl_dir', 'tmp/iadl')
    MAX_YOUTUBE_RESULTS = int(config.get('ytsearch', {}).get('max_youtube_results', 10))
    SPOTIFY_CREDENTIALS = SpotifyClientCredentials(*config.get('ytsearch', {}).get(
        'spotify_credentials', ':').split(':'))

    # results: {identifier: {track_name: {source_urn: match}}}
    results = {}
    for entry in args.entries:
        if args.search_by_filename:
            # Entries look like "<identifier>/<urlencoded filename>".
            iaid, filename_url = entry.split('/', 1)
            filename = unquote(filename_url)
        else:
            iaid = entry
        try:
            album = IAAlbum(iaid)
        except MetadataException:
            logger.error('{}: Unable to process item metadata'.format(iaid))
            sys.exit(ExitCodes.IAMetadataError.value)
        except MediaTypeException:
            logger.error('{}: Item is not audio'.format(iaid))
            sys.exit(ExitCodes.IAMediatypeError.value)
        YOUTUBE_DL_SUBDIR = '{}/{}'.format(YOUTUBE_DL_DIR, iaid)
        results[iaid] = {t.name: {} for t in album.tracks}

        # --- YouTube matching ---
        if args.search_youtube:
            youtube_results = {}
            if args.search_full_album:
                # Fixed: the old condition (`args.ignore_matched and not
                # album.get_eid(...)`) made -f a no-op unless -i was also
                # given.  Per the help text, -i only *skips* items that
                # already carry a YouTube identifier.
                if not (args.ignore_matched and album.get_eid('youtube')):
                    youtube_results = ytmatch.match_album(album, ia_dir=IA_DL_DIR,
                                                          yt_dir=YOUTUBE_DL_SUBDIR,
                                                          api_key=GOOGLE_API_KEYS)
            # Fall back to per-track matching when no full-album match.
            if not youtube_results:
                if args.search_by_filename:
                    tracks = [album.track_map[filename]]
                elif args.ignore_matched:
                    tracks = [t for t in album.tracks if not t.get_eid('youtube')]
                else:
                    tracks = album.tracks
                youtube_results = ytmatch.match_tracks(tracks, album, ia_dir=IA_DL_DIR,
                                                       yt_dir=YOUTUBE_DL_SUBDIR,
                                                       api_key=GOOGLE_API_KEYS)
            merge_results(results[iaid], youtube_results, 'youtube')
            # Submit results to the YouTube archiver endpoint
            vids = youtube_results['full_album'] if 'full_album' in youtube_results \
                else list(youtube_results.values())
            if vids:
                if args.use_redis_queue:
                    try:
                        #TODO: Connection settings in archive config.
                        q = rq.Queue(connection=redis.Redis())
                        q.enqueue(archiver_submit, vids)
                    except redis.exceptions.ConnectionError:
                        # Fixed: was `ConnectionError()` — an except clause
                        # must name the class; an instance raises TypeError
                        # at handling time.  Fall back to direct submission.
                        archiver_submit(vids)
                else:
                    archiver_submit(vids)

        # --- Spotify matching ---
        if args.search_spotify:
            spotify_results = {}
            spm = SpotifyMatcher(SPOTIFY_CREDENTIALS, ia_dir=IA_DL_DIR)
            if args.search_full_album:
                # Same gating fix as for YouTube above.
                if not (args.ignore_matched and album.get_eid('spotify')):
                    spotify_results = spm.match_album(album)
            if not spotify_results:
                if args.search_by_filename:
                    tracks = [album.track_map[filename]]
                elif args.ignore_matched:
                    tracks = [t for t in album.tracks if not t.get_eid('spotify')]
                else:
                    tracks = album.tracks
                spotify_results = spm.match_tracks(tracks, album)
            merge_results(results[iaid], spotify_results, 'spotify')

        if args.clear_audio_cache:
            # NOTE(review): `.rstrip()` only trims whitespace — possibly
            # `.rstrip('/')` was intended to normalize the dir path; confirm.
            shutil.rmtree('{}/{}'.format(IA_DL_DIR.rstrip(), iaid), ignore_errors=True)
            shutil.rmtree(YOUTUBE_DL_SUBDIR, ignore_errors=True)

    if not args.dry_run:
        if args.use_redis_queue:
            try:
                #TODO: Connection settings in archive config.
                q = rq.Queue(connection=redis.Redis())
                q.enqueue(update_metadata, results)
            except redis.exceptions.ConnectionError:
                # Fixed: was `ConnectionError()` — see note above on except
                # clauses naming the class, not an instance.
                logger.warning('No valid redis connection; uploading metadata directly.')
                update_metadata(results)
        else:
            update_metadata(results)
    print(json.dumps(results))