@@ -31,35 +31,37 @@ def getSearchResults(term, page, column):
31
31
def formatBooks(books, page):
    '''Build the printable rows and the mirror links for one results page.

    Args:
        books: iterable of result rows. Each row must provide
            ``find_all('td')`` returning its cells (presumably the
            BeautifulSoup ``<tr>`` tags from getSearchResults -- confirm).
        page: 1-based page number; numbering continues across pages
            assuming 25 results per page.

    Returns:
        A tuple ``(fmt_books, books_mirrors)``:
        * fmt_books: list of tuples
          ``(number, author, short_title, publisher, year, lang, ext, size)``.
        * books_mirrors: list of dicts ``{'title': full_title,
          'mirrors': {mirror_index: href}}`` where mirror_index is 0-4 and
          only mirrors that actually carry a link are present.

    Rows with fewer than 14 cells are skipped and do not consume a number.
    '''
    results_per_page = 25
    min_cells = 14          # Rows shorter than this are not book rows
    first_mirror_cell = 10  # Cells 10..14 hold the (up to) five mirrors
    max_mirrors = 5

    fmt_books = []
    books_mirrors = []  # List of dicts with complete titles and mirrors
    cont_book = (page - 1) * results_per_page + 1  # Numbering starts at 1

    for rawbook in books:
        book_attrs = rawbook.find_all('td')
        if len(book_attrs) < min_cells:
            continue

        authors = [a.text for a in book_attrs[1].find_all('a')]
        author = ', '.join(authors[:N_AUTHORS])[:MAX_CHARS_AUTHORS]

        title = book_attrs[2].find(title=True).text
        tinytitle = title[:MAX_CHARS_TITLE]

        publisher = book_attrs[3].text[:MAX_CHARS_PUBLISHER]
        year = book_attrs[4].text
        lang = book_attrs[6].text[:2]  # Show only 2 first characters
        size = book_attrs[7].text
        ext = book_attrs[8].text

        # Slice instead of indexing: a row with exactly 14 cells has no
        # cell 14, and indexing it would raise IndexError.
        mirror_cells = book_attrs[
            first_mirror_cell:first_mirror_cell + max_mirrors]
        mirror_list = {}  # Dictionary of the available mirrors
        for mirror, cell in enumerate(mirror_cells):
            anchor = cell.a
            if anchor:
                mirror_list[mirror] = anchor.attrs['href']

        fmt_books.append((str(cont_book), author, tinytitle, publisher,
                          year, lang, ext, size))
        books_mirrors.append({'title': title, 'mirrors': mirror_list})
        cont_book += 1

    return (fmt_books, books_mirrors)
65
67
@@ -88,7 +90,7 @@ def selectBook(books, mirrors, page, n_books):
88
90
title = '{}.{}' .format (
89
91
mirrors [choice ]['title' ], books [choice ][- 2 ])
90
92
91
- if True :
93
+ if False :
92
94
''' This is the default mirror.
93
95
In the case we can get the other mirrors to work,
94
96
change True to a boolean variable defined in settings.py
@@ -97,14 +99,20 @@ def selectBook(books, mirrors, page, n_books):
97
99
DownloadBook .default_mirror (
98
100
mirrors [choice ]['mirrors' ][0 ], title )
99
101
else :
100
- number_of_mirrors = 4
101
- print (
102
- "\n #1: Mirror libgen.io (default)" ,
103
- "\n #2: Mirror libgen.pw" ,
104
- "\n #3: Mirror bookfi.net" ,
105
- "\n #4: Mirror b-ok" ,
106
- )
107
- while True :
102
+ number_of_mirrors = len (mirrors [choice ]['mirrors' ])
103
+ print_list = (
104
+ "#1: Mirror bookdescr.org (default)" ,
105
+ "#2: Mirror libgen.me" ,
106
+ "#3: Mirror library1.org" ,
107
+ "#4: Mirror b-ok.cc" ,
108
+ "#5: Mirror bookfi.net" )
109
+
110
+ while SHOW_MIRRORS :
111
+ print ("\n Mirrors Available: \n " )
112
+ ava_mirrors = list (mirrors [choice ]['mirrors' ].keys ())
113
+ for mir in ava_mirrors :
114
+ print (print_list [mir ])
115
+
108
116
option = input (
109
117
'\n Type # of mirror to start download or q to quit: ' )
110
118
@@ -125,6 +133,9 @@ def selectBook(books, mirrors, page, n_books):
125
133
DownloadBook .fourth_mirror (
126
134
mirrors [choice ]['mirrors' ][3 ], title )
127
135
pass
136
+ elif int (option ) == 5 :
137
+ DownloadBook .fifth_mirror (
138
+ mirrors [choice ]['mirrors' ][4 ], title )
128
139
129
140
return (False )
130
141
@@ -169,53 +180,95 @@ class DownloadBook():
169
180
'Connection' : connection ,
170
181
}
171
182
183
def save_book(download_link, file_name):
    '''Download ``download_link`` into DOWNLOAD_PATH as ``file_name``.

    Characters that are not allowed in file names on common file systems
    (backslash, slash, colon, asterisk, question mark, double quote,
    angle brackets and pipe) are each replaced by a space before saving.

    Args:
        download_link: URL passed to ``request.urlretrieve``.
        file_name: desired file name (title plus extension).

    Nothing is downloaded when DOWNLOAD_PATH is not an existing
    directory; a message asking to fix settings.py is printed instead.
    '''
    if not os.path.isdir(DOWNLOAD_PATH):
        if os.path.isfile(DOWNLOAD_PATH):
            print('The download path is not a directory. Change it in settings.py')
        else:
            print('The download path does not exist. Change it in settings.py')
        return

    # Escaped backslash: the previous '\/' was an invalid escape sequence.
    bad_chars = '\\/:*?"<>|'
    safe_name = file_name.translate(
        str.maketrans({char: ' ' for char in bad_chars}))

    print('Downloading...')
    path = os.path.join(DOWNLOAD_PATH, safe_name)
    request.urlretrieve(download_link, filename=path)
    print('Book downloaded to {}'.format(os.path.abspath(path)))
196
+
172
197
def default_mirror(link, filename):
    '''This is the default (and first) mirror to download.
    The base of this mirror is http://booksdescr.org

    Fetches the page at ``link``, takes the first anchor whose text is
    exactly 'Libgen' and passes its href to DownloadBook.save_book,
    which stores the file as ``filename``.
    '''
    req = request.Request(link, headers=DownloadBook.headers)
    # Parse inside the ``with`` so the HTTP response is always closed.
    with request.urlopen(req) as source:
        soup = BeautifulSoup(source, 'lxml')

    for a in soup.find_all('a'):
        download_url = a.attrs.get('href')
        if a.text == 'Libgen' and download_url:
            DownloadBook.save_book(download_url, filename)
            # Stop here: a repeated link must not trigger a second download.
            break
    else:
        print('No download link found on this mirror.')
203
-
204
210
def second_mirror(link, filename):
    '''This is the second mirror to download.
    The base of this mirror is https://libgen.me

    Fetches the page at ``link``, follows the 'Get from vault' anchor and,
    on that second page, passes the href of the 'Get' anchor to
    DownloadBook.save_book, which stores the file as ``filename``.
    '''
    from urllib.parse import urljoin

    mother_url = "https://libgen.me"

    req = request.Request(link, headers=DownloadBook.headers)
    # Parse inside the ``with`` so the HTTP response is always closed.
    with request.urlopen(req) as source:
        soup = BeautifulSoup(source, 'lxml')

    for a in soup.find_all('a'):
        next_link = a.attrs.get('href')
        if a.text != 'Get from vault' or not next_link:
            continue

        # urljoin copes with relative and absolute hrefs alike, unlike
        # plain string concatenation.
        next_req = request.Request(
            urljoin(mother_url, next_link), headers=DownloadBook.headers)
        with request.urlopen(next_req) as next_source:
            next_soup = BeautifulSoup(next_source, 'lxml')

        for next_a in next_soup.find_all('a'):
            item_url = next_a.attrs.get('href')
            if next_a.text == 'Get' and item_url:
                DownloadBook.save_book(item_url, filename)
                # One successful hand-off is enough; avoid re-downloading.
                return

    print('No download link found on this mirror.')
209
228
210
229
def third_mirror(link, filename):
    '''This is the third mirror to download.
    The base of this mirror is http://library1.org

    Fetches the page at ``link``, takes the first anchor whose text is
    exactly 'GET' and passes its href to DownloadBook.save_book, which
    stores the file as ``filename``.
    '''
    req = request.Request(link, headers=DownloadBook.headers)
    # Parse inside the ``with`` so the HTTP response is always closed.
    with request.urlopen(req) as source:
        soup = BeautifulSoup(source, 'lxml')

    for a in soup.find_all('a'):
        download_url = a.attrs.get('href')
        if a.text == 'GET' and download_url:
            DownloadBook.save_book(download_url, filename)
            # Stop here: a repeated link must not trigger a second download.
            break
    else:
        print('No download link found on this mirror.')
214
240
215
241
def fourth_mirror(link, filename):
    '''This is the fourth mirror to download.
    The base of this mirror is https://b-ok.cc

    Fetches the page at ``link``, follows the 'DOWNLOAD' anchor and, on
    that second page, resolves the href of the anchor whose text contains
    ' Download ' against the mirror base and passes it to
    DownloadBook.save_book, which stores the file as ``filename``.
    '''
    from urllib.parse import urljoin

    mother_url = "https://b-ok.cc"

    req = request.Request(link, headers=DownloadBook.headers)
    # Parse inside the ``with`` so the HTTP response is always closed.
    with request.urlopen(req) as source:
        soup = BeautifulSoup(source, 'lxml')

    for a in soup.find_all('a'):
        next_link = a.attrs.get('href')
        if a.text != 'DOWNLOAD' or not next_link:
            continue

        # urljoin copes with relative and absolute hrefs alike, unlike
        # plain string concatenation.
        next_req = request.Request(
            urljoin(mother_url, next_link), headers=DownloadBook.headers)
        with request.urlopen(next_req) as next_source:
            next_soup = BeautifulSoup(next_source, 'lxml')

        for next_a in next_soup.find_all('a'):
            item_url = next_a.attrs.get('href')
            if ' Download ' in next_a.text and item_url:
                DownloadBook.save_book(
                    urljoin(mother_url, item_url), filename)
                # One successful hand-off is enough; avoid re-downloading.
                return

    print('No download link found on this mirror.')
259
+
260
def fifth_mirror(link, filename):
    '''This is the fifth mirror to download.
    The base of this mirror is https://bookfi.net

    Fetches the page at ``link``, takes the first anchor whose text
    contains 'Скачать' (Russian for "Download") and passes its href to
    DownloadBook.save_book, which stores the file as ``filename``.
    '''
    req = request.Request(link, headers=DownloadBook.headers)
    # Parse inside the ``with`` so the HTTP response is always closed.
    with request.urlopen(req) as source:
        soup = BeautifulSoup(source, 'lxml')

    for a in soup.find_all('a'):
        download_url = a.attrs.get('href')
        if 'Скачать' in a.text and download_url:
            DownloadBook.save_book(download_url, filename)
            # Stop here: a repeated link must not trigger a second download.
            break
    else:
        print('No download link found on this mirror.')
271
+
219
272
220
273
221
274
if __name__ == '__main__' :
@@ -255,6 +308,7 @@ def fourth_mirror(link, filename):
255
308
else :
256
309
raw_books = getSearchResults (search_term , page , sel_column )
257
310
311
+
258
312
if raw_books :
259
313
new_books , new_mirrors = formatBooks (raw_books , page )
260
314
books += new_books
0 commit comments