Skip to content

Commit 5f78604

Browse files
author
Nadal Rodrigo
committed
update code to libgen changes
1 parent e0f9abd commit 5f78604

File tree

2 files changed

+121
-66
lines changed

2 files changed

+121
-66
lines changed

pylibgen.py

+120-66
Original file line numberDiff line numberDiff line change
@@ -31,35 +31,37 @@ def getSearchResults(term, page, column):
3131
def formatBooks(books, page):
3232
fmt_books = []
3333
books_mirrors = [] # List of dicts with complete titles and mirrors
34-
35-
for i, rawbook in enumerate(books):
36-
i += (page - 1) * 25
37-
34+
cont_book = (page - 1)*25 + 1
35+
for rawbook in books:
36+
3837
book_attrs = rawbook.find_all('td')
3938

40-
authors = [a.text for a in book_attrs[1].find_all('a')]
41-
author = ', '.join(authors[:N_AUTHORS])
42-
author = author[:MAX_CHARS_AUTHORS]
43-
44-
title = book_attrs[2].find(title=True).text
45-
tinytitle = title[:MAX_CHARS_TITLE]
46-
47-
publisher = book_attrs[3].text[:MAX_CHARS_PUBLISHER]
48-
year = book_attrs[4].text
49-
lang = book_attrs[6].text[:2] # Show only 2 first characters
50-
size = book_attrs[7].text
51-
ext = book_attrs[8].text
52-
mirror_list = [] # List of all the four mirrors
53-
for mirror in range(9, 13):
54-
mirror_list.append(book_attrs[mirror].a.attrs['href'])
55-
56-
book = (str(i + 1), author, tinytitle, publisher,
57-
year, lang, ext, size) # Start at 1
58-
59-
book_mirrors = {'title': title, 'mirrors': mirror_list}
60-
books_mirrors.append(book_mirrors)
61-
62-
fmt_books.append(book)
39+
if len(book_attrs) >= 14:
40+
authors = [a.text for a in book_attrs[1].find_all('a')]
41+
author = ', '.join(authors[:N_AUTHORS])
42+
author = author[:MAX_CHARS_AUTHORS]
43+
44+
title = book_attrs[2].find(title=True).text
45+
tinytitle = title[:MAX_CHARS_TITLE]
46+
47+
publisher = book_attrs[3].text[:MAX_CHARS_PUBLISHER]
48+
year = book_attrs[4].text
49+
lang = book_attrs[6].text[:2] # Show only 2 first characters
50+
size = book_attrs[7].text
51+
ext = book_attrs[8].text
52+
mirror_list = {} # Dictionary mapping mirror index (0-4) to its link, only for mirrors that have one
53+
for i in range(10, 15):
54+
mirror = i - 10
55+
if book_attrs[i].a:
56+
mirror_list[mirror] = book_attrs[i].a.attrs['href']
57+
58+
book = (str(cont_book), author, tinytitle, publisher,
59+
year, lang, ext, size) # Start at 1
60+
61+
book_mirrors = {'title': title, 'mirrors': mirror_list}
62+
books_mirrors.append(book_mirrors)
63+
cont_book += 1
64+
fmt_books.append(book)
6365

6466
return(fmt_books, books_mirrors)
6567

@@ -88,7 +90,7 @@ def selectBook(books, mirrors, page, n_books):
8890
title = '{}.{}'.format(
8991
mirrors[choice]['title'], books[choice][-2])
9092

91-
if True:
93+
if False:
9294
''' This is the default mirror.
9395
In the case we can get the other mirrors to work,
9496
change True to a boolean variable defined in settings.py
@@ -97,14 +99,20 @@ def selectBook(books, mirrors, page, n_books):
9799
DownloadBook.default_mirror(
98100
mirrors[choice]['mirrors'][0], title)
99101
else:
100-
number_of_mirrors = 4
101-
print(
102-
"\n #1: Mirror libgen.io (default)",
103-
"\n #2: Mirror libgen.pw",
104-
"\n #3: Mirror bookfi.net",
105-
"\n #4: Mirror b-ok",
106-
)
107-
while True:
102+
number_of_mirrors = len(mirrors[choice]['mirrors'])
103+
print_list = (
104+
"#1: Mirror bookdescr.org (default)",
105+
"#2: Mirror libgen.me",
106+
"#3: Mirror library1.org",
107+
"#4: Mirror b-ok.cc",
108+
"#5: Mirror bookfi.net")
109+
110+
while SHOW_MIRRORS:
111+
print("\nMirrors Available: \n")
112+
ava_mirrors = list(mirrors[choice]['mirrors'].keys())
113+
for mir in ava_mirrors:
114+
print(print_list[mir])
115+
108116
option = input(
109117
'\nType # of mirror to start download or q to quit: ')
110118

@@ -125,6 +133,9 @@ def selectBook(books, mirrors, page, n_books):
125133
DownloadBook.fourth_mirror(
126134
mirrors[choice]['mirrors'][3], title)
127135
pass
136+
elif int(option) == 5:
137+
DownloadBook.fifth_mirror(
138+
mirrors[choice]['mirrors'][4], title)
128139

129140
return(False)
130141

@@ -169,53 +180,95 @@ class DownloadBook():
169180
'Connection': connection,
170181
}
171182

183+
def save_book(download_link, file_name):
184+
if os.path.exists(DOWNLOAD_PATH) and os.path.isdir(DOWNLOAD_PATH):
185+
bad_chars = '\/:*?"<>|'
186+
for char in bad_chars:
187+
file_name = file_name.replace(char, " ")
188+
print('Downloading...')
189+
path = '{}/{}'.format(DOWNLOAD_PATH, file_name)
190+
request.urlretrieve(download_link, filename=path)
191+
print('Book downloaded to {}'.format(os.path.abspath(path)))
192+
elif os.path.isfile(DOWNLOAD_PATH):
193+
print('The download path is not a directory. Change it in settings.py')
194+
else:
195+
print('The download path does not exist. Change it in settings.py')
196+
172197
def default_mirror(link, filename):
173198
'''This is the default (and first) mirror to download.
174-
The base of this mirror is http://libgen.io/ads.php?'''
199+
The base of this mirror is http://booksdescr.org'''
175200
req = request.Request(link, headers=DownloadBook.headers)
176201
source = request.urlopen(req)
177202
soup = BeautifulSoup(source, 'lxml')
178-
mother_link = "https://libgen.pw"
179203

180204
for a in soup.find_all('a'):
181-
if a.text == 'Open download':
182-
item_url = a.attrs['href']
183-
getpage_url = mother_link + item_url
184-
req2 = request.Request(getpage_url, headers=DownloadBook.headers)
185-
source2 = request.urlopen(req2)
186-
soup2 = BeautifulSoup(source2, 'lxml')
187-
188-
for a in soup2.find_all('a'):
189-
if a.text == 'Get':
190-
download_url = mother_link + a.attrs['href']
191-
break
205+
if a.text == 'Libgen':
206+
download_url = a.attrs['href']
207+
DownloadBook.save_book(download_url, filename)
192208

193209

194-
if os.path.exists(DOWNLOAD_PATH) and os.path.isdir(DOWNLOAD_PATH):
195-
print('Downloading...')
196-
path = '{}/{}'.format(DOWNLOAD_PATH, filename)
197-
request.urlretrieve(download_url, filename=path)
198-
print('Book downloaded to {}'.format(os.path.abspath(path)))
199-
elif os.path.isfile(DOWNLOAD_PATH):
200-
print('The download path is not a directory. Change it in settings.py')
201-
else:
202-
print('The download path does not exist. Change it in settings.py')
203-
204210
def second_mirror(link, filename):
205211
'''This is the second mirror to download.
206-
The base of this mirror is https://libgen.pw/view.php?*'''
207-
link = link.replace("view", "download")
208-
pass
212+
The base of this mirror is https://libgen.me'''
213+
req = request.Request(link, headers=DownloadBook.headers)
214+
source = request.urlopen(req)
215+
soup = BeautifulSoup(source, 'lxml')
216+
mother_url = "https://libgen.me"
217+
218+
for a in soup.find_all('a'):
219+
if a.text == 'Get from vault':
220+
next_link = a.attrs['href']
221+
next_req = request.Request(mother_url + next_link, headers=DownloadBook.headers)
222+
next_source = request.urlopen(next_req)
223+
next_soup = BeautifulSoup(next_source, 'lxml')
224+
for next_a in next_soup.find_all('a'):
225+
if next_a.text == 'Get':
226+
item_url = next_a.attrs['href']
227+
DownloadBook.save_book(item_url, filename)
209228

210229
def third_mirror(link, filename):
211230
'''This is the third mirror to download.
212-
The base of this mirror is http://en.bookfi.net/md5/*'''
213-
pass
231+
The base of this mirror is http://library1.org'''
232+
req = request.Request(link, headers=DownloadBook.headers)
233+
source = request.urlopen(req)
234+
soup = BeautifulSoup(source, 'lxml')
235+
236+
for a in soup.find_all('a'):
237+
if a.text == 'GET':
238+
download_url = a.attrs['href']
239+
DownloadBook.save_book(download_url, filename)
214240

215241
def fourth_mirror(link, filename):
216242
'''This is the fourth mirror to download.
217-
The base of this mirror is http://b-ok.org/md5/*'''
218-
pass
243+
The base of this mirror is https://b-ok.cc'''
244+
req = request.Request(link, headers=DownloadBook.headers)
245+
source = request.urlopen(req)
246+
soup = BeautifulSoup(source, 'lxml')
247+
mother_url = "https://b-ok.cc"
248+
249+
for a in soup.find_all('a'):
250+
if a.text == 'DOWNLOAD':
251+
next_link = a.attrs['href']
252+
next_req = request.Request(mother_url + next_link, headers=DownloadBook.headers)
253+
next_source = request.urlopen(next_req)
254+
next_soup = BeautifulSoup(next_source, 'lxml')
255+
for next_a in next_soup.find_all('a'):
256+
if ' Download ' in next_a.text:
257+
item_url = next_a.attrs['href']
258+
DownloadBook.save_book(mother_url + item_url, filename)
259+
260+
def fifth_mirror(link, filename):
261+
'''This is the fifth mirror to download.
262+
The base of this mirror is https://bookfi.net'''
263+
req = request.Request(link, headers=DownloadBook.headers)
264+
source = request.urlopen(req)
265+
soup = BeautifulSoup(source, 'lxml')
266+
267+
for a in soup.find_all('a'):
268+
if 'Скачать' in a.text:
269+
download_url = a.attrs['href']
270+
DownloadBook.save_book(download_url, filename)
271+
219272

220273

221274
if __name__ == '__main__':
@@ -255,6 +308,7 @@ def fourth_mirror(link, filename):
255308
else:
256309
raw_books = getSearchResults(search_term, page, sel_column)
257310

311+
258312
if raw_books:
259313
new_books, new_mirrors = formatBooks(raw_books, page)
260314
books += new_books

settings.py

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
MAX_CHARS_AUTHORS = 25 # Maximum characters displayed for the author. Change according to N_AUTHORS.
44
MAX_CHARS_TITLE = 50 # Maximum characters displayed for the book title
55
MAX_CHARS_PUBLISHER = 20 # Maximum characters displayed for the publisher.
6+
SHOW_MIRRORS = True # Set to True or False depending on whether you want the program to show the download mirrors.

0 commit comments

Comments
 (0)