# photo_downloader.py
# Library Requirements
import os
import tkinter

import requests
from bs4 import BeautifulSoup
from PIL import Image
import magic
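# Dependency note: the third-party imports above come from the PyPI packages
# requests, beautifulsoup4, Pillow, and python-magic. python-magic also needs
# the libmagic system library; on Windows, the python-magic-bin package is a
# common way to get it.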
# Global Variable Definitions
base_url = "https://manganelo.com/chapter/yovbxa13526492"
chapternames = []
chapterid = []
# r and soup are populated in setup() once the user submits a URL
r = None
soup = None
i = 0
pages = 0
dir_name = ""
# tkinter setup
root = tkinter.Tk()
tf = tkinter.Frame(root)
bf = tkinter.Frame(root)
label_1 = tkinter.Label(bf, text="Enter a URL like the one above, then hit Download")
label_2 = tkinter.Label(bf, text="Only works with Manganelo/Mangakakalot links")
label_3 = tkinter.Label(bf, text="The GUI will freeze while downloading; this is normal")
urlentry = tkinter.Entry(tf, width=50)
urlentry.insert(0, "https://manganelo.com/chapter/yovbxa13526492")
root.title("Manga Downloader")
# Browser-like request headers so the download requests are less likely to be
# blocked; the site also checks the Referer on image requests
request_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36',
    'referer': "https://manganelo.com/",
}
# Finds the IDs and the names of all the chapters listed on the page
def chapter_name():
    for link in soup.find_all("option"):
        # Skip the image-size <option> entries and any duplicate chapters
        text = link.get_text()
        if text not in ("Fullsize", "Large", "Medium", "Small") and text not in chapternames:
            chapterid.append(link.get('data-c'))
            chapternames.append(text)
# Removes any characters that cannot be saved in the file system, such as : or /
def clean_string(string):
    valid_chars = '-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    return ''.join(x for x in string if x in valid_chars)
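# For example, clean_string('Chapter 1: "Rise/Fall"') returns 'Chapter 1 RiseFall'
# (a hypothetical title; the colon, quotes, and slash are stripped)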
def setup():
    # Downloads the first chapter page to get the chapter IDs from it
    global r
    global base_url
    global soup
    base_url = urlentry.get()
    r = requests.get(base_url + "/chapter_1", headers=request_headers)
    soup = BeautifulSoup(r.content, "html.parser")
    print("Fetched chapter list")
    chapter_name()
    dl_loop()
def dl_loop():
    global i
    global pages
    global dir_name
    for chapter in chapterid:
        print("Downloading chapter \"" + chapternames[i] + "\"")
        # Saves the directory name as the chapter name, cleaned up for Windows.
        # Windows silently drops a trailing space from a directory name, which
        # would leave Python unable to find the dir, so strip it first.
        dir_name = clean_string(chapternames[i]).rstrip()
        if not os.path.exists(dir_name):
            print(dir_name)
            os.makedirs(dir_name)
        # Downloads the chapter page and makes a new soup object for it
        new_r = requests.get(base_url + "/chapter_" + chapter, headers=request_headers)
        new_soup = BeautifulSoup(new_r.content, "html.parser")
        pages = 1
        failedloops = 0
        for img in new_soup.find_all("img"):
            looping = True
            while looping:
                src = img.get("src") or ""
                # <img> tags whose src contains /chapter or /vol are the manga pages
                if "/chapter" in src or "/vol" in src:
                    img_link = src
                    print(img_link)
                    # Makes the full path the image will be saved at, numbered
                    # by page and keeping the extension from the link
                    extension = os.path.splitext(img_link)[1]
                    full_file_path = os.path.join(dir_name, str(pages) + extension)
                    # Downloads the image and writes it out
                    img_r = requests.get(img_link, headers=request_headers)
                    with open(full_file_path, "wb") as image_file:
                        image_file.write(img_r.content)
                    try:
                        # Very important! This checks that the file downloaded properly.
                        # It opens the image and verifies it is a valid image.
                        # If NOT, it raises, stays in the loop, and re-downloads the image.
                        # If it is valid, it adds one to pages and moves on to the next link.
                        checked = Image.open(full_file_path)
                        checked.verify()
                        # Fix up the extension if it does not match the actual file type
                        imagetype = magic.from_file(full_file_path, mime=True)
                        if imagetype == "image/jpeg" and extension != ".jpg":
                            os.rename(full_file_path, os.path.splitext(full_file_path)[0] + ".jpg")
                            print("renamed to jpg")
                        elif imagetype == "image/png" and extension != ".png":
                            os.rename(full_file_path, os.path.splitext(full_file_path)[0] + ".png")
                            print("renamed to png")
                        pages += 1
                        looping = False
                    except (IOError, SyntaxError) as e:
                        # Lands here if the image is not valid and does NOT exit the loop
                        print(e)
                else:
                    looping = False
                # The page itself might fail; it sometimes serves PHP errors and
                # refreshing it gives the correct page
                if pages == 1 and failedloops < 3:
                    failedloops += 1
                    # Commenting this out until it can be made to work properly
                    # new_r = requests.get(base_url + "/chapter_" + chapter, headers=request_headers)
                    # looping = True
        i += 1
        print("Finished downloading chapter \"" + dir_name + "\"")
def main():
    button1 = tkinter.Button(tf, text="Download", width=45, command=setup)
    bf.pack(side=tkinter.BOTTOM)
    tf.pack()
    urlentry.pack()
    label_1.pack()
    label_2.pack()
    label_3.pack()
    button1.pack()
    root.mainloop()
    print("All done")

# Run main function
if __name__ == "__main__":
    main()
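# Headless sketch (an assumption, not part of the original GUI flow): the
# widgets are created at import time, so a download could also be started
# without clicking the button, e.g. from a REPL:
#   urlentry.delete(0, tkinter.END)
#   urlentry.insert(0, "https://manganelo.com/chapter/yovbxa13526492")
#   setup()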