-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetSub.py
222 lines (191 loc) · 7.73 KB
/
getSub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
'''Developed by Hamed Pariazar 2020
https://github.com/hamedpa
email : [email protected]'''
import imdb
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests,re
from difflib import SequenceMatcher
import os
from tqdm import tqdm
from urllib.request import Request, urlopen
import time
import json
#find name of movie from string
def getNameMovie(name):
    """Search IMDb for *name* and return the title of the first hit.

    Returns the integer 0 (not None) when the search yields an empty list.
    """
    results = imdb.IMDb().search_movie(name)
    if results == []:
        return 0
    return results[0]['title']
#add + between words in string
def addPlus(string):
    """Return *string* with its space-separated words joined by "+".

    Strings of length 0 or 1 are returned unchanged. Consecutive spaces
    produce consecutive "+" characters, because the split is on a single
    space rather than on arbitrary whitespace.
    """
    if len(string) > 1:
        # str.join replaces the manual concatenate-then-trim loop.
        return "+".join(string.split(" "))
    return string
#add - between words in string
def addDash(string):
    """Return *string* with its space-separated words joined by "-".

    Strings of length 0 or 1 are returned unchanged. Consecutive spaces
    produce consecutive "-" characters, because the split is on a single
    space rather than on arbitrary whitespace.
    """
    if len(string) > 1:
        # str.join replaces the manual concatenate-then-trim loop.
        return "-".join(string.split(" "))
    return string
#check whether every word of name occurs in link
def checkExistedInString(link, name):
    """Return True when every whitespace-separated word of *name* occurs in *link*.

    The match is a case-sensitive substring test. A *name* with no words
    (empty or whitespace only) yields True.
    """
    return all(word in link for word in name.split())
#this function corrects the name of the movie if necessary
def correctInput(nameOfMovie):
    """Resolve a raw name (e.g. a file name) to a movie title via getNameMovie.

    The first 15 characters are looked up first; if that lookup reports no
    match (0), the first 10 characters are tried instead.

    Returns:
        The title returned by getNameMovie, or 0 when no lookup succeeded
        or a lookup raised.
    """
    # Initialise the result so a failing first lookup no longer leaves
    # `inp` unbound at the return statement.
    inp = 0
    try:
        inp = getNameMovie(nameOfMovie[0:15])
        if inp == 0:
            inp = getNameMovie(nameOfMovie[0:10])
    except Exception:
        # Best effort: report the failure and fall back to the 0 sentinel.
        print('err')
    return inp
#return a number that tells how similar two strings are to each other
def similar(a, b):
    """Return the difflib similarity ratio of *a* and *b* (1.0 means identical)."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
#remove extra words from string
def removeExtraString(mainString, word):
    """Return the part of *mainString* that precedes the first *word*.

    When *word* does not occur, *mainString* is returned unchanged.
    """
    index = mainString.find(word)
    if index == -1:
        # find() signals "not found" with -1; slicing with it would
        # silently chop off the last character.
        return mainString
    return mainString[:index]
#make directory
def createDirectory(pathVideo, nameDirectory):
    """Create the directory named by joining the two arguments with a backslash.

    Missing parent directories are created as well. The outcome is printed
    to stdout; an OSError from the creation is reported, not raised.
    """
    target = "\\".join((pathVideo, nameDirectory))
    try:
        os.makedirs(target)
    except OSError:
        print("Creation of the directory %s failed" % target)
        return
    print("Successfully created the directory %s" % target)
#download subtitles with this function
def downloadSub(link, nameFile):
    """Download *link* and write the response body to the file *nameFile*.

    The body is streamed to disk with a tqdm progress indicator that
    advances once per chunk.

    Raises:
        requests.HTTPError: when the server answers with an error status,
            so an error page is never saved under the archive name.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(link, stream=True) as response:
        response.raise_for_status()
        with open(nameFile, "wb") as handle:
            # iter_content() defaults to 1-byte chunks; ask for larger
            # chunks so the file is not written one byte at a time.
            for chunk in tqdm(response.iter_content(chunk_size=8192)):
                handle.write(chunk)
#extract subtitles links from servers and websites
def _fetchArchiveLinks(pageUrl, headers, parser, includeRar=True):
    """Fetch *pageUrl* and return the anchor hrefs that contain "zip" (or "rar")."""
    with urlopen(Request(pageUrl, headers=headers)) as page:
        soup = BeautifulSoup(page, parser, from_encoding=page.info().get_param('charset'))
    return [
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if "zip" in anchor['href'] or (includeRar and "rar" in anchor['href'])
    ]


def getSubLink(nameMovie, searchLink, keyword, afterLink):
    """Collect subtitle archive links for *nameMovie* from one search site.

    The search page ``searchLink + addPlus(nameMovie) [+ afterLink]`` is
    fetched, and every anchor on it whose href contains *keyword* and looks
    related to the movie is followed. From each followed page, the hrefs
    containing "zip" or "rar" are collected.

    An anchor counts as related when one of the following holds:
      * the tail of its href is more than 40% similar to *nameMovie*;
      * the title has more than three words and all of them occur in the href;
      * the title has at most three words and its first word (the second
        one if the first is "the") occurs in the href.

    Args:
        nameMovie: movie title, words separated by single spaces.
        searchLink: URL prefix of the site's search page.
        keyword: substring an href must contain to be followed.
        afterLink: URL suffix appended after the query; it is skipped when
            it contains the character "0".

    Returns:
        A list of href strings, possibly empty.

    Raises:
        Whatever urlopen raises for the initial search page. Failures on
        the followed pages are reported with print('error') and skipped.
    """
    parser = 'html.parser'  # or 'lxml' (preferred) or 'html5lib', if installed
    hdr = {'User-Agent': 'Mozilla/5.0'}

    # NOTE(review): this is a substring test, so any suffix containing the
    # digit 0 is dropped; presumably "0" is a "no suffix" marker - confirm
    # against websites.json before tightening it to an equality test.
    if "0" not in afterLink:
        site = searchLink + addPlus(nameMovie) + afterLink
    else:
        site = searchLink + addPlus(nameMovie)

    with urlopen(Request(site, headers=hdr)) as page:
        soup = BeautifulSoup(page, parser, from_encoding=page.info().get_param('charset'))

    words = nameMovie.split(" ")
    redkeyword1 = words[0].lower()
    # Skip a leading article. The previous test `redkeyword1 in "the"` was a
    # reversed containment check (true for "t", "he", "" ...) and indexed
    # words[1] even for one-word titles.
    if redkeyword1 == "the" and len(words) > 1:
        redkeyword1 = words[1].lower()

    # Compare the title against the last len(title) + 10 characters of the
    # href. A negative slice start never wraps around for short hrefs; it
    # simply yields the whole href.
    tailLength = len(nameMovie) + 10

    archiveLinks = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        if keyword not in href:
            # Every branch below requires the keyword, so test it first.
            continue

        if similar(nameMovie, href[-tailLength:]) * 100 > 40:
            includeRar = True
        elif len(words) > 3:
            if not checkExistedInString(href, nameMovie):
                continue
            # NOTE(review): the original condition was
            # `"zip" in h or "rar" in h and redkeyword1 in href`; `and` binds
            # tighter than `or`, so only "rar" links were gated on the
            # keyword. Preserved as-is - confirm the intended grouping.
            includeRar = redkeyword1 in href
        else:
            if redkeyword1 not in href:
                continue
            includeRar = True

        try:
            archiveLinks.extend(_fetchArchiveLinks(href, hdr, parser, includeRar))
        except Exception:
            # Best effort: one unreachable or malformed page must not abort
            # the scan of the remaining results.
            print('error')
    return archiveLinks
def main():
    """Interactive entry point: sort video files into folders and fetch subtitles.

    Reads a root directory from stdin and the server list from
    ``websites.json`` in the current working directory (a ``websites``
    list whose entries carry ``link``, ``afterlink`` and ``keyword``).
    Every ``.mkv``/``.mp4`` file found under the root is moved into
    ``<root>\\movie_with_sub\\<filename>\\``, and for each configured server
    the first archive link returned by getSubLink is downloaded next to it.

    Paths are built with backslashes, matching createDirectory.
    """
    print("Welecome to subDownloader 2020 - Download all subtitles without any trouble ")
    print("Please enter address of movies: ")
    path = input()  # e.g. "M:\\myfilm"
    newpath = "movie_with_sub\\"

    with open('websites.json') as json_file:
        data = json.load(json_file)
    servers = [(p['link'], p['afterlink'], p['keyword']) for p in data['websites']]

    for root, _dirs, files in os.walk(path):
        for filename in files:
            source = os.path.join(root, filename)
            # Match on the extension instead of a substring so that files
            # such as "movie.mkv.part" are not treated as videos.
            isVideo = filename.lower().endswith((".mkv", ".mp4"))
            # Files already inside the output folder are left alone.
            if not isVideo or newpath in source:
                print()
                continue

            correctName = correctInput(filename)
            targetDir = path + "\\" + newpath + filename
            createDirectory(path, newpath + filename)
            # Move the video into its own folder by renaming its path.
            os.rename(source, targetDir + "\\" + filename)

            if not correctName:
                # correctInput signals "no title" with 0; without a title
                # there is nothing to search for.
                print("No title found for " + filename + ", skipping subtitle search")
                continue

            for c, (searchLink, afterLink, keyword) in enumerate(servers):
                try:
                    print("Download subtitle of " + correctName + " from server " + str(c + 1) + " ...")
                    # Crawl the server once and reuse the result.
                    links = getSubLink(correctName, searchLink, keyword, str(afterLink))
                    if links:
                        result1 = links[0]
                        # The old test sliced from len(result1), which is
                        # always empty, so everything was saved as ".rar".
                        extension = ".zip" if result1.lower().endswith(".zip") else ".rar"
                        downloadSub(result1, targetDir + '\\' + filename + " " + str(c) + extension)
                        print("Subtitle of " + correctName + " Downloaded successfully")
                        print()
                except Exception as err:
                    # Keep going with the next server, but say what failed
                    # instead of swallowing the error silently.
                    print("Download from server " + str(c + 1) + " failed: " + str(err))
# Start the interactive downloader only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()