-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalgos_v1.py
130 lines (108 loc) · 3.96 KB
/
algos_v1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Task : Main functions of iUser: language translation, speech-to-text and a community-based decision model
# Date : 10 Dec 2016
# Version : 0.1
# Author : Vigneshwer
# importing modules
import requests
import json
from google.cloud import translate
import unicodedata
import sys
import pickle
# ML modules
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn import metrics
# Python 2-only idiom: sys is reloaded and UTF-8 is then installed as the
# interpreter-wide default string encoding.
# NOTE(review): presumably this is what lets the str(...) conversions in the
# functions below accept non-ASCII text -- confirm before removing.
# NOTE(review): neither the reload builtin nor sys.setdefaultencoding is
# available on Python 3.
reload(sys)
sys.setdefaultencoding('utf8')
# Function converting speech to text
def speech_2_text(audio_location):
    """Upload an audio file to the wit.ai speech endpoint and return the reply.

    Args:
        audio_location: Path of the audio file to send. It is passed through
            ``str()`` and opened in binary mode; the bytes are posted with an
            ``audio/wav`` content type.

    Returns:
        The decoded JSON body of the HTTP response. ``entry_point`` reads the
        ``"_text"`` key from it.
    """
    url = "https://api.wit.ai/speech"
    # NOTE(review): "Bearer X" looks like a placeholder credential -- confirm
    # how the real token is meant to be supplied.
    headers = {
        'authorization': "Bearer X",
        'content-type': "audio/wav",
        'cache-control': "no-cache",
        'postman-token': "ad7f8d5a-0d50-a751-2b1a-895c1b8bfbbd"
    }
    # Read the recording inside a ``with`` block so the file handle is always
    # closed, instead of being left for the garbage collector.
    with open(str(audio_location), 'rb') as audio_file:
        data = audio_file.read()
    response = requests.request("POST", url, data=data, headers=headers)
    # Echo the raw response body for debugging.
    print(response.text)
    return response.json()
# Identify language
def identify_lang(audio_loc):
    """Report the language code for a recording.

    The audio itself is not inspected: the function prints and returns the
    fixed code ``'en'`` regardless of ``audio_loc``.

    Args:
        audio_loc: Location of the audio file (currently unused).

    Returns:
        The string ``'en'``.
    """
    detected_code = 'en'
    print("The language id is " + str(detected_code))
    return detected_code
# Convert to corresponding localized text
def text_converter(input_text, lang_id, target_id):
    """Translate text through the Google Translation v2 REST endpoint.

    The request is a plain HTTP POST authenticated with a bearer token; the
    ``google.cloud.translate`` client is not needed for it, so no client is
    instantiated here.

    Args:
        input_text: Text to translate; converted with ``str()`` before sending.
        lang_id: Language code sent as the request's ``source`` field.
        target_id: Language code sent as the request's ``target`` field.

    Returns:
        The decoded JSON body of the HTTP response. Callers in this file read
        ``['data']['translations'][0]['translatedText']`` from it.
    """
    url = "https://translation.googleapis.com/language/translate/v2"
    # NOTE(review): "nnnnn" looks like a placeholder token -- confirm how the
    # real credential is meant to be supplied.
    api_token = "nnnnn"
    payload = {
        'q': str(input_text),
        'source': lang_id,
        'target': target_id,
        'format': 'text'
    }
    headers = {
        'authorization': "Bearer " + str(api_token),
        'content-type': "application/json",
        'cache-control': "no-cache",
        'postman-token': "867863ca-6f80-1358-7811-fc81a1974b66"
    }
    # Serialize with json.dumps: str() on a dict yields a Python repr (single
    # quotes, byte escapes), which is not valid JSON for the declared
    # application/json content type.
    body = json.dumps(payload)
    response = requests.request("POST", url, data=body, headers=headers)
    # Decode the body once and reuse it for both the debug print and the
    # return value.
    result = response.json()
    print(result)
    return result
#Model trained on whatsapp community data
def community_model(input_text):
    """Classify a message with the pickled community classifier.

    The classifier is loaded from ``./model/community_classifier.p`` on every
    call and asked to predict on a one-element list holding ``input_text``.

    Args:
        input_text: The message text to classify.

    Returns:
        Whatever ``classifier.predict([input_text])`` returns;
        ``decision_making`` reads element 0 of it.
    """
    # SECURITY NOTE: unpickling runs arbitrary code stored in the file, so this
    # path must only ever point at a trusted model artifact.
    # The ``with`` block guarantees the model file is closed after loading.
    with open("./model/community_classifier.p", "rb") as model_file:
        classifier = pickle.load(model_file)
    return classifier.predict([input_text])
#Model which helps in making a decision
def decision_making(final_decision):
    """Map the first predicted label to a canned reply.

    Args:
        final_decision: Indexable prediction result; only element 0 is read
            and compared against the strings "1", "2" and "3".

    Returns:
        The reply associated with the matching label, or the fallback
        message when no label matches.
    """
    replies = (
        (str(1), "Hi There !! This is a cycling group where people plan trips & discuss about gears"),
        (str(2), "The next tour is this Saturday at 17th December, please contact @dvigneshwer"),
        (str(3), "Visit Hoodi Decathlon to buy this gear"),
    )
    # Labels are checked in order with ==, stopping at the first match.
    for label, reply in replies:
        if final_decision[0] == label:
            return reply
    return "Plese try again"
def entry_point(audio_location, target_lang):
    """Run the audio pipeline: transcribe, translate, classify and decide.

    Args:
        audio_location: Path of the audio file handed to ``speech_2_text``
            and ``identify_lang``.
        target_lang: Language code the transcript is translated into.

    Returns:
        A dict with the keys ``'Speech_2_text'`` (the transcript),
        ``'Localized_content'`` (the translated text) and ``'Decision'``
        (the reply chosen by ``decision_making``).
    """
    # Speech to text
    converted_text = str(speech_2_text(audio_location)["_text"])
    print("The converted text is :- " + converted_text)

    # Source and target languages
    lang_id = identify_lang(audio_location)
    print("The target language is :- " + str(target_lang))

    # Translate the transcript into the target language
    translation = text_converter(converted_text, lang_id, target_lang)
    localized_text = str(translation['data']['translations'][0]['translatedText'])
    print("The localized content is " + localized_text)

    # Community model followed by the decision step (3rd output)
    predicted = community_model(converted_text)
    decision_text = str(decision_making(predicted))
    print("The decision made is" + decision_text)

    # Processed data returned to the client as a JSON-ready dict
    return {
        'Speech_2_text': converted_text,
        'Localized_content': localized_text,
        'Decision': decision_text,
    }
def entry_point2(text, org_lang, tar_lang):
    """Translate a piece of text and wrap the result for the client.

    Args:
        text: Text to translate.
        org_lang: Language code of ``text``.
        tar_lang: Language code to translate into.

    Returns:
        A dict with the single key ``'conv_text'`` holding the translated
        text as a string.
    """
    translation = text_converter(text, org_lang, tar_lang)
    translated = str(translation['data']['translations'][0]['translatedText'])
    print("Converted to english :- " + translated)
    return {'conv_text': translated}