-
Notifications
You must be signed in to change notification settings - Fork 179
/
Copy pathclient.py
154 lines (123 loc) · 5.9 KB
/
client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import asyncio
import grpc
import requests
import mii.legacy as mii
from .grpc_related.proto import legacymodelresponse_pb2 as modelresponse_pb2
from .grpc_related.proto import legacymodelresponse_pb2_grpc as modelresponse_pb2_grpc
from .constants import GRPC_MAX_MSG_SIZE, TaskType, DeploymentType, REQUIRED_KEYS_PER_TASK
from .method_table import GRPC_METHOD_TABLE
from .config import MIIConfig
from .utils import import_score_file
def _get_mii_config(deployment_name):
    """Load the MIIConfig for a LOCAL deployment from its generated score file."""
    score_module = import_score_file(deployment_name, DeploymentType.LOCAL)
    return MIIConfig(**score_module.mii_config)
def mii_query_handle(deployment_name):
    """Get a query handle for a local deployment:
    mii/examples/local/gpt2-query-example.py
    mii/examples/local/roberta-qa-query-example.py
    Arguments:
        deployment_name: Name of the deployment. Used as an identifier for posting queries for ``LOCAL`` deployment.
    Returns:
        query_handle: A query handle with a single method `.query(request_dictionary)` using which queries can be sent to the model.
    """
    # Non-persistent (in-process) models bypass gRPC entirely.
    if deployment_name in mii.non_persistent_models:
        _, task = mii.non_persistent_models[deployment_name]
        return MIINonPersistentClient(task, deployment_name)

    mii_config = _get_mii_config(deployment_name)
    # TODO: the host argument can probably be removed
    return MIIClient(mii_config.model_conf.task, "localhost", mii_config.port_number)
def create_channel(host, port):
    """Open an async, insecure gRPC channel to host:port with enlarged message limits."""
    size_options = [
        ("grpc.max_send_message_length", GRPC_MAX_MSG_SIZE),
        ("grpc.max_receive_message_length", GRPC_MAX_MSG_SIZE),
    ]
    return grpc.aio.insecure_channel(f"{host}:{port}", options=size_options)
class MIIClient:
    """
    Client to send queries to a single endpoint.
    """
    def __init__(self, task, host, port):
        # One event loop is kept so the synchronous wrappers below can
        # drive the async gRPC stub via run_until_complete.
        self.asyncio_loop = asyncio.get_event_loop()
        self.stub = modelresponse_pb2_grpc.ModelResponseStub(create_channel(host, port))
        self.task = task

    async def _request_async_response(self, request_dict, **query_kwargs):
        # Dispatch through the per-task method table: pack the request into a
        # proto, invoke the matching stub RPC, then unpack the proto response.
        if self.task not in GRPC_METHOD_TABLE:
            raise ValueError(f"unknown task: {self.task}")
        methods = GRPC_METHOD_TABLE[self.task]
        request_proto = methods.pack_request_to_proto(request_dict, **query_kwargs)
        rpc = getattr(self.stub, methods.method)
        response_proto = await rpc(request_proto)
        return methods.unpack_response_from_proto(response_proto)

    def query(self, request_dict, **query_kwargs):
        """Synchronously send a query to the endpoint and return the result."""
        coro = self._request_async_response(request_dict, **query_kwargs)
        return self.asyncio_loop.run_until_complete(coro)

    async def terminate_async(self):
        empty = modelresponse_pb2.google_dot_protobuf_dot_empty__pb2.Empty()
        await self.stub.Terminate(empty)

    def terminate(self):
        """Ask the server behind this client to shut down."""
        self.asyncio_loop.run_until_complete(self.terminate_async())

    async def create_session_async(self, session_id):
        return await self.stub.CreateSession(
            modelresponse_pb2.SessionID(session_id=session_id))

    def create_session(self, session_id):
        """Create a server-side generation session (text-generation only)."""
        assert (
            self.task == TaskType.TEXT_GENERATION
        ), f"Session creation only available for task '{TaskType.TEXT_GENERATION}'."
        return self.asyncio_loop.run_until_complete(
            self.create_session_async(session_id))

    async def destroy_session_async(self, session_id):
        await self.stub.DestroySession(
            modelresponse_pb2.SessionID(session_id=session_id))

    def destroy_session(self, session_id):
        """Destroy a server-side generation session (text-generation only)."""
        assert (
            self.task == TaskType.TEXT_GENERATION
        ), f"Session deletion only available for task '{TaskType.TEXT_GENERATION}'."
        self.asyncio_loop.run_until_complete(self.destroy_session_async(session_id))
class MIINonPersistentClient:
    """Client for models loaded in-process (non-persistent deployments).

    Queries run directly against the local inference pipeline registered in
    ``mii.non_persistent_models`` rather than over gRPC.
    """
    def __init__(self, task, deployment_name):
        self.task = task
        self.deployment_name = deployment_name

    def query(self, request_dict, **query_kwargs):
        """Run a query against the in-process pipeline for this deployment.

        Arguments:
            request_dict: Task-specific request payload; required keys are
                checked against REQUIRED_KEYS_PER_TASK.
            query_kwargs: Extra keyword arguments forwarded to the pipeline.

        Returns:
            The result of the task method's ``run_inference``.
        """
        assert (
            self.deployment_name in mii.non_persistent_models
        ), f"deployment: {self.deployment_name} not found"
        task_methods = GRPC_METHOD_TABLE[self.task]
        inference_pipeline = mii.non_persistent_models[self.deployment_name][0]

        # Validate the payload before unpacking task-specific positional args.
        for key in REQUIRED_KEYS_PER_TASK[self.task]:
            assert key in request_dict, f"Task '{self.task}' requires '{key}' key"

        if self.task == TaskType.QUESTION_ANSWERING:
            args = (request_dict["question"], request_dict["context"])
        elif self.task == TaskType.ZERO_SHOT_IMAGE_CLASSIFICATION:
            args = (request_dict["image"], request_dict["candidate_labels"])
        elif self.task == TaskType.TEXT2IMG:
            args = (request_dict["prompt"], request_dict.get("negative_prompt", None))
        elif self.task == TaskType.INPAINTING:
            args = (request_dict["prompt"],
                    request_dict["image"],
                    request_dict["mask_image"],
                    request_dict.get("negative_prompt", None))
        else:
            # Default tasks (e.g. text generation) take a single "query" input.
            args = (request_dict["query"], )

        # FIX: the original built a per-branch `kwargs = query_kwargs` alias but
        # then passed `query_kwargs` anyway — the alias was dead and would have
        # silently masked any future branch-specific kwargs. Forward the query
        # kwargs directly (behavior unchanged: the alias was always identical).
        return task_methods.run_inference(inference_pipeline, args, query_kwargs)

    def terminate(self):
        """Remove this deployment from the in-process model registry."""
        print(f"Terminating {self.deployment_name}...")
        del mii.non_persistent_models[self.deployment_name]
def terminate_restful_gateway(deployment_name):
    """Ask the deployment's RESTful gateway to shut down, if one is enabled."""
    mii_config = _get_mii_config(deployment_name)
    if not mii_config.enable_restful_api:
        return
    requests.get(f"http://localhost:{mii_config.restful_api_port}/terminate")