Skip to content

Commit

Permalink
Merge pull request #10 from guardrails-ai/fix_remote_inferencing
Browse files Browse the repository at this point in the history
Fix remote inferencing
  • Loading branch information
zsimjee authored Aug 27, 2024
2 parents baf839e + 7859c92 commit c0082b8
Showing 1 changed file with 66 additions and 38 deletions.
104 changes: 66 additions & 38 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,67 +10,95 @@
# Deployment environment ("prod" enables GPU inference; anything else runs on CPU).
env = os.environ.get("env", "dev")
if env == "prod":
    torch_device = "cuda"
else:
    torch_device = "cpu"

# Initialize the zero-shot classification pipeline.
# NOTE(review): the diff left both the old `classifier = pipeline(` line and the
# new `model = pipeline(` line in place, producing a duplicate `model=` keyword
# (a SyntaxError). Keep only the post-merge assignment.
model = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=torch.device(torch_device),
    # Hypothesis template used by the NLI model to score each candidate topic.
    hypothesis_template="This example has to do with topic {}.",
    multi_label=True,
)


class InferenceData(BaseModel):
    """One named data payload in the request/response envelope.

    Mirrors a Triton/KServe-style named tensor — TODO confirm against the
    client that produces these requests.
    """

    # Logical name of the payload (e.g. "text", "candidate_topics").
    name: str
    # Dimensions of `data`; for flat lists this is [len(data)].
    shape: List[int]
    # The payload itself: strings for text inputs, floats for numeric ones.
    data: Union[List[str], List[float]]
    # Wire datatype tag (e.g. "BYTES" for string payloads).
    datatype: str


class InputRequest(BaseModel):
    """Request body for POST /validate.

    The handler scans `inputs` for entries named "text" (required),
    "candidate_topics" (required), and "zero_shot_threshold" (optional).

    NOTE(review): the merge left the old flat fields (`text`,
    `candidate_topics`, `zero_shot_threshold`) alongside the new `inputs`
    field; as required pydantic fields they would reject every request that
    uses the new envelope shape. The post-merge schema carries only `inputs`.
    """

    inputs: List[InferenceData]


class OutputResponse(BaseModel):
    """Response envelope for POST /validate."""

    # Identifier of the model that produced the outputs.
    modelname: str
    # Version string of that model.
    modelversion: str
    # One InferenceData entry per classified input text.
    outputs: List[InferenceData]


@app.get("/")
async def hello_world():
return "restrict_to_topic"


@app.post("/validate", response_model=OutputResponse)
def restrict_to_topic(input_request: InputRequest):
print('make request')

text = input_request.text
candidate_topics = input_request.candidate_topics
zero_shot_threshold = input_request.zero_shot_threshold


if text is None or candidate_topics is None:
async def restrict_to_topic(input_request: InputRequest):
text_vals = None
candidate_topics = None
zero_shot_threshold = 0.5

for inp in input_request.inputs:
if inp.name == "text":
text_vals = inp.data
elif inp.name == "candidate_topics":
candidate_topics = inp.data
elif inp.name == "zero_shot_threshold":
zero_shot_threshold = float(inp.data[0])

if text_vals is None or candidate_topics is None:
raise HTTPException(status_code=400, detail="Invalid input format")

# Perform zero-shot classification
result = classifier(text, candidate_topics)
print("result: ", result)
topics = result["labels"]
scores = result["scores"]
found_topics = [topic for topic, score in zip(topics, scores) if score > zero_shot_threshold]

if not found_topics:
found_topics = ["No valid topic found."]

output_data = OutputResponse(
modelname="RestrictToTopicModel",
modelversion="1",
outputs=[
InferenceData(
name="results",
datatype="BYTES",
shape=[len(found_topics)],
data=found_topics
)
]

return RestrictToTopic.infer(text_vals, candidate_topics, zero_shot_threshold)


class RestrictToTopic:
    """Static wrapper around the zero-shot topic classifier.

    The pipeline is built once at class-definition time and shared by all
    requests.

    NOTE(review): the diff left a stray `return output_data` statement
    directly in the class body (a SyntaxError, leftover from the removed
    inline handler); it is dropped here.
    """

    model_name = "facebook/bart-large-mnli"
    device = torch.device(torch_device)
    model = pipeline(
        "zero-shot-classification",
        model=model_name,
        device=device,
        # Hypothesis template used by the NLI model to score each topic.
        hypothesis_template="This example has to do with topic {}.",
        multi_label=True,
    )

    @staticmethod
    def infer(text_vals, candidate_topics, threshold) -> OutputResponse:
        """Classify each text and keep topics scoring above `threshold`.

        Args:
            text_vals: iterable of input strings to classify.
            candidate_topics: list of topic labels to score each text against.
            threshold: minimum zero-shot score for a topic to be reported.

        Returns:
            OutputResponse with one InferenceData (named "result{idx}") per
            input text; data is the matched topics, or the sentinel
            ["No valid topic found."] when none clear the threshold.
        """
        outputs = []
        for idx, text in enumerate(text_vals):
            results = RestrictToTopic.model(text, candidate_topics)
            pred_labels = [
                label
                for label, score in zip(results["labels"], results["scores"])
                if score > threshold
            ]

            if not pred_labels:
                pred_labels = ["No valid topic found."]

            outputs.append(
                InferenceData(
                    name=f"result{idx}",
                    datatype="BYTES",
                    shape=[len(pred_labels)],
                    data=pred_labels,
                )
            )

        return OutputResponse(
            modelname="RestrictToTopicModel", modelversion="1", outputs=outputs
        )


# Run the app with uvicorn
# Save this script as app.py and run with: uvicorn app:app --reload

0 comments on commit c0082b8

Please sign in to comment.