Moved hf_pipeline_llama2 under configs/llm.

drazvan · drazvan · commit d5d21fa41e1b · 2023-11-03T21:59:26.000+02:00
diff --git a/examples/__init__.py b/examples/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/examples/configs/llm/__init__.py b/examples/configs/llm/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/examples/configs/llm/hf_pipeline_llama2/README.md b/examples/configs/llm/hf_pipeline_llama2/README.md
@@ -11,10 +11,10 @@ Please install additional package via :
 `pip install accelerate transformers==4.33.1 --upgrade`
 
 
-The `meta-llama/Llama-2-13b-chat-hf` LLM model has been tested on the topical rails evaluation sets, results are available [here](../../../nemoguardrails/eval/README.md).
+The `meta-llama/Llama-2-13b-chat-hf` LLM has been tested on the topical rails evaluation sets; results are available [here](../../../../nemoguardrails/eval/README.md).
 We have also tested the factchecking rail for the same model with good results.
 There are examples on how to use the models with a HF repo id or from a local path.
 
 In this folder, the guardrails application is very basic, but anyone can change it with any other more complex configuration.
 
-**Disclaimer**: The `meta-llama/Llama-2-13b-chat-hf` LLM on tested on basic usage combining a toy example of a knowledge base, further experiments of prompt engineering needs to be done on [fact-checking](./config.yml#L133-142) for more complex queries as this model may not work correctly. Thorough testing and optimizations are needed before considering a production deployment.
+**Disclaimer**: The `meta-llama/Llama-2-13b-chat-hf` LLM was tested on basic usage combined with a toy example of a knowledge base. Further prompt engineering experiments need to be done on [fact-checking](config.yml#L133-142) for more complex queries, as this model may not work correctly. Thorough testing and optimizations are needed before considering a production deployment.
diff --git a/examples/configs/llm/hf_pipeline_llama2/__init__.py b/examples/configs/llm/hf_pipeline_llama2/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/examples/configs/llm/hf_pipeline_llama2/config.py b/examples/configs/llm/hf_pipeline_llama2/config.py
@@ -12,22 +12,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import os.path
-import pickle
-from pathlib import Path
-from typing import Optional
-import os,sys
+
 import torch
 from langchain import HuggingFacePipeline
-from langchain.chains import RetrievalQA
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.llms import BaseLLM
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores import FAISS
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
 from nemoguardrails import LLMRails, RailsConfig
-from nemoguardrails.actions import action
-from nemoguardrails.actions.actions import ActionResult
 from nemoguardrails.llm.helpers import get_llm_instance_wrapper
 from nemoguardrails.llm.providers import register_llm_provider
 
@@ -39,7 +31,7 @@ def _get_model_config(config: RailsConfig, type: str):
             return model_config
 
 
-def _load_model(model_name_or_path, device, num_gpus,hf_auth_token=None, debug=False):
+def _load_model(model_name_or_path, device, num_gpus, hf_auth_token=None, debug=False):
     """Load an HF locally saved checkpoint."""
     if device == "cpu":
         kwargs = {}
@@ -69,9 +61,14 @@ def _load_model(model_name_or_path, device, num_gpus,hf_auth_token=None, debug=F
             model_name_or_path, low_cpu_mem_usage=True, **kwargs
         )
     else:
-        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=hf_auth_token, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name_or_path, use_auth_token=hf_auth_token, use_fast=False
+        )
         model = AutoModelForCausalLM.from_pretrained(
-            model_name_or_path, low_cpu_mem_usage=True,use_auth_token=hf_auth_token, **kwargs
+            model_name_or_path,
+            low_cpu_mem_usage=True,
+            use_auth_token=hf_auth_token,
+            **kwargs,
         )
 
     if device == "cuda" and num_gpus == 1:
@@ -100,8 +97,12 @@ def init_main_llm(config: RailsConfig):
     model_path = model_config.parameters.get("path")
     device = model_config.parameters.get("device", "cuda")
     num_gpus = model_config.parameters.get("num_gpus", 1)
-    hf_token=os.environ["HF_TOKEN"] # [TODO] to register this into the config.yaml as well
-    model, tokenizer = _load_model(model_path, device, num_gpus,hf_auth_token=hf_token, debug=False)
+    hf_token = os.environ[
+        "HF_TOKEN"
+    ]  # [TODO] to register this into the config.yaml as well
+    model, tokenizer = _load_model(
+        model_path, device, num_gpus, hf_auth_token=hf_token, debug=False
+    )
 
     # repo_id="TheBloke/Wizard-Vicuna-13B-Uncensored-HF"
     # pipe = pipeline("text-generation", model=repo_id, device_map={"":"cuda:0"}, max_new_tokens=256, temperature=0.1, do_sample=True,use_cache=True)
diff --git a/examples/configs/llm/hf_pipeline_llama2/config.yml b/examples/configs/llm/hf_pipeline_llama2/config.yml
@@ -138,5 +138,5 @@ prompts:
       You are given a task to identify if the hypothesis is grounded and entailed to the evidence.
       You will only use the contents of the evidence and not rely on external knowledge.
       <</SYS>>
-      
+
       [INST]Answer with yes/no. "evidence": {{ evidence }} "hypothesis": {{ response }} "entails":[/INST]
diff --git a/examples/configs/llm/hf_pipeline_llama2/factcheck.co b/examples/configs/llm/hf_pipeline_llama2/factcheck.co
diff --git a/examples/configs/llm/hf_pipeline_llama2/general.co b/examples/configs/llm/hf_pipeline_llama2/general.co
@@ -38,7 +38,7 @@ define user ask additional help
 define bot offer additional help
   "If you have any more questions or if there's anything else I can help you with, please don't hesitate to ask."
 
-define flow 
+define flow
   user ask additional help
   bot offer additional help
 
diff --git a/examples/configs/llm/hf_pipeline_llama2/kb/report.md b/examples/configs/llm/hf_pipeline_llama2/kb/report.md