Add multi-language system prompts and BedrockChatAdapter implementation #576

Merged (13 commits, Oct 31, 2024)
3 changes: 3 additions & 0 deletions .gitignore
@@ -458,3 +458,6 @@ lib/user-interface/react-app/src/graphql/subscriptions.ts
# js function
!lib/authentication/lambda/updateUserPoolClient/index.js
!lib/authentication/lambda/updateOidcSecret/index.js
/.project
/.pydevproject
/outputs.json
@@ -60,12 +60,12 @@ def on_llm_end(
"total_tokens": 0,
}
self.usage = {
"input_tokens": self.usage.get("input_tokens")
+ generation.message.usage_metadata.get("input_tokens"),
"output_tokens": self.usage.get("output_tokens")
+ generation.message.usage_metadata.get("output_tokens"),
"total_tokens": self.usage.get("total_tokens")
+ generation.message.usage_metadata.get("total_tokens"),
"input_tokens": self.usage.get("input_tokens", 0)
+ generation.message.usage_metadata.get("input_tokens", 0),
"output_tokens": self.usage.get("output_tokens", 0)
+ generation.message.usage_metadata.get("output_tokens", 0),
"total_tokens": self.usage.get("total_tokens", 0)
+ generation.message.usage_metadata.get("total_tokens", 0),
}
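As an aside on why the added defaults matter: dict.get() without a fallback returns None when a key is absent, and a generation's usage metadata may omit a token count, so the old code could raise a TypeError when summing. A minimal sketch of the failure mode and the fix (the dicts below are made up for illustration):

previous_usage = {"input_tokens": 10, "output_tokens": 4, "total_tokens": 14}
usage_metadata = {}  # e.g. a generation whose usage metadata omits token counts

# Old behaviour: .get() returns None for the missing key, and int + None raises
# TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'
# previous_usage.get("input_tokens") + usage_metadata.get("input_tokens")

# New behaviour: both sides fall back to 0, so the running totals stay intact
input_tokens = previous_usage.get("input_tokens", 0) + usage_metadata.get("input_tokens", 0)
assert input_tokens == 10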


@@ -199,7 +199,7 @@ def run_with_chain_v2(self, user_prompt, workspace_id=None):
input={"input": user_prompt}, config=config
)
if "answer" in response:
answer = response.get("answer") # Rag flow
answer = response.get("answer") # RAG flow
else:
answer = response.content
except Exception as e:
@@ -1,2 +1,2 @@
# flake8: noqa
from .base import *
from adapters.bedrock.base import *
@@ -1,97 +1,128 @@
import os
from typing import Any, List

from ..base import ModelAdapter
from genai_core.registry import registry
import genai_core.clients

from aws_lambda_powertools import Logger

from typing import Any, List
from adapters.base import ModelAdapter
from genai_core.registry import registry
from langchain_core.messages import BaseMessage
from langchain_core.messages.ai import AIMessage
from langchain_core.messages.human import HumanMessage
from langchain_aws import ChatBedrockConverse
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts.prompt import PromptTemplate
from adapters.shared.prompts.system_prompts import (
prompts,
locale,
) # Import prompts and language

logger = Logger()

# Setting programmatic log level
# logger.setLevel("DEBUG")


def get_guardrails() -> dict:
if "BEDROCK_GUARDRAILS_ID" in os.environ:
logger.debug("Guardrails ID found in environment variables.")
return {
"guardrailIdentifier": os.environ["BEDROCK_GUARDRAILS_ID"],
"guardrailVersion": os.environ.get("BEDROCK_GUARDRAILS_VERSION", "DRAFT"),
}
logger.debug("No guardrails ID found.")
return {}
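For reference, a quick sketch of how this helper behaves; the guardrail ID and version below are made-up placeholders, not values from this PR:

import os

# No guardrail configured: the adapter receives an empty dict and skips the parameter
assert get_guardrails() == {}

# With a (hypothetical) guardrail configured via environment variables
os.environ["BEDROCK_GUARDRAILS_ID"] = "abc123example"
os.environ["BEDROCK_GUARDRAILS_VERSION"] = "1"
assert get_guardrails() == {"guardrailIdentifier": "abc123example", "guardrailVersion": "1"}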


class BedrockChatAdapter(ModelAdapter):
def __init__(self, model_id, *args, **kwargs):
self.model_id = model_id

logger.info(f"Initializing BedrockChatAdapter with model_id: {model_id}")
super().__init__(*args, **kwargs)

def get_qa_prompt(self):
system_prompt = (
"Use the following pieces of context to answer the question at the end."
" If you don't know the answer, just say that you don't know, "
"don't try to make up an answer. \n\n{context}"
# Fetch the QA prompt based on the current language
qa_system_prompt = prompts[locale]["qa_prompt"]
# Append the context placeholder if needed
qa_system_prompt_with_context = qa_system_prompt + "\n\n{context}"
logger.info(
f"Generating QA prompt template with: {qa_system_prompt_with_context}"
)
return ChatPromptTemplate.from_messages(

# Create the ChatPromptTemplate
chat_prompt_template = ChatPromptTemplate.from_messages(
[
("system", system_prompt),
("system", qa_system_prompt_with_context),
MessagesPlaceholder("chat_history"),
("human", "{input}"),
]
)

# Trace the ChatPromptTemplate by logging its content
logger.debug(f"ChatPromptTemplate messages: {chat_prompt_template.messages}")

return chat_prompt_template
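To make the wiring concrete, a small standalone sketch of how a template built this way renders; the English prompt text mirrors the hard-coded string this PR replaces, and the sample history and question are made up:

from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

qa_system_prompt_with_context = (
    "Use the following pieces of context to answer the question at the end."
    "\n\n{context}"
)
template = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt_with_context),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
messages = template.format_messages(
    context="Guardrails are read from BEDROCK_GUARDRAILS_ID.",
    chat_history=[HumanMessage("Hi"), AIMessage("Hello! How can I help?")],
    input="Where does the guardrail ID come from?",
)
# -> one system message with the context filled in, the two history messages,
#    and the human question, in that order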

def get_prompt(self):
prompt_template = ChatPromptTemplate(
# Fetch the conversation prompt based on the current language
conversation_prompt = prompts[locale]["conversation_prompt"]
logger.info("Generating general conversation prompt template.")
chat_prompt_template = ChatPromptTemplate.from_messages(
[
(
"system",
(
"The following is a friendly conversation between "
"a human and an AI."
"If the AI does not know the answer to a question, it "
"truthfully says it does not know."
),
),
("system", conversation_prompt),
MessagesPlaceholder(variable_name="chat_history"),
("human", "{input}"),
]
)

return prompt_template
# Trace the ChatPromptTemplate by logging its content
logger.debug(f"ChatPromptTemplate messages: {chat_prompt_template.messages}")
return chat_prompt_template

def get_condense_question_prompt(self):
contextualize_q_system_prompt = (
"Given the following conversation and a follow up"
" question, rephrase the follow up question to be a standalone question."
)
return ChatPromptTemplate.from_messages(
# Fetch the prompt based on the current language
condense_question_prompt = prompts[locale]["condense_question_prompt"]
logger.info("Generating condense question prompt template.")
chat_prompt_template = ChatPromptTemplate.from_messages(
[
("system", contextualize_q_system_prompt),
("system", condense_question_prompt),
MessagesPlaceholder("chat_history"),
("human", "{input}"),
]
)
# Trace the ChatPromptTemplate by logging its content
logger.debug(f"ChatPromptTemplate messages: {chat_prompt_template.messages}")
return chat_prompt_template

def get_llm(self, model_kwargs={}, extra={}):
bedrock = genai_core.clients.get_bedrock_client()
params = {}
if "temperature" in model_kwargs:
params["temperature"] = model_kwargs["temperature"]
if "topP" in model_kwargs:
params["top_p"] = model_kwargs["topP"]
if "maxTokens" in model_kwargs:
params["max_tokens"] = model_kwargs["maxTokens"]

# Collect temperature, topP, and maxTokens if available
temperature = model_kwargs.get("temperature")
top_p = model_kwargs.get("topP")
max_tokens = model_kwargs.get("maxTokens")

if temperature is not None:
params["temperature"] = temperature
if top_p:
params["top_p"] = top_p
if max_tokens:
params["max_tokens"] = max_tokens

# Fetch guardrails if any
guardrails = get_guardrails()
if len(guardrails.keys()) > 0:
params["guardrails"] = guardrails

# Log all parameters in a single log entry, including full guardrails
logger.info(
f"Creating LLM chain for model {self.model_id}",
model_kwargs=model_kwargs,
temperature=temperature,
top_p=top_p,
max_tokens=max_tokens,
guardrails=guardrails,
)

# Return ChatBedrockConverse instance with the collected params
return ChatBedrockConverse(
client=bedrock,
model=self.model_id,
@@ -107,47 +138,102 @@ class BedrockChatNoStreamingAdapter(BedrockChatAdapter):
"""Some models do not support system streaming using the converse API"""

def __init__(self, *args, **kwargs):
logger.info(
"Initializing BedrockChatNoStreamingAdapter with disabled streaming."
)
super().__init__(disable_streaming=True, *args, **kwargs)


class BedrockChatNoSystemPromptAdapter(BedrockChatAdapter):
"""Some models do not support system and message history in the conversion API"""
"""Some models do not support system and message history in the conversation API"""

def get_prompt(self):
template = """The following is a friendly conversation between a human and an AI. If the AI does not know the answer to a question, it truthfully says it does not know.
# Fetch the conversation prompt and translated
# words based on the current language
conversation_prompt = prompts[locale]["conversation_prompt"]
question_word = prompts[locale]["question_word"]
assistant_word = prompts[locale]["assistant_word"]
logger.info("Generating no-system-prompt template for conversation.")

# Combine conversation prompt, chat history, and input into the template
template = f"""{conversation_prompt}

{{chat_history}}

Current conversation:
{chat_history}
{question_word}: {{input}}

Question: {input}
{assistant_word}:"""

Assistant:""" # noqa: E501
return PromptTemplateWithHistory(
template=template, input_variables=["input", "chat_history"]
# Create the PromptTemplateWithHistory instance
prompt_template = PromptTemplateWithHistory(
input_variables=["input", "chat_history"], template=template
)

# Log the content of PromptTemplateWithHistory before returning
logger.debug(f"PromptTemplateWithHistory template: {prompt_template.template}")

return prompt_template

def get_condense_question_prompt(self):
template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {input}
Standalone question:""" # noqa: E501
return PromptTemplateWithHistory(
template=template, input_variables=["input", "chat_history"]
# Fetch the prompt and translated words based on the current language
condense_question_prompt = prompts[locale]["condense_question_prompt"]
logger.debug(f"condense_question_prompt: {condense_question_prompt}")

follow_up_input_word = prompts[locale]["follow_up_input_word"]
logger.debug(f"follow_up_input_word: {follow_up_input_word}")

standalone_question_word = prompts[locale]["standalone_question_word"]
logger.debug(f"standalone_question_word: {standalone_question_word}")

chat_history_word = prompts[locale]["chat_history_word"]
logger.debug(f"chat_history_word: {chat_history_word}")

logger.debug("Generating no-system-prompt template for condensing question.")

# Combine the prompt with placeholders
template = f"""{condense_question_prompt}
{chat_history_word}: {{chat_history}}
{follow_up_input_word}: {{input}}
{standalone_question_word}:"""
# Log the content of template
logger.debug(f"get_condense_question_prompt: Template content: {template}")
# Create the PromptTemplateWithHistory instance
prompt_template = PromptTemplateWithHistory(
input_variables=["input", "chat_history"], template=template
)

# Log the content of PromptTemplateWithHistory before returning
logger.debug(f"PromptTemplateWithHistory template: {prompt_template.template}")

return prompt_template
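Assuming the English locale entries mirror the hard-coded strings this PR replaces, the f-string above yields a close equivalent of the previous English template (modulo whitespace), e.g.:

condense_question_prompt = (
    "Given the following conversation and a follow up question, rephrase the "
    "follow up question to be a standalone question, in its original language."
)
chat_history_word = "Chat History"
follow_up_input_word = "Follow Up Input"
standalone_question_word = "Standalone question"

template = f"""{condense_question_prompt}
{chat_history_word}: {{chat_history}}
{follow_up_input_word}: {{input}}
{standalone_question_word}:"""
# {chat_history} and {input} are left as placeholders for the template engine.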

def get_qa_prompt(self):
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
# Fetch the QA prompt and translated words based on the current language
qa_system_prompt = prompts[locale]["qa_prompt"]
question_word = prompts[locale]["question_word"]
helpful_answer_word = prompts[locale]["helpful_answer_word"]
logger.info("Generating no-system-prompt QA template.")

{context}
# Append the context placeholder if needed

Question: {input}
Helpful Answer:""" # noqa: E501
return PromptTemplateWithHistory(
template=template, input_variables=["input", "content"]
# Combine the prompt with placeholders
template = f"""{qa_system_prompt}

{{context}}

{question_word}: {{input}}
{helpful_answer_word}:"""

# Create the PromptTemplateWithHistory instance
prompt_template = PromptTemplateWithHistory(
input_variables=["input", "context"], template=template
)

# Log the content of PromptTemplateWithHistory before returning
logger.debug(f"PromptTemplateWithHistory template: {prompt_template.template}")

return prompt_template


class BedrockChatNoStreamingNoSystemPromptAdapter(BedrockChatNoSystemPromptAdapter):
"""Some models do not support system streaming using the converse API"""
Expand All @@ -164,26 +250,11 @@ def __init__(self, *args, **kwargs):
)
registry.register(r"^bedrock\.cohere\.command-r.*", BedrockChatAdapter)
registry.register(r"^bedrock.anthropic.claude*", BedrockChatAdapter)
registry.register(
r"^bedrock.meta.llama*",
BedrockChatAdapter,
)
registry.register(
r"^bedrock.mistral.mistral-large*",
BedrockChatAdapter,
)
registry.register(
r"^bedrock.mistral.mistral-small*",
BedrockChatAdapter,
)
registry.register(
r"^bedrock.mistral.mistral-7b-*",
BedrockChatNoSystemPromptAdapter,
)
registry.register(
r"^bedrock.mistral.mixtral-*",
BedrockChatNoSystemPromptAdapter,
)
registry.register(r"^bedrock.meta.llama*", BedrockChatAdapter)
registry.register(r"^bedrock.mistral.mistral-large*", BedrockChatAdapter)
registry.register(r"^bedrock.mistral.mistral-small*", BedrockChatAdapter)
registry.register(r"^bedrock.mistral.mistral-7b-*", BedrockChatNoSystemPromptAdapter)
registry.register(r"^bedrock.mistral.mixtral-*", BedrockChatNoSystemPromptAdapter)
registry.register(r"^bedrock.amazon.titan-t*", BedrockChatNoSystemPromptAdapter)


@@ -1,6 +1,6 @@
import os
from langchain_openai import ChatOpenAI
from ..base import ModelAdapter
from adapters.base import ModelAdapter
from genai_core.registry import registry


@@ -1,2 +1,3 @@
# flake8: noqa
from .meta.llama2_chat import *
from .prompts.system_prompts import *
@@ -0,0 +1,2 @@
# flake8: noqa
from .system_prompts import *
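The system_prompts module itself is not shown in this hunk. A sketch of the shape the adapters above assume: the keys come from how adapters/bedrock/base.py reads them, the English strings mirror the hard-coded prompts this PR replaces, and the locale-selection mechanism is a guess:

# Hypothetical sketch of adapters/shared/prompts/system_prompts.py
import os

# How the active language is chosen is not shown in this section;
# an environment variable is one plausible option.
locale = os.environ.get("PROMPT_LANGUAGE", "en")

prompts = {
    "en": {
        "qa_prompt": (
            "Use the following pieces of context to answer the question at the end. "
            "If you don't know the answer, just say that you don't know, "
            "don't try to make up an answer."
        ),
        "conversation_prompt": (
            "The following is a friendly conversation between a human and an AI. "
            "If the AI does not know the answer to a question, "
            "it truthfully says it does not know."
        ),
        "condense_question_prompt": (
            "Given the following conversation and a follow up question, rephrase the "
            "follow up question to be a standalone question, in its original language."
        ),
        "question_word": "Question",
        "assistant_word": "Assistant",
        "helpful_answer_word": "Helpful Answer",
        "chat_history_word": "Chat History",
        "follow_up_input_word": "Follow Up Input",
        "standalone_question_word": "Standalone question",
    },
    # Additional locales (e.g. "fr", "es") would provide the same keys, translated.
}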