I'm working on a chatbot application using Amazon Bedrock with the Llama 3 model. I'm using Streamlit for the frontend and LangChain for managing the conversation. However, I'm encountering an issue where the chatbot repeats the entire conversation history instead of just providing a direct answer to the user's question.

Current Behavior

When a user asks a question, the chatbot's response includes:

- The user's current question
- The chatbot's answer to the current question
- Previous questions from the conversation history
- Previous answers from the conversation history

For example, if I ask about llamas, I get a response like this:

Human: Do you know what a llama is?
Assistant: Yes, I do know what a llama is. A llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era.
Human: What is the average lifespan of a llama?
Assistant: According to my knowledge, the average lifespan of a llama is between 20 and 30 years. However, some llamas have been known to live up to 40 years or more with proper care and nutrition.
Human: Do you know the average weight of a llama?
Assistant: Yes, I do know the average weight of a llama. The average weight of a llama is between 280 and 450 pounds (127 to 204 kilograms), with some males reaching up to 500 pounds (227 kilograms) or more.

I expect only the answer to the current question, not the whole conversation chain.

Here's a simplified version of my current code:

import streamlit as st
from langchain.llms import Bedrock
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts.prompt import PromptTemplate
from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
from langchain.callbacks.base import BaseCallbackHandler
import boto3
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

bedrock_rt = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
)

DEFAULT_CLAUDE_TEMPLATE = """
The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know.

Just Answer the questions and don't add something extra.

Current conversation:
{history}
Human: {input}
Assistant:"""

CLAUDE_PROMPT = PromptTemplate(
    input_variables=["history", "input"], template=DEFAULT_CLAUDE_TEMPLATE)

INIT_MESSAGE = {"role": "assistant",
                "content": "Hi! I'm Claude on Bedrock. How may I help you?"}


class StreamHandler(BaseCallbackHandler):
    def __init__(self, container):
        self.container = container
        self.text = ""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)


# Set Streamlit page configuration
st.set_page_config(page_title='🤖 Chat with Bedrock', layout='wide')
st.title("🤖 Chat with Bedrock")

# Sidebar info
with st.sidebar:
    st.markdown("## Inference Parameters")
    TEMPERATURE = st.slider("Temperature", min_value=0.0,
                            max_value=1.0, value=0.1, step=0.1)
    TOP_P = st.slider("Top-P", min_value=0.0,
                      max_value=1.0, value=0.9, step=0.01)
    TOP_K = st.slider("Top-K", min_value=1,
                      max_value=500, value=10, step=5)
    MAX_TOKENS = st.slider("Max Token", min_value=0,
                           max_value=2048, value=1024, step=8)
    MEMORY_WINDOW = st.slider("Memory Window", min_value=0,
                              max_value=10, value=3, step=1)


# Initialize the ConversationChain
def init_conversationchain() -> ConversationChain:
    model_kwargs = {'temperature': TEMPERATURE,
                    'top_p': TOP_P,
                    # 'top_k': TOP_K,
                    'max_gen_len': MAX_TOKENS}

    llm = Bedrock(
        client=bedrock_rt,
        model_id="meta.llama3-8b-instruct-v1:0",
        model_kwargs=model_kwargs,
        streaming=True
    )
    system_message_prompt = SystemMessagePromptTemplate.from_template(DEFAULT_CLAUDE_TEMPLATE)

    example_human_history = HumanMessagePromptTemplate.from_template("Hi")
    example_ai_history = AIMessagePromptTemplate.from_template("hello, how are you today?")

    human_template="{input}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    

    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        memory=ConversationBufferWindowMemory(
            k=MEMORY_WINDOW, ai_prefix="Assistant", chat_memory=StreamlitChatMessageHistory()),
        prompt=CLAUDE_PROMPT
    )

    # Store LLM generated responses

    if "messages" not in st.session_state.keys():
        st.session_state.messages = [INIT_MESSAGE]

    return conversation


def generate_response(conversation, input_text):
    return conversation.run(input=input_text, callbacks=[StreamHandler(st.empty())])


# Re-initialize the chat
def new_chat() -> None:
    st.session_state["messages"] = [INIT_MESSAGE]
    st.session_state["langchain_messages"] = []
    conv_chain = init_conversationchain()


# Add a button to start a new chat
st.sidebar.button("New Chat", on_click=new_chat, type='primary')

# Initialize the chat
conv_chain = init_conversationchain()

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User-provided prompt
prompt = st.chat_input()

if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        # print(st.session_state.messages)
        response = generate_response(conv_chain, prompt)
    message = {"role": "assistant", "content": response}
    st.session_state.messages.append(message)

1 Answer


This is likely due to Llama 3 requiring a very specific prompt format, which is described in the Meta Llama 3 documentation at https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-3. Without those special header and end-of-turn tokens, the model is effectively doing plain text completion on the "Human:/Assistant:" transcript, so it tends to keep extending that pattern instead of answering only the current question.
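
For reference, the Llama 3 instruct format wraps every turn in <|start_header_id|> ... <|end_header_id|> headers terminated by <|eot_id|>, as shown in the documentation linked above. Below is a rough, untested sketch of how the template from the question could be adapted while keeping LangChain's {history} and {input} variables; the system wording and the choice to fold the windowed history into the system section are assumptions of mine, not something prescribed by the Bedrock or LangChain docs.

# Sketch only: replaces DEFAULT_CLAUDE_TEMPLATE / CLAUDE_PROMPT from the
# question with a template built around the Llama 3 instruct tokens. The
# windowed {history} text is folded into the system section so the model
# sees it as context rather than as a transcript to continue.
LLAMA3_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

The following is a friendly conversation between a human and an AI.
If the AI does not know the answer to a question, it truthfully says it does not know.
Answer only the current question and do not repeat earlier turns.

Conversation so far:
{history}<|eot_id|><|start_header_id|>user<|end_header_id|>

{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

LLAMA3_PROMPT = PromptTemplate(
    input_variables=["history", "input"], template=LLAMA3_TEMPLATE
)

# ...then pass it to the chain in place of CLAUDE_PROMPT:
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    memory=ConversationBufferWindowMemory(
        k=MEMORY_WINDOW, ai_prefix="Assistant",
        chat_memory=StreamlitChatMessageHistory()),
    prompt=LLAMA3_PROMPT,
)

With this format the completion should end at the assistant turn, so the echoed "Human:"/"Assistant:" lines should disappear; if they don't, the raw prompt printed by verbose=True is the first thing to check.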
