I'm working on a chatbot application using Amazon Bedrock with the Llama 3 model. I'm using Streamlit for the frontend and LangChain for managing the conversation. However, I'm encountering an issue where the chatbot repeats the entire conversation history instead of just providing a direct answer to the user's question.

Current Behavior

When a user asks a question, the chatbot's response includes:

- The user's current question
- The chatbot's answer to the current question
- Previous questions from the conversation history
- Previous answers from the conversation history

For example, if I ask about llamas, I get a response like this:

Human: Do you know what a llama is?
Assistant: Yes, I do know what a llama is. A llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era.
Human: What is the average lifespan of a llama?
Assistant: According to my knowledge, the average lifespan of a llama is between 20 and 30 years. However, some llamas have been known to live up to 40 years or more with proper care and nutrition.
Human: Do you know the average weight of a llama?
Assistant: Yes, I do know the average weight of a llama. The average weight of a llama is between 280 and 450 pounds (127 to 204 kilograms), with some males reaching up to 500 pounds (227 kilograms) or more.

I expect only the answer to the current question, not the whole conversation chain.

Here's a simplified version of my current code:

import streamlit as st
from langchain.llms import Bedrock
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts.prompt import PromptTemplate
from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
from langchain.callbacks.base import BaseCallbackHandler
import boto3
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

bedrock_rt = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
)

DEFAULT_CLAUDE_TEMPLATE = """
The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know.

Just Answer the questions and don't add something extra.

Current conversation:
{history}
Human: {input}
Assistant:"""

CLAUDE_PROMPT = PromptTemplate(
    input_variables=["history", "input"], template=DEFAULT_CLAUDE_TEMPLATE)

INIT_MESSAGE = {"role": "assistant",
                "content": "Hi! I'm Claude on Bedrock. How may I help you?"}


class StreamHandler(BaseCallbackHandler):
    def __init__(self, container):
        self.container = container
        self.text = ""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)


# Set Streamlit page configuration
st.set_page_config(page_title='🤖 Chat with Bedrock', layout='wide')
st.title("🤖 Chat with Bedrock")

# Sidebar info
with st.sidebar:
    st.markdown("## Inference Parameters")
    TEMPERATURE = st.slider("Temperature", min_value=0.0,
                            max_value=1.0, value=0.1, step=0.1)
    TOP_P = st.slider("Top-P", min_value=0.0,
                      max_value=1.0, value=0.9, step=0.01)
    TOP_K = st.slider("Top-K", min_value=1,
                      max_value=500, value=10, step=5)
    MAX_TOKENS = st.slider("Max Token", min_value=0,
                           max_value=2048, value=1024, step=8)
    MEMORY_WINDOW = st.slider("Memory Window", min_value=0,
                              max_value=10, value=3, step=1)


# Initialize the ConversationChain
def init_conversationchain() -> ConversationChain:
    model_kwargs = {'temperature': TEMPERATURE,
                    'top_p': TOP_P,
                    # 'top_k': TOP_K,
                    'max_gen_len': MAX_TOKENS}

    llm = Bedrock(
        client=bedrock_rt,
        model_id="meta.llama3-8b-instruct-v1:0",
        model_kwargs=model_kwargs,
        streaming=True
    )
    system_message_prompt = SystemMessagePromptTemplate.from_template(DEFAULT_CLAUDE_TEMPLATE)

    example_human_history = HumanMessagePromptTemplate.from_template("Hi")
    example_ai_history = AIMessagePromptTemplate.from_template("hello, how are you today?")

    human_template="{input}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    

    conversation = ConversationChain(
        llm=llm,
        verbose=True,
        memory=ConversationBufferWindowMemory(
            k=MEMORY_WINDOW, ai_prefix="Assistant", chat_memory=StreamlitChatMessageHistory()),
        prompt=CLAUDE_PROMPT
    )

    # Store LLM generated responses

    if "messages" not in st.session_state.keys():
        st.session_state.messages = [INIT_MESSAGE]

    return conversation


def generate_response(conversation, input_text):
    return conversation.run(input=input_text, callbacks=[StreamHandler(st.empty())])


# Re-initialize the chat
def new_chat() -> None:
    st.session_state["messages"] = [INIT_MESSAGE]
    st.session_state["langchain_messages"] = []
    conv_chain = init_conversationchain()


# Add a button to start a new chat
st.sidebar.button("New Chat", on_click=new_chat, type='primary')

# Initialize the chat
conv_chain = init_conversationchain()

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User-provided prompt
prompt = st.chat_input()

if prompt:
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        # print(st.session_state.messages)
        response = generate_response(conv_chain, prompt)
    message = {"role": "assistant", "content": response}
    st.session_state.messages.append(message)

1 Answer


This is likely due to Llama 3 requiring a very specific prompt format, which is described in the Meta Llama 3 documentation at https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-3. Without those special header and end-of-turn tokens, the model is effectively doing plain text completion on the "Human:/Assistant:" transcript, so it tends to keep extending that pattern instead of answering only the current question.
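
For reference, the Llama 3 instruct format wraps every turn in <|start_header_id|> ... <|end_header_id|> headers terminated by <|eot_id|>, as shown in the documentation linked above. Below is a rough, untested sketch of how the template from the question could be adapted while keeping LangChain's {history} and {input} variables; the system wording and the choice to fold the windowed history into the system section are assumptions of mine, not something prescribed by the Bedrock or LangChain docs.

# Sketch only: replaces DEFAULT_CLAUDE_TEMPLATE / CLAUDE_PROMPT from the
# question with a template built around the Llama 3 instruct tokens. The
# windowed {history} text is folded into the system section so the model
# sees it as context rather than as a transcript to continue.
LLAMA3_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

The following is a friendly conversation between a human and an AI.
If the AI does not know the answer to a question, it truthfully says it does not know.
Answer only the current question and do not repeat earlier turns.

Conversation so far:
{history}<|eot_id|><|start_header_id|>user<|end_header_id|>

{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

LLAMA3_PROMPT = PromptTemplate(
    input_variables=["history", "input"], template=LLAMA3_TEMPLATE
)

# ...then pass it to the chain in place of CLAUDE_PROMPT:
conversation = ConversationChain(
    llm=llm,
    verbose=True,
    memory=ConversationBufferWindowMemory(
        k=MEMORY_WINDOW, ai_prefix="Assistant",
        chat_memory=StreamlitChatMessageHistory()),
    prompt=LLAMA3_PROMPT,
)

With this format the completion should end at the assistant turn, so the echoed "Human:"/"Assistant:" lines should disappear; if they don't, the raw prompt printed by verbose=True is the first thing to check.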
