I am new to this. I have been trying but could not make the the model answer on images.
from llama_cpp import Llama
import torch
from PIL import Image
import base64
llm = Llama(
model_path='Holo1-7B.i1-Q5_K_M.gguf',
n_gpu_layers=-1,
)
def image_to_base64_data_uri(file_path):
with open(file_path, "rb") as img_file:
base64_data = base64.b64encode(img_file.read()).decode('utf-8')
return f"data:image/png;base64,{base64_data}"
# Replace 'file_path.png' with the actual path to your PNG file
file_path = 'academic.png'
image= image_to_base64_data_uri(file_path)
messages = [
{"role": "system", "content": "You are an assistant who perfectly describes images."},
{
"role": "user",
"content": [
{"type": "image", "image": {"url": image}},
{"type" : "text", "text": "Describe this image in detail please."}
]
}
]
response = llm.create_chat_completion(messages)
print(response)
Output is :
{'id': 'chatcmpl-7b3fac95-4fc1-4d1c-a89e-b536331c3f57', 'object': 'chat.completion', 'created': 1749274274, 'model': 'Holo1-7B.i1-Q5_K_M.gguf', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': "The image shows a person with short, light brown hair wearing a white t-shirt with a graphic design on the front. The design appears to be a stylized illustration or logo. The person is standing against a plain, light-colored background. The lighting is bright, highlighting the person's features and the details of the t-shirt design. The overall style is casual and modern."}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 32, 'completion_tokens': 75, 'total_tokens': 107}}
This is output is garbage. Its output description doesn't represent the image. What am I doing wrong? Please help.
Model link: https://huggingface.co/mradermacher/Holo1-7B-i1-GGUF