from pyhere import here
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath('../..')) # adding the absolute path
from openai import OpenAI
client = OpenAI()

6 Custom Memory
6.1 Text Only Memory
6.1.0.1 Fn: Helper _get_messages_user_assistant()
from typing import List
import pandas as pd
def _get_messages_user_assistant_text(mem_df: pd.DataFrame,
                                      user_key: str = "user",
                                      assistant_key: str = "assistant") -> list[dict]:
    """Convert a user-assistant DataFrame into a list of message dictionaries."""
    messages = []
    for i in range(len(mem_df)):
        if mem_df[user_key][i]:  # Check if 'user' cell is not empty
            messages.append({"role": user_key, "content": mem_df[user_key][i]})
        if mem_df[assistant_key][i]:  # Check if 'assistant' cell is not empty
            messages.append({"role": assistant_key, "content": mem_df[assistant_key][i]})
    return messages
# Example usage
mem_df = pd.DataFrame({"user": ["Hi", "Nothing"], "assistant": ["How can I help you?", "Bye"]})
messages = _get_messages_user_assistant_text(mem_df)
print(messages)

[{'role': 'user', 'content': 'Hi'}, {'role': 'assistant', 'content': 'How can I help you?'}, {'role': 'user', 'content': 'Nothing'}, {'role': 'assistant', 'content': 'Bye'}]
mem_df = pd.DataFrame({"user": ["Hi", "There"], "assistant": ["How can I...", "Hello"]})
mem_df.loc[1]
mem_df.loc[1, "assistant"] = "Ha"
# _get_messages_user_assistant(mem_df)Ha
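The main function below relies on one detail of this helper: a row whose assistant cell is still empty (None) contributes only its user message, so the latest user turn can be sent before a reply exists. A minimal sketch of that behavior (the DataFrame values here are made up for illustration):

pending_df = pd.DataFrame({"user": ["Hi", "What's my name?"],
                           "assistant": ["Hello!", None]})
print(_get_messages_user_assistant_text(pending_df))
# [{'role': 'user', 'content': 'Hi'},
#  {'role': 'assistant', 'content': 'Hello!'},
#  {'role': 'user', 'content': "What's my name?"}]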
6.1.0.2 Main Fn
from typing import List
import pandas as pd
def get_completions_text_mem_df(prompts: List[str], model: str = "gpt-4o"):
    msg: List[dict[str, str]]
    mem_df = pd.DataFrame({"user": [], "assistant": []})

    for i in range(len(prompts)):
        # Get user text
        mem_df.loc[i] = [prompts[i], None]
        # Convert to user-assistant messages
        msg = _get_messages_user_assistant_text(mem_df)
        # Generate response
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                *msg
            ]
        )
        # response_text = "Resp: " + prompts[i]  # For debugging
        response_text = completion.choices[0].message.content
        # Update memory DF
        mem_df.loc[i, "assistant"] = response_text

    return mem_df
get_completions_text_mem_df(["Hi, my name is Kittipos.", "What's my name?"])| user | assistant | |
|---|---|---|
| 0 | Hi, my name is Kittipos. | Hello, Kittipos! How can I assist you today? |
| 1 | What's my name? | Your name is Kittipos. How can I help you furt... |
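Because the whole conversation lives in the returned DataFrame, it can be resumed later by replaying it through the same helper. A hypothetical variant (not part of the code above; the name and signature are illustrative) that continues from a previously returned `mem_df` could look like this:

def continue_completions_text_mem_df(mem_df: pd.DataFrame, prompts: List[str], model: str = "gpt-4o"):
    """Hypothetical: append new prompts to an existing memory DataFrame and keep chatting."""
    mem_df = mem_df.copy()
    for prompt in prompts:
        # Append the new user turn with an empty assistant cell
        mem_df.loc[len(mem_df)] = [prompt, None]
        msg = _get_messages_user_assistant_text(mem_df)
        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "system", "content": "You are a helpful assistant."}, *msg]
        )
        # Fill in the assistant reply for the turn just added
        mem_df.loc[len(mem_df) - 1, "assistant"] = completion.choices[0].message.content
    return mem_df

# e.g. continue_completions_text_mem_df(prev_mem_df, ["And what did I say first?"])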
6.2 Vision Memory
6.2.0.1 Helper: _get_messages_user_assistant_text_image()
import pandas as pd
mem_vision_df = pd.DataFrame({
    "user_text": ["Convert data from this image to markdown text"] * 2,
    "user_image_url": ["url1", "url2"],
    "assistant_text": ["out1", "out2"]
})
mem_vision_df

|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Convert data from this image to markdown text | url1 | out1 |
| 1 | Convert data from this image to markdown text | url2 | out2 |
def _get_messages_user_assistant_text_image(mem_vision_df: pd.DataFrame, image_detail: str = "high") -> list[dict]:
    """Convert a user (text + image) + assistant DataFrame into a list of message dictionaries."""
    messages: List[dict[str, str | List]] = []
    for i in range(len(mem_vision_df)):
        user_text = mem_vision_df["user_text"][i]
        user_image_url = mem_vision_df["user_image_url"][i]
        assistant_text = mem_vision_df["assistant_text"][i]

        if user_text and user_image_url:
            messages.append({"role": "user",
                             "content": [
                                 {"type": "text", "text": user_text},
                                 {"type": "image_url", "image_url": {"url": user_image_url, "detail": image_detail}}
                             ]})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages
_get_messages_user_assistant_text_image(mem_vision_df)

[{'role': 'user',
  'content': [{'type': 'text',
    'text': 'Convert data from this image to markdown text'},
   {'type': 'image_url', 'image_url': {'url': 'url1', 'detail': 'high'}}]},
 {'role': 'assistant', 'content': 'out1'},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Convert data from this image to markdown text'},
   {'type': 'image_url', 'image_url': {'url': 'url2', 'detail': 'high'}}]},
 {'role': 'assistant', 'content': 'out2'}]
mem_vision_df

|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Convert data from this image to markdown text | url1 | out1 |
| 1 | Convert data from this image to markdown text | url2 | out2 |
6.2.0.2 Main Fn
from typing import List
import pandas as pd
def get_completions_vision_mem_df(image_prompt: str,
                                  image_prompt_next: str | None = None,
                                  image_urls: List[str] | None = None,
                                  base64_images: List[str] | None = None,
                                  system_prompt: str = "You are a helpful assistant.",
                                  model: str = "gpt-4o"):
    msg: List[dict[str, str | List]] = []
    mem_vision_df = pd.DataFrame({"user_text": [], "user_image_url": [], "assistant_text": []})

    if all([base64_images, image_urls]):
        raise ValueError("Must choose one of: `base64_images` or `image_urls`")

    # If `base64_images` is provided, convert to data URLs and use them as `image_urls`
    if base64_images:
        image_urls = [f"data:image/png;base64,{base64_image}" for base64_image in base64_images]

    for i in range(len(image_urls)):
        # For non-first iterations, if a next image prompt is provided, use it.
        if i != 0 and image_prompt_next:
            image_prompt = image_prompt_next
        # Add image prompt and URL to memory DF
        mem_vision_df.loc[i] = [image_prompt, image_urls[i], None]
        # Convert to user-assistant messages
        msg = _get_messages_user_assistant_text_image(mem_vision_df)
        # Generate response
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                *msg
            ]
        )
        response_text = completion.choices[0].message.content
        # response_text = "Resp: " + image_prompt  # For debugging
        # Update memory DF
        mem_vision_df.loc[i, "assistant_text"] = response_text

    return mem_vision_df

6.2.1 Vision Mem Execute
# Image URL
resp_vision_df_1 = get_completions_vision_mem_df(
    image_prompt="Give exactly 3 words that gives impression to this image",
    image_prompt_next="This too",
    image_urls=["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                "https://upload.wikimedia.org/wikipedia/commons/f/f0/Hintersee-Hochkalter.jpg"]
)
resp_vision_df_1

|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Give exactly 3 words that gives impression to ... | https://upload.wikimedia.org/wikipedia/commons... | Peaceful, serene, scenic. |
| 1 | This too | https://upload.wikimedia.org/wikipedia/commons... | Majestic, tranquil, natural. |
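On the second turn, the request already carries the first image and its answer as context, which is why a later prompt can refer back to an earlier image. The accumulated messages can be inspected by re-running the helper on the returned DataFrame; a small sketch:

msgs = _get_messages_user_assistant_text_image(resp_vision_df_1)
print(len(msgs))                   # 4: two user (text + image) turns and two assistant replies
print([m["role"] for m in msgs])   # ['user', 'assistant', 'user', 'assistant']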
import base64

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

img_paths = ["../../img/misc/70.png", "../../img/misc/100.png"]
imgs_enc = [encode_image(img_path) for img_path in img_paths]

resp_vision_df_2 = get_completions_vision_mem_df(
    image_prompt="What is the speed?",
    system_prompt="""
    You are the car speed reporter. Report the current speed and previous speed in the prior image (if provided).
    """,
    base64_images=imgs_enc)

resp_vision_df_2["assistant_text"].to_list()

['The current speed is 100 kilometers per hour.',
 'The current speed is 110 kilometers per hour. The previous speed was 100 kilometers per hour.']
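Since the memory is just a DataFrame, it can be persisted between sessions with ordinary pandas I/O and replayed through the same helper later. A minimal sketch, assuming a writable working directory (the file name is illustrative):

# Save the conversation memory for later reuse
resp_vision_df_2.to_csv("memory.csv", index=False)

# Reload it and rebuild the message list for a follow-up request
restored_df = pd.read_csv("memory.csv")
msgs = _get_messages_user_assistant_text_image(restored_df)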