from pyhere import here
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath('../..')) # adding the absolute path
from openai import OpenAI
client = OpenAI()

6 Custom Memory
6.1 Text Only Memory
6.1.0.1 Fn: Helper _get_messages_user_assistant()
from typing import List
import pandas as pd
def _get_messages_user_assistant_text(mem_df: pd.DataFrame,
                                      user_key: str = "user",
                                      assistant_key: str = "assistant") -> list[dict]:
    """Convert a user-assistant DataFrame into a list of message dictionaries."""
    messages = []
    for i in range(len(mem_df)):
        if mem_df[user_key][i]:  # Check if 'user' cell is not empty
            messages.append({"role": user_key, "content": mem_df[user_key][i]})
        if mem_df[assistant_key][i]:  # Check if 'assistant' cell is not empty
            messages.append({"role": assistant_key, "content": mem_df[assistant_key][i]})
    return messages
# Example usage
mem_df = pd.DataFrame({"user": ["Hi", "Nothing"], "assistant": ["How can I help you?", "Bye"]})
messages = _get_messages_user_assistant_text(mem_df)
print(messages)

[{'role': 'user', 'content': 'Hi'}, {'role': 'assistant', 'content': 'How can I help you?'}, {'role': 'user', 'content': 'Nothing'}, {'role': 'assistant', 'content': 'Bye'}]
mem_df = pd.DataFrame({"user": ["Hi", "There"], "assistant": ["How can I...", "Hello"]})
mem_df.loc[1]
mem_df.loc[1, "assistant"] = "Ha"
# _get_messages_user_assistant(mem_df)Ha
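The main function below relies on one detail of this helper: a row whose assistant cell is still empty (None) contributes only its user message, so the latest user turn can be sent before a reply exists. A minimal sketch of that behavior (the DataFrame values here are made up for illustration):

pending_df = pd.DataFrame({"user": ["Hi", "What's my name?"],
                           "assistant": ["Hello!", None]})
print(_get_messages_user_assistant_text(pending_df))
# [{'role': 'user', 'content': 'Hi'},
#  {'role': 'assistant', 'content': 'Hello!'},
#  {'role': 'user', 'content': "What's my name?"}]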
6.1.0.2 Main Fn
from typing import List
import pandas as pd
def get_completions_text_mem_df(prompts: List[str], model: str = "gpt-4o"):
    msg: List[dict[str, str]]
    mem_df = pd.DataFrame({"user": [], "assistant": []})

    for i in range(len(prompts)):
        # Get user text
        mem_df.loc[i] = [prompts[i], None]
        # Convert to user-assistant messages
        msg = _get_messages_user_assistant_text(mem_df)
        # Generate response
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                *msg
            ]
        )
        # response_text = "Resp: " + prompts[i]  # For debugging
        response_text = completion.choices[0].message.content
        # Update memory DF
        mem_df.loc[i, "assistant"] = response_text

    return mem_df
get_completions_text_mem_df(["Hi, my name is Kittipos.", "What's my name?"])| user | assistant | |
|---|---|---|
| 0 | Hi, my name is Kittipos. | Hello, Kittipos! How can I assist you today? |
| 1 | What's my name? | Your name is Kittipos. How can I help you furt... |
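Because the whole conversation lives in the returned DataFrame, it can be resumed later by replaying it through the same helper. A hypothetical variant (not part of the code above; the name and signature are illustrative) that continues from a previously returned `mem_df` could look like this:

def continue_completions_text_mem_df(mem_df: pd.DataFrame, prompts: List[str], model: str = "gpt-4o"):
    """Hypothetical: append new prompts to an existing memory DataFrame and keep chatting."""
    mem_df = mem_df.copy()
    for prompt in prompts:
        # Append the new user turn with an empty assistant cell
        mem_df.loc[len(mem_df)] = [prompt, None]
        msg = _get_messages_user_assistant_text(mem_df)
        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "system", "content": "You are a helpful assistant."}, *msg]
        )
        # Fill in the assistant reply for the turn just added
        mem_df.loc[len(mem_df) - 1, "assistant"] = completion.choices[0].message.content
    return mem_df

# e.g. continue_completions_text_mem_df(prev_mem_df, ["And what did I say first?"])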
6.2 Vision Memory
6.2.0.1 Helper: _get_messages_user_assistant_text_image()
import pandas as pd
mem_vision_df = pd.DataFrame({
    "user_text": ["Convert data from this image to markdown text"] * 2,
    "user_image_url": ["url1", "url2"],
    "assistant_text": ["out1", "out2"]
})
mem_vision_df

|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Convert data from this image to markdown text | url1 | out1 |
| 1 | Convert data from this image to markdown text | url2 | out2 |
def _get_messages_user_assistant_text_image(mem_vision_df: pd.DataFrame, image_detail: str = "high") -> list[dict]:
    """Convert a user (text + image) + assistant DataFrame into a list of message dictionaries."""
    messages: List[dict[str, str | List]] = []
    for i in range(len(mem_vision_df)):
        user_text = mem_vision_df["user_text"][i]
        user_image_url = mem_vision_df["user_image_url"][i]
        assistant_text = mem_vision_df["assistant_text"][i]

        if user_text and user_image_url:
            messages.append({"role": "user",
                             "content": [
                                 {"type": "text", "text": user_text},
                                 {"type": "image_url", "image_url": {"url": user_image_url, "detail": image_detail}}
                             ]})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages
_get_messages_user_assistant_text_image(mem_vision_df)

[{'role': 'user',
  'content': [{'type': 'text',
    'text': 'Convert data from this image to markdown text'},
   {'type': 'image_url', 'image_url': {'url': 'url1', 'detail': 'high'}}]},
 {'role': 'assistant', 'content': 'out1'},
 {'role': 'user',
  'content': [{'type': 'text',
    'text': 'Convert data from this image to markdown text'},
   {'type': 'image_url', 'image_url': {'url': 'url2', 'detail': 'high'}}]},
 {'role': 'assistant', 'content': 'out2'}]
mem_vision_df

|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Convert data from this image to markdown text | url1 | out1 |
| 1 | Convert data from this image to markdown text | url2 | out2 |
6.2.0.2 Main Fn
from typing import List
import pandas as pd
def get_completions_vision_mem_df(image_prompt: str,
                                  image_prompt_next: str | None = None,
                                  image_urls: List[str] | None = None,
                                  base64_images: List[str] | None = None,
                                  system_prompt: str = "You are a helpful assistant.",
                                  model: str = "gpt-4o"):
    msg: List[dict[str, str | List]] = []
    mem_vision_df = pd.DataFrame({"user_text": [], "user_image_url": [], "assistant_text": []})

    if all([base64_images, image_urls]):
        raise ValueError("Must choose one of: `base64_images` or `image_urls`")

    # If `base64_images` is provided, convert to data URLs and use them as `image_urls`
    if base64_images:
        image_urls = [f"data:image/png;base64,{base64_image}" for base64_image in base64_images]

    for i in range(len(image_urls)):
        # For non-first iterations, if a next image prompt is provided, use it.
        if i != 0 and image_prompt_next:
            image_prompt = image_prompt_next
        # Add image prompt and URL to memory DF
        mem_vision_df.loc[i] = [image_prompt, image_urls[i], None]
        # Convert to user-assistant messages
        msg = _get_messages_user_assistant_text_image(mem_vision_df)
        # Generate response
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                *msg
            ]
        )
        response_text = completion.choices[0].message.content
        # response_text = "Resp: " + image_prompt  # For debugging
        # Update memory DF
        mem_vision_df.loc[i, "assistant_text"] = response_text

    return mem_vision_df

6.2.1 Vision Mem Execute
# Image URL
resp_vision_df_1 = get_completions_vision_mem_df(
    image_prompt="Give exactly 3 words that gives impression to this image",
    image_prompt_next="This too",
    image_urls=["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                "https://upload.wikimedia.org/wikipedia/commons/f/f0/Hintersee-Hochkalter.jpg"]
)
resp_vision_df_1

|   | user_text | user_image_url | assistant_text |
|---|-----------|----------------|----------------|
| 0 | Give exactly 3 words that gives impression to ... | https://upload.wikimedia.org/wikipedia/commons... | Peaceful, serene, scenic. |
| 1 | This too | https://upload.wikimedia.org/wikipedia/commons... | Majestic, tranquil, natural. |
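On the second turn, the request already carries the first image and its answer as context, which is why a later prompt can refer back to an earlier image. The accumulated messages can be inspected by re-running the helper on the returned DataFrame; a small sketch:

msgs = _get_messages_user_assistant_text_image(resp_vision_df_1)
print(len(msgs))                   # 4: two user (text + image) turns and two assistant replies
print([m["role"] for m in msgs])   # ['user', 'assistant', 'user', 'assistant']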
import base64

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

img_paths = ["../../img/misc/70.png", "../../img/misc/100.png"]
imgs_enc = [encode_image(img_path) for img_path in img_paths]

resp_vision_df_2 = get_completions_vision_mem_df(
    image_prompt="What is the speed?",
    system_prompt="""
    You are the car speed reporter. Report the current speed and previous speed in the prior image (if provided).
    """,
    base64_images=imgs_enc)

resp_vision_df_2["assistant_text"].to_list()

['The current speed is 100 kilometers per hour.',
 'The current speed is 110 kilometers per hour. The previous speed was 100 kilometers per hour.']
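Since the memory is just a DataFrame, it can be persisted between sessions with ordinary pandas I/O and replayed through the same helper later. A minimal sketch, assuming a writable working directory (the file name is illustrative):

# Save the conversation memory for later reuse
resp_vision_df_2.to_csv("memory.csv", index=False)

# Reload it and rebuild the message list for a follow-up request
restored_df = pd.read_csv("memory.csv")
msgs = _get_messages_user_assistant_text_image(restored_df)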