Google Gen AI 5-Day Intensive: Day One (1/5)

Codelab 2/2 from day one. The code lab is here in Kaggle. Download the code on Github to run it locally.
"""
Prompting

Google Gen AI 5-Day Intensive Course
Host: Kaggle

Day: 1

Codelab: https://www.kaggle.com/code/reniseblack/day-1-evaluation-and-structured-output/edit
"""

import enum
import io
import os
from pprint import pprint

import typing_extensions as typing
from google import genai
from google.api_core import retry
from google.genai import types
from IPython.display import Markdown, clear_output, display

is_retriable = lambda e: (  # noqa: E731
    isinstance(e, genai.errors.APIError) and e.code in {429, 503})

genai.models.Models.generate_content = retry.Retry(
    predicate=is_retriable)(genai.models.Models.generate_content)

client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

# Create and send a single-turn prompt
MODEL = "gemini-2.0-flash"
prompt = "Explain AI to me like I'm a kid."
response = client.models.generate_content(
    model=MODEL,
    contents=prompt
)

# Get the response text then render as HTML (optional)
response_text = response.text
print("Model response: \n\n", Markdown(response.text))
section_break = "----------------------------"
print(section_break, "\n\nStarting a multi-turn chat ...\n\n")


# Start a multi-turn chat
chat_message = "Hello! My name is Zlork."
chat = client.chats.create(model=MODEL, history=[])
response = chat.send_message(chat_message)

print(response.text)

chat_message2 = "Can you tell me something interesting about dinosaurs?"
response = chat.send_message(chat_message2)

print(response.text)

chat_message3 = "Do you remember my name?"
response = chat.send_message(chat_message3)

print(response.text)


# See the list of available models
print("\n\nGetting a list of available models ... \n\n")

for model in client.models.list():
    print(model.name)

print("\n\nGetting a list of gemini-2.0-flash models ... \n\n")

for model in client.models.list():
    if model.name == 'models/gemini-2.0-flash':
        pprint(model.to_json_dict())
        break
        

# Set token limit to restrict output length
print("\n\nSetting a token limit ... \n\n")
short_config = types.GenerateContentConfig(max_output_tokens=200)
PROMPT_2 = ('Write a 1000 word essay on the importance of olives in modern '
            'society.')
response = client.models.generate_content(
    model=MODEL,
    config=short_config,
    contents=PROMPT_2
)

print(response.text)
print("\n\n")

PROMPT_3 = "Write a short poem on the importance of olives in modern society."
# config is where you specify top_k, temperature, etc
response = client.models.generate_content(
    model=MODEL,
    config=short_config,
    contents=PROMPT_3
)

print(response.text)
print("\n\n")

short_config = types.GenerateContentConfig(max_output_tokens=100)
response = client.models.generate_content(
    model=MODEL,
    config=short_config,
    contents='Write a 150 opinion about the importance of pine trees.')

print(response.text)
print("\n\n")
print("\n\nSetting a temperature limit ... \n\n")

# Set a temperature that helps manage the randomness
high_temp_config = types.GenerateContentConfig(temperature=2.0)
PROMPT_4 = "Pick a random colour... (respond in a single word)"
for _ in range (5):
    response = client.models.generate_content(
        model=MODEL,
        config=high_temp_config,
        contents = PROMPT_4
    )

    if response.text:
        print(response.text, "-" * 25)
    
print("\n\n")
print("\n\nSetting a low temperature ... \n\n")

# Setting a low temperature
low_temp_config = types.GenerateContentConfig(temperature=0.0)
for _ in range(5):
    response = client.models.generate_content(
        model=MODEL,
        config=low_temp_config,
        contents = PROMPT_4
    )
    
    if response.text:
        print(response.text, "-" * 25)
    
print("\n\n")
print("\n\nSetting top-p ... \n\n")

# Setting top-p that also controls output diversity
# It will determine when to stop selecting tokens that are least probable
TEMPERATURE_2 = 1.0
TOP_P1 = 0.05
model_config = types.GenerateContentConfig(
    # These are default gemini-2.0-flash values
    temperature=TEMPERATURE_2,
    top_p=TOP_P1
)

STORY_PROMPT = ("You are a creative writer. Write a short story about a cat"\
    "who goes on an adventure.")
response = client.models.generate_content(
    model=MODEL,
    config=model_config,
    contents=STORY_PROMPT
)

print(response.text)
print("\n\n")
print("\n\nSetting top-p ... \n\n")

# Create a zero-shot prompt
TEMPERATURE_2 = 0.1
TOP_P2 = 1
MAX_OUTPUT_TOKENS = 5
model_config = types.GenerateContentConfig(
    temperature=TEMPERATURE_2,
    top_p=TOP_P2,
    max_output_tokens=MAX_OUTPUT_TOKENS,
)

ZERO_SHOT_PROMPT = """Classify movie reviews as POSITIVE, NEUTRAL or NEGATIVE.
Review: "Her" is a disturbing study revealing the direction
humanity is headed if AI is allowed to keep evolving,
unchecked. I wish there were more movies like this masterpiece.
Sentiment: """

response = client.models.generate_content(
    model=MODEL,
    config=model_config,
    contents=ZERO_SHOT_PROMPT)

print(response.text)
print("\n\n")
print("\n\nSetting top-p ... \n\n")

# Setting enum mode
class Sentiment(enum.Enum):
    POSITIVE = "right on"
    NEUTRAL = "meh"
    NEGATIVE = "sucks lemons"

ENUM_TYPE = "text/x.enum"
response = client.models.generate_content(
    model=MODEL,
    config=types.GenerateContentConfig(
        response_mime_type=ENUM_TYPE,
        response_schema=Sentiment
    ),
    contents=ZERO_SHOT_PROMPT)

print(response.text)
print("\n\n")
print("\n\nSetting top-p ... \n\n")


# Get the Sentiment returned as a Python object
enum_response = response.parsed
print(enum_response)
print(type(enum_response))
print("\n\n")
print("\n\nSetting top-p ... \n\n")


# Send one-shot and few-shot prompts
FEW_SHOT_PROMPT = """Parse a customer's pizza order into valid JSON:

EXAMPLE:
I want a small pizza with cheese, tomato sauce, and pepperoni.
JSON Response:
```
{
"size": "small",
"type": "normal",
"ingredients": ["cheese", "tomato sauce", "pepperoni"]
}
```

EXAMPLE:
Can I get a large pizza with tomato sauce, basil and mozzarella
JSON Response:
```
{
"size": "large",
"type": "normal",
"ingredients": ["tomato sauce", "basil", "mozzarella"]
}
```

ORDER:
"""

CUSTOMER_ORDER = "Give me a large with cheese & pineapple"
MAX_OUTPUT_TOKENS = 250
response = client.models.generate_content(
    model='gemini-2.0-flash',
    config=types.GenerateContentConfig(
        temperature=TEMPERATURE_2,
        top_p=TOP_P2,
        max_output_tokens=MAX_OUTPUT_TOKENS,
    ),
    contents=[FEW_SHOT_PROMPT, CUSTOMER_ORDER])

print(response.text)
print("\n\n")
print("\n\nSet to JSON response only ... \n\n")


# Set output to JSON format
class PizzaOrder(typing.TypedDict):
    size: str
    ingredients: list[str]
    type: str
    
ORDER_1 = "Can I have a large dessert pizza with apple and chocolate"
response = client.models.generate_content(
    model=MODEL,
    config=types.GenerateContentConfig(
        temperature=TEMPERATURE_2,
        response_mime_type="application/json",
        response_schema=PizzaOrder
    ),
    contents=ORDER_1
)

print(response.text)
print("\n\n")
print("\n\nUse Chain of Thought (CoT) ... \n\n")


# Use Chain of Thought (CoT) to show reasoning to check hallucinations
CHAIN_OF_THOUGHT_PROMPT = """When I was 4 years old, 
my partner was 3 times my age. Now, I am 20 years old. 
How old is my partner? Return the answer directly."""

response = client.models.generate_content(
    model=MODEL,
    contents=CHAIN_OF_THOUGHT_PROMPT
)

print(response.text)
print("\n\n")
print("\n\nUse Chain of Thought (CoT) with step-by-step ... \n\n")


# Use Chain of Thought (CoT) to show reasoning to check hallucinations
CHAIN_OF_THOUGHT_PROMPT_2 = """When I was 4 years old, my partner was 3 times 
my age. Now, I am 20 years old. How old is my partner? 
Let's think step by step."""

response = client.models.generate_content(
    model=MODEL,
    contents=CHAIN_OF_THOUGHT_PROMPT_2
)

Markdown(response.text)
print("\n\n")
print("\n\nReAct prompt examples ... \n\n")


# Examples of ReAct prompts
model_instructions = """
Solve a question answering task with interleaving Thought, Action, Observation
steps. Thought can reason about the current situation,
Observation is understanding relevant information from an Action's output and
Action can be one of three types:
 (1) <search>entity</search>, which searches the exact entity on Wikipedia and
     returns the first paragraph if it exists. If not, it will return some
     similar entities to search and you can try to search the information from
     those topics.
 (2) <lookup>keyword</lookup>, which returns the next sentence containing
     keyword in the current context. This only does exact matches, so keep your
     searches short.
 (3) <finish>answer</finish>, which returns the answer and finishes the task.
"""

example1 = """Question
Musician and satirist Allie Goertz wrote a song about the "The Simpsons" ch\
aracter Milhouse, who Matt Groening named after who?

Thought 1
The question simplifies to "The Simpsons" character Milhouse is named after \
who. I only need to search Milhouse and find who it is named after.

Action 1
<search>Milhouse</search>

Observation 1
Milhouse Mussolini Van Houten is a recurring character in the Fox animated \
television series The Simpsons voiced by Pamela Hayden and 
created by Matt Groening.

Thought 2
The paragraph does not tell who Milhouse is named after, maybe I can look u\
p "named after".

Action 2
<lookup>named after</lookup>

Observation 2
Milhouse was named after U.S. president Richard Nixon, whose middle name wa\
s Milhous.

Thought 3
Milhouse was named after U.S. president Richard Nixon, so the answer is Ric\
hard Nixon.

Action 3
<finish>Richard Nixon</finish>
"""

example2 = """Question
What is the elevation range for the area that the eastern sector of the 
Colorado orogeny extends into?

Thought 1
I need to search Colorado orogeny, find the area that the eastern sector 
of the Colorado orogeny extends into, then find the elevation range 
of the area.

Action 1
<search>Colorado orogeny</search>

Observation 1
The Colorado orogeny was an episode of mountain building (an orogeny) in Colora
do and surrounding areas.

Thought 2
It does not mention the eastern sector. So I need to look up eastern sector.

Action 2
<lookup>eastern sector</lookup>

Observation 2
The eastern sector extends into the High Plains and is called the Central Plain
s orogeny.

Thought 3
The eastern sector of Colorado orogeny extends into the High Plains. So I need 
to search High Plains and find its elevation range.

Action 3
<search>High Plains</search>

Observation 3
High Plains refers to one of two distinct land regions

Thought 4
I need to instead search High Plains (United States).

Action 4
<search>High Plains (United States)</search>

Observation 4
The High Plains are a subregion of the Great Plains. From east to west, the 
High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130m).

Thought 5
High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1
,800 to 7,000 ft.

Action 5
<finish>1,800 to 7,000 ft</finish>
"""

question = """Question
Who was the youngest author listed on the transformers NLP paper?
"""

# You will perform the Action; so generate up to, but not including, 
# the Observation.
react_config = types.GenerateContentConfig(
    stop_sequences=["\nObservation"],
    system_instruction=model_instructions + example1 + example2,
)

# Create a chat that has the model instructions and examples pre-seeded.
react_chat = client.chats.create(
    model=MODEL,
    config=react_config,
)

response = react_chat.send_message(question)
print(response.text)
print("\n\n")
print("\n\nAdding an observation with author names ... \n\n")


# The model won't find the authors, so supply them with a followup
observation = """Observation 1
[1706.03762] Attention Is All You Need
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan 
N. Gomez, Lukasz Kaiser, Illia Polosukhin
We propose a new simple network architecture, the Transformer, based solely on 
attention mechanisms, dispensing with recurrence and convolutions entirely.
"""
response = react_chat.send_message(observation)
print(response.text)
print("\n\n")
print("\n\nUse Gemini 2.0 Flash Thinking Mode ... \n\n")


# Examples of ReAct prompts
THINKING_MODE_MODEL = "gemini-2.0-flash-thinking-exp"
THINKING_MODE_PROMPT = ("Who was the youngest author listed on the "
    "transformers NLP paper?")
response = client.models.generate_content_stream(
    model=THINKING_MODE_MODEL,
    contents=THINKING_MODE_PROMPT
)

buffer = io.StringIO()
for chunk in response:
    buffer.write(str(chunk.text))
    # Display the response as it is streamed
    print(chunk.text, end='')
    
# Render the response as formatted markdown
clear_output()
Markdown(buffer.getvalue())
print("\n\n")
print("\n\nUse Gemini 2.0 to generate code ... \n\n")


# Generate code wth Gemini
# The Gemini models love to talk, so it helps to specify they stick to the code
# if that is all that you want.
CODE_PROMPT = """Write a Python function to calculate the factorial of a number. 
    No explanation, provide only the code."""

response = client.models.generate_content(
    model=MODEL,
    config=types.GenerateContentConfig(
        temperature=1,
        top_p=1,
        max_output_tokens=1024,
    ),
    contents=CODE_PROMPT)

Markdown(response.text)
print("\n\n")
print("\n\nUse Gemini 2.0 to execute code ... \n\n")


# Execute code wth Gemini
config = types.GenerateContentConfig(
    tools=[types.Tool(code_execution=types.ToolCodeExecution())],
)

code_exec_prompt = """
Generate the first 14 odd prime numbers, then calculate their sum.
"""

response = client.models.generate_content(
    model=MODEL,
    config=config,
    contents=code_exec_prompt)

for part in response.candidates[0].content.parts:
    pprint(part.to_json_dict())
    print("-----")
  
print("\n\n")
print("\n\nSee more parts of the response ... \n\n")


# See executable_code and code_execution_result
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    elif part.executable_code:
        display(Markdown(f'```python\n{part.executable_code.code}\n```'))
    elif part.code_execution_result:
        if part.code_execution_result.outcome != 'OUTCOME_OK':
            display(Markdown(f'## Status {part.code_execution_result.outcome}'))

        display(Markdown(f'```\n{part.code_execution_result.output}\n```'))

print("\n\n")
print("\n\nExplaining code ... \n\n")


# Get an explanation for code
# file_contents = !curl https://raw.githubusercontent.com/magicmonty/bash-git-prompt/refs/heads/master/gitprompt.sh

explain_prompt = f"""
Please explain what this file does at a very high level. What is it, 
and why would I use it?

```
{file_contents}
```
"""

response = client.models.generate_content(
    model='gemini-2.0-flash',
    contents=explain_prompt)

Markdown(response.text)
Google Gen AI 5-Day Intensive: Day One (1/5)

Recent Posts

Leave a Reply Cancel reply