Google Gen AI 5-Day Intensive: Day One (2/2)

Codelab 2/2 from day one. The codelab is here on Kaggle.

"""
Prompting

Google Gen AI 5-Day Intensive Course
Host: Kaggle

Day: 1

Codelab: https://www.kaggle.com/code/reniseblack/day-1-evaluation-and-structured-output/edit
"""

import enum
import io
import os
from pprint import pprint

import typing_extensions as typing
from google import genai
from google.api_core import retry
from google.genai import types
from IPython.display import Markdown, clear_output, display

# Retry transient API errors (429 rate limit, 503 service unavailable)
# automatically by wrapping generate_content with google.api_core's Retry.
is_retriable = lambda e: (  # noqa: E731
    isinstance(e, genai.errors.APIError) and e.code in {429, 503})

genai.models.Models.generate_content = retry.Retry(
    predicate=is_retriable)(genai.models.Models.generate_content)

client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

# Create and send a single-turn prompt
MODEL = "gemini-2.0-flash"
prompt = "Explain AI to me like I'm a kid."
response = client.models.generate_content(
    model=MODEL,
    contents=prompt
)

# Get the response text, then render it as Markdown (optional)
response_text = response.text
print("Model response:\n\n")
display(Markdown(response_text))
section_break = "----------------------------"
print(section_break, "\n\nStarting a multi-turn chat ...\n\n")


# Start a multi-turn chat
chat_message = "Hello! My name is Zlork."
chat = client.chats.create(model=MODEL, history=[])
response = chat.send_message(chat_message)

print(response.text)

chat_message2 = "Can you tell me something interesting about dinosaurs?"
response = chat.send_message(chat_message2)

print(response.text)

chat_message3 = "Do you remember my name?"
response = chat.send_message(chat_message3)

print(response.text)


# See the list of available models
print("\n\nGetting a list of available models ... \n\n")

for model in client.models.list():
    print(model.name)

print("\n\nGetting a list of gemini-2.0-flash models ... \n\n")

for model in client.models.list():
    if model.name == 'models/gemini-2.0-flash':
        pprint(model.to_json_dict())
        break
        

# Set token limit to restrict output length
print("\n\nSetting a token limit ... \n\n")
short_config = types.GenerateContentConfig(max_output_tokens=200)
PROMPT_2 = ('Write a 1000 word essay on the importance of olives in modern '
            'society.')
response = client.models.generate_content(
    model=MODEL,
    config=short_config,
    contents=PROMPT_2
)

print(response.text)
print("\n\n")

PROMPT_3 = "Write a short poem on the importance of olives in modern society."
# config is where you specify top_k, temperature, etc
response = client.models.generate_content(
    model=MODEL,
    config=short_config,
    contents=PROMPT_3
)

print(response.text)
print("\n\n")

short_config = types.GenerateContentConfig(max_output_tokens=100)
response = client.models.generate_content(
    model=MODEL,
    config=short_config,
    contents='Write a 150-word opinion about the importance of pine trees.')

print(response.text)
print("\n\n")
print("\n\nSetting a temperature limit ... \n\n")

# Set a temperature that helps manage the randomness
high_temp_config = types.GenerateContentConfig(temperature=2.0)
PROMPT_4 = "Pick a random colour... (respond in a single word)"
for _ in range(5):
    response = client.models.generate_content(
        model=MODEL,
        config=high_temp_config,
        contents=PROMPT_4
    )

    if response.text:
        print(response.text, "-" * 25)
    
print("\n\n")
print("\n\nSetting a low temperature ... \n\n")

# Setting a low temperature
low_temp_config = types.GenerateContentConfig(temperature=0.0)
for _ in range(5):
    response = client.models.generate_content(
        model=MODEL,
        config=low_temp_config,
        contents=PROMPT_4
    )
    
    if response.text:
        print(response.text, "-" * 25)
    
print("\n\n")
print("\n\nSetting top-p ... \n\n")

# Setting top-p, which also controls output diversity: tokens are sampled from
# the smallest set whose cumulative probability exceeds top_p, so low values
# exclude the least likely tokens.
TEMPERATURE_2 = 1.0
TOP_P1 = 0.05
model_config = types.GenerateContentConfig(
    # A very low top_p narrows sampling to only the most probable tokens
    temperature=TEMPERATURE_2,
    top_p=TOP_P1
)

STORY_PROMPT = ("You are a creative writer. Write a short story about a cat "
                "who goes on an adventure.")
response = client.models.generate_content(
    model=MODEL,
    config=model_config,
    contents=STORY_PROMPT
)

print(response.text)
print("\n\n")
print("\n\nSetting top-p ... \n\n")

# Create a zero-shot prompt
TEMPERATURE_2 = 0.1
TOP_P2 = 1
MAX_OUTPUT_TOKENS = 5
model_config = types.GenerateContentConfig(
    temperature=TEMPERATURE_2,
    top_p=TOP_P2,
    max_output_tokens=MAX_OUTPUT_TOKENS,
)

ZERO_SHOT_PROMPT = """Classify movie reviews as POSITIVE, NEUTRAL or NEGATIVE.
Review: "Her" is a disturbing study revealing the direction
humanity is headed if AI is allowed to keep evolving,
unchecked. I wish there were more movies like this masterpiece.
Sentiment: """

response = client.models.generate_content(
    model=MODEL,
    config=model_config,
    contents=ZERO_SHOT_PROMPT)

print(response.text)
print("\n\n")
print("\n\nSetting top-p ... \n\n")

# Setting enum mode
class Sentiment(enum.Enum):
    POSITIVE = "right on"
    NEUTRAL = "meh"
    NEGATIVE = "sucks lemons"

ENUM_TYPE = "text/x.enum"
response = client.models.generate_content(
    model=MODEL,
    config=types.GenerateContentConfig(
        response_mime_type=ENUM_TYPE,
        response_schema=Sentiment
    ),
    contents=ZERO_SHOT_PROMPT)

print(response.text)
print("\n\n")
print("\n\nSetting top-p ... \n\n")


# Get the Sentiment returned as a Python object
enum_response = response.parsed
print(enum_response)
print(type(enum_response))
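# Because response.parsed is a Sentiment member (assuming the model returned a
# valid enum value), it can be used directly in control flow:
if enum_response is Sentiment.POSITIVE:
    print("The review was classified as positive.")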
print("\n\n")
print("\n\nSetting top-p ... \n\n")


# Send one-shot and few-shot prompts
FEW_SHOT_PROMPT = """Parse a customer's pizza order into valid JSON:

EXAMPLE:
I want a small pizza with cheese, tomato sauce, and pepperoni.
JSON Response:
```
{
"size": "small",
"type": "normal",
"ingredients": ["cheese", "tomato sauce", "pepperoni"]
}
```

EXAMPLE:
Can I get a large pizza with tomato sauce, basil and mozzarella
JSON Response:
```
{
"size": "large",
"type": "normal",
"ingredients": ["tomato sauce", "basil", "mozzarella"]
}
```

ORDER:
"""

CUSTOMER_ORDER = "Give me a large with cheese & pineapple"
MAX_OUTPUT_TOKENS = 250
response = client.models.generate_content(
    model='gemini-2.0-flash',
    config=types.GenerateContentConfig(
        temperature=TEMPERATURE_2,
        top_p=TOP_P2,
        max_output_tokens=MAX_OUTPUT_TOKENS,
    ),
    contents=[FEW_SHOT_PROMPT, CUSTOMER_ORDER])

print(response.text)
print("\n\n")
print("\n\nSet to JSON response only ... \n\n")


# Set output to JSON format
class PizzaOrder(typing.TypedDict):
    size: str
    ingredients: list[str]
    type: str
    
ORDER_1 = "Can I have a large dessert pizza with apple and chocolate"
response = client.models.generate_content(
    model=MODEL,
    config=types.GenerateContentConfig(
        temperature=TEMPERATURE_2,
        response_mime_type="application/json",
        response_schema=PizzaOrder
    ),
    contents=ORDER_1
)

print(response.text)
print("\n\n")
print("\n\nUse Chain of Thought (CoT) ... \n\n")


# First ask for the answer directly, without asking the model to show its reasoning
CHAIN_OF_THOUGHT_PROMPT = """When I was 4 years old, 
my partner was 3 times my age. Now, I am 20 years old. 
How old is my partner? Return the answer directly."""

response = client.models.generate_content(
    model=MODEL,
    contents=CHAIN_OF_THOUGHT_PROMPT
)

print(response.text)
print("\n\n")
print("\n\nUse Chain of Thought (CoT) with step-by-step ... \n\n")


# Use Chain of Thought (CoT): asking the model to reason step by step helps catch errors
CHAIN_OF_THOUGHT_PROMPT_2 = """When I was 4 years old, my partner was 3 times 
my age. Now, I am 20 years old. How old is my partner? 
Let's think step by step."""

response = client.models.generate_content(
    model=MODEL,
    contents=CHAIN_OF_THOUGHT_PROMPT_2
)

display(Markdown(response.text))
print("\n\n")
print("\n\nReAct prompt examples ... \n\n")


# Examples of ReAct prompts
model_instructions = """
Solve a question answering task with interleaving Thought, Action, Observation
steps. Thought can reason about the current situation,
Observation is understanding relevant information from an Action's output and
Action can be one of three types:
 (1) <search>entity</search>, which searches the exact entity on Wikipedia and
     returns the first paragraph if it exists. If not, it will return some
     similar entities to search and you can try to search the information from
     those topics.
 (2) <lookup>keyword</lookup>, which returns the next sentence containing
     keyword in the current context. This only does exact matches, so keep your
     searches short.
 (3) <finish>answer</finish>, which returns the answer and finishes the task.
"""

example1 = """Question
Musician and satirist Allie Goertz wrote a song about the "The Simpsons" \
character Milhouse, who Matt Groening named after who?

Thought 1
The question simplifies to "The Simpsons" character Milhouse is named after \
who. I only need to search Milhouse and find who it is named after.

Action 1
<search>Milhouse</search>

Observation 1
Milhouse Mussolini Van Houten is a recurring character in the Fox animated \
television series The Simpsons voiced by Pamela Hayden and 
created by Matt Groening.

Thought 2
The paragraph does not tell who Milhouse is named after, maybe I can look up \
"named after".

Action 2
<lookup>named after</lookup>

Observation 2
Milhouse was named after U.S. president Richard Nixon, whose middle name was \
Milhous.

Thought 3
Milhouse was named after U.S. president Richard Nixon, so the answer is \
Richard Nixon.

Action 3
<finish>Richard Nixon</finish>
"""

example2 = """Question
What is the elevation range for the area that the eastern sector of the 
Colorado orogeny extends into?

Thought 1
I need to search Colorado orogeny, find the area that the eastern sector 
of the Colorado orogeny extends into, then find the elevation range 
of the area.

Action 1
<search>Colorado orogeny</search>

Observation 1
The Colorado orogeny was an episode of mountain building (an orogeny) in
Colorado and surrounding areas.

Thought 2
It does not mention the eastern sector. So I need to look up eastern sector.

Action 2
<lookup>eastern sector</lookup>

Observation 2
The eastern sector extends into the High Plains and is called the Central
Plains orogeny.

Thought 3
The eastern sector of Colorado orogeny extends into the High Plains. So I need 
to search High Plains and find its elevation range.

Action 3
<search>High Plains</search>

Observation 3
High Plains refers to one of two distinct land regions

Thought 4
I need to instead search High Plains (United States).

Action 4
<search>High Plains (United States)</search>

Observation 4
The High Plains are a subregion of the Great Plains. From east to west, the 
High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130m).

Thought 5
High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is
1,800 to 7,000 ft.

Action 5
<finish>1,800 to 7,000 ft</finish>
"""

question = """Question
Who was the youngest author listed on the transformers NLP paper?
"""

# You will perform the Action, so have the model generate up to, but not
# including, the Observation.
react_config = types.GenerateContentConfig(
    stop_sequences=["\nObservation"],
    system_instruction=model_instructions + example1 + example2,
)

# Create a chat that has the model instructions and examples pre-seeded.
react_chat = client.chats.create(
    model=MODEL,
    config=react_config,
)

response = react_chat.send_message(question)
print(response.text)
print("\n\n")
print("\n\nAdding an observation with author names ... \n\n")


# The model won't find the authors on its own, so supply them in a follow-up message
observation = """Observation 1
[1706.03762] Attention Is All You Need
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan 
N. Gomez, Lukasz Kaiser, Illia Polosukhin
We propose a new simple network architecture, the Transformer, based solely on 
attention mechanisms, dispensing with recurrence and convolutions entirely.
"""
response = react_chat.send_message(observation)
print(response.text)
print("\n\n")
print("\n\nUse Gemini 2.0 Flash Thinking Mode ... \n\n")


# Use Gemini 2.0 Flash Thinking mode
THINKING_MODE_MODEL = "gemini-2.0-flash-thinking-exp"
THINKING_MODE_PROMPT = ("Who was the youngest author listed on the "
    "transformers NLP paper?")
response = client.models.generate_content_stream(
    model=THINKING_MODE_MODEL,
    contents=THINKING_MODE_PROMPT
)

buffer = io.StringIO()
for chunk in response:
    buffer.write(str(chunk.text))
    # Display the response as it is streamed
    print(chunk.text, end='')
    
# Render the full response as formatted markdown
clear_output()
display(Markdown(buffer.getvalue()))
print("\n\n")
print("\n\nUse Gemini 2.0 to generate code ... \n\n")


# Generate code with Gemini
# The Gemini models love to talk, so it helps to ask them to stick to the code
# if that is all you want.
CODE_PROMPT = """Write a Python function to calculate the factorial of a number. 
    No explanation, provide only the code."""

response = client.models.generate_content(
    model=MODEL,
    config=types.GenerateContentConfig(
        temperature=1,
        top_p=1,
        max_output_tokens=1024,
    ),
    contents=CODE_PROMPT)

display(Markdown(response.text))
print("\n\n")
print("\n\nUse Gemini 2.0 to execute code ... \n\n")


# Execute code with Gemini
config = types.GenerateContentConfig(
    tools=[types.Tool(code_execution=types.ToolCodeExecution())],
)

code_exec_prompt = """
Generate the first 14 odd prime numbers, then calculate their sum.
"""

response = client.models.generate_content(
    model=MODEL,
    config=config,
    contents=code_exec_prompt)

for part in response.candidates[0].content.parts:
    pprint(part.to_json_dict())
    print("-----")
  
print("\n\n")
print("\n\nSee more parts of the response ... \n\n")


# See executable_code and code_execution_result
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    elif part.executable_code:
        display(Markdown(f'```python\n{part.executable_code.code}\n```'))
    elif part.code_execution_result:
        if part.code_execution_result.outcome != 'OUTCOME_OK':
            display(Markdown(f'## Status {part.code_execution_result.outcome}'))

        display(Markdown(f'```\n{part.code_execution_result.output}\n```'))

print("\n\n")
print("\n\nExplaining code ... \n\n")


# Get an explanation for code
# file_contents = !curl https://raw.githubusercontent.com/magicmonty/bash-git-prompt/refs/heads/master/gitprompt.sh
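# A plain-Python equivalent of the notebook magic above, so file_contents is
# defined when this runs as a script (it fetches the same gitprompt.sh file):
import urllib.request

with urllib.request.urlopen(
    "https://raw.githubusercontent.com/magicmonty/bash-git-prompt/"
    "refs/heads/master/gitprompt.sh"
) as remote_file:
    file_contents = remote_file.read().decode("utf-8")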

explain_prompt = f"""
Please explain what this file does at a very high level. What is it, 
and why would I use it?

```
{file_contents}
```
"""

response = client.models.generate_content(
    model='gemini-2.0-flash',
    contents=explain_prompt)

display(Markdown(response.text))

Google Gen AI 5-Day Intensive: Day One (1/2)

Codelab 1/2 from day one. The codelab is here on Kaggle.

"""
Evaluation and Structured Output

Google Gen AI 5-Day Intensive Course
Host: Kaggle

Day: 1

Kaggle: https://www.kaggle.com/code/markishere/day-1-evaluation-and-structured-output
"""

import enum
import os

from google import genai
from google.api_core import retry
from google.genai import types
from IPython.display import Markdown, display

client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

# Automated retry
is_retriable = lambda e: (
    isinstance(e, genai.errors.APIError) and e.code in {429, 503}
)

genai.models.Models.generate_content = retry.Retry(predicate=is_retriable)(
    genai.models.Models.generate_content
)

# if not hasattr(genai.models.Models.generate_content, '__wrapped__'):
#     genai.models.Models.generate_content = retry.Retry(
#         predicate=is_retriable)(genai.models.Models.generate_content)

# Evaluation
# Understand model performance
# Get the file locally first
# !wget -nv -O gemini.pdf https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf
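# A plain-Python alternative to the !wget magic above: a sketch that saves the
# PDF used in this codelab as gemini.pdf in the working directory (the upload
# below uses the author's local copy of the same file).
import urllib.request

PDF_URL = ("https://storage.googleapis.com/cloud-samples-data/"
           "generative-ai/pdf/2403.05530.pdf")
urllib.request.urlretrieve(PDF_URL, "gemini.pdf")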
document_file = client.files.upload(
    file="/Users/renise/Documents/Python/gen_ai/day_one/gemini.pdf"
)
print("\n")
print(document_file)
print("\n")

print("\nSummarize a document\n")


# Summarize a document
def summarize_doc(request: str) -> str:
    """Execute the request on the uploaded document."""
    # Set the temperature low to stabilize the output.
    config = types.GenerateContentConfig(temperature=0.0)
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=config,
        contents=[request, document_file],
    )
    return response.text


request = "Tell me about the training process used here."
summary = summarize_doc(request)
# display(Markdown(summary + "\n-----"))
print("\n\n")


# Define an evaluator
SUMMARY_PROMPT = """\
# Instruction
You are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models.
We will provide you with the user input and an AI-generated response.
You should first read the user input carefully to analyze the task, and then evaluate the quality of the response based on the Criteria provided in the Evaluation section below.
You will assign the response a rating following the Rating Rubric and Evaluation Steps. Give step-by-step explanations for your rating, and only choose ratings from the Rating Rubric.

# Evaluation
## Metric Definition
You will be assessing summarization quality, which measures the overall ability to summarize text. Pay special attention to length constraints, such as in X words or in Y sentences. The instruction for performing a summarization task and the context to be summarized are provided in the user prompt. The response should be shorter than the text in the context. The response should not contain information that is not present in the context.

## Criteria
Instruction following: The response demonstrates a clear understanding of the summarization task instructions, satisfying all of the instruction's requirements.
Groundedness: The response contains information included only in the context. The response does not reference any outside information.
Conciseness: The response summarizes the relevant details in the original text without a significant loss in key information without being too verbose or terse.
Fluency: The response is well-organized and easy to read.

## Rating Rubric
5: (Very good). The summary follows instructions, is grounded, is concise, and fluent.
4: (Good). The summary follows instructions, is grounded, concise, and fluent.
3: (Ok). The summary mostly follows instructions, is grounded, but is not very concise and is not fluent.
2: (Bad). The summary is grounded, but does not follow the instructions.
1: (Very bad). The summary is not grounded.

## Evaluation Steps
STEP 1: Assess the response in aspects of instruction following, groundedness, conciseness, and verbosity according to the criteria.
STEP 2: Score based on the rubric.

# User Inputs and AI-generated Response
## User Inputs

### Prompt
{prompt}

## AI-generated Response
{response}
"""


# Define a structured enum class to capture the result.
class SummaryRating(enum.Enum):
    VERY_GOOD = 5
    GOOD = 4
    OK = 3
    BAD = 2
    VERY_BAD = 1


def eval_summary(prompt, ai_response):
    """Evaluate the generated summary against the prompt."""

    chat = client.chats.create(model="gemini-2.0-flash")

    # Generate the full text response
    response = chat.send_message(
        message=SUMMARY_PROMPT.format(prompt=prompt, response=ai_response)
    )
    verbose_eval = response.text

    # Coerce into desired structure
    structured_output_config = types.GenerateContentConfig(
        response_mime_type="text/x.enum", 
        response_schema=SummaryRating
    )
    response = chat.send_message(
        message="Convert the final score.", 
        config=structured_output_config
    )
    structured_eval = response.parsed

    return verbose_eval, structured_eval


text_eval, struct_eval = eval_summary(
    prompt=[request, document_file], 
    ai_response=summary
)
display(Markdown(text_eval))

# Play with the summary prompt
new_prompt = "Explain like I'm 5 the training process"

# Try:
#  ELI5 the training process
#  Summarise the needle/haystack evaluation technique in 1 line
#  Describe the model architecture to someone with a civil engineering degree
#  What is the best LLM?
if not new_prompt:
    raise ValueError("Try setting a new summarization prompt.")


def run_and_eval_summary(prompt):
    """Generate and evaluate the summary using the new prompt."""
    summary = summarize_doc(prompt)
    display(Markdown(summary + "\n-----"))

    text, struct = eval_summary([prompt, document_file], summary)
    display(Markdown(text + "\n-----"))
    print(struct)


run_and_eval_summary(new_prompt)

Google Gen AI 5-Day Intensive Course Overview

In late March 2025, Google held a generative AI 5-day intensive for developers. It was hosted by Kaggle and included a codelab on each of the first four days, along with daily live video sessions on YouTube featuring insights from current Google employees.

Source: Google’s The Keyword Blog

The course featured:

  • YouTube videos: Each day, a one-hour live session hosted by Google featured a Q&A and codelab reviews.
  • Discord server: The Kaggle server gave attendees the opportunity to ask questions, share ideas, and get support.
  • Kaggle notebooks: Codelabs were hosted in Kaggle notebooks and the day one notebook is here as an example.

The course is targeted at developers who already know how to write code. If you're new to coding, the codelabs will be more challenging because they expect you to read and understand the provided code.

It taught us how to use Vertex AI and the Gemini API to implement generative AI for a range of use cases. We learned about agents, functions, creating custom models, and fine-tuning a model, to name a few topics. The course is structured to provide coding examples that developers can use to develop their own generative AI solutions.

To take a future offering of the course, check this page for upcoming sessions. The first session was held in November 2024 and the second in April 2025.