Building Production-Ready AI Chatbots with Next.js and Google Gemini
A hands-on guide to building production-ready AI chatbots using Next.js 15, Google's Gemini API, streaming responses, conversation memory, and enterprise-grade error handling.
Building Production-Ready AI Chatbots with Next.js and Google Gemini
AI chatbots have become essential for modern web applications — from customer support to interactive documentation. In this guide, I'll walk you through building a production-grade chatbot using Next.js 15 and Google's Gemini API, based on my real-world experience building the chatbot on my own portfolio.
Why Google Gemini?
Google Gemini offers several advantages for chatbot development:
- Generous free tier: 15 RPM for Gemini 1.5 Flash, perfect for portfolio/small business sites
- Multimodal capabilities: Can process text, images, audio, and video
- Long context window: Up to 1M tokens for processing large documents
- Fast inference: Gemini Flash is optimized for speed
- Structured output: Native JSON mode for predictable responses
Project Setup
npx create-next-app@latest ai-chatbot --typescript --tailwind --app
cd ai-chatbot
npm install @google/generative-ai
Environment Configuration
# .env.local
GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here
Building the API Route
Create the chat API endpoint with streaming support:
// app/api/chat/route.ts
import { GoogleGenerativeAI } from "@google/generative-ai";
import { NextRequest } from "next/server";
const genAI = new GoogleGenerativeAI(
process.env.GOOGLE_GENERATIVE_AI_API_KEY!
);
const SYSTEM_PROMPT = `You are a helpful assistant for a web development agency.
You specialize in React, Next.js, and modern frontend technologies.
Keep responses concise, professional, and helpful.
If asked about pricing, direct users to the contact form.`;
export async function POST(req: NextRequest) {
try {
const { messages, context } = await req.json();
const model = genAI.getGenerativeModel({
model: "gemini-1.5-flash",
systemInstruction: SYSTEM_PROMPT,
});
// Build conversation history
const history = messages.slice(0, -1).map((msg: { role: string; content: string }) => ({
role: msg.role === "user" ? "user" : "model",
parts: [{ text: msg.content }],
}));
const chat = model.startChat({ history });
const lastMessage = messages[messages.length - 1].content;
const result = await chat.sendMessageStream(lastMessage);
// Create a streaming response
const encoder = new TextEncoder();
const stream = new ReadableStream({
async start(controller) {
try {
for await (const chunk of result.stream) {
const text = chunk.text();
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ text })}\n\n`));
}
controller.enqueue(encoder.encode("data: [DONE]\n\n"));
controller.close();
} catch (error) {
controller.error(error);
}
},
});
return new Response(stream, {
headers: {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
},
});
} catch (error) {
console.error("Chat API error:", error);
return Response.json(
{ error: "Failed to generate response" },
{ status: 500 }
);
}
}
The Chat Component
Build a fully-featured chat UI with streaming support:
"use client";
import React, { useState, useRef, useCallback, useEffect } from "react";
interface Message {
id: string;
role: "user" | "assistant";
content: string;
timestamp: Date;
}
export function ChatBot() {
const [messages, setMessages] = useState<Message[]>([]);
const [input, setInput] = useState("");
const [isLoading, setIsLoading] = useState(false);
const [isOpen, setIsOpen] = useState(false);
const messagesEndRef = useRef<HTMLDivElement>(null);
const inputRef = useRef<HTMLInputElement>(null);
const scrollToBottom = useCallback(() => {
messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
}, []);
useEffect(() => {
scrollToBottom();
}, [messages, scrollToBottom]);
const sendMessage = async () => {
if (!input.trim() || isLoading) return;
const userMessage: Message = {
id: crypto.randomUUID(),
role: "user",
content: input.trim(),
timestamp: new Date(),
};
setMessages(prev => [...prev, userMessage]);
setInput("");
setIsLoading(true);
// Create placeholder for assistant message
const assistantId = crypto.randomUUID();
setMessages(prev => [
...prev,
{ id: assistantId, role: "assistant", content: "", timestamp: new Date() },
]);
try {
const response = await fetch("/api/chat", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
messages: [...messages, userMessage].map(m => ({
role: m.role,
content: m.content,
})),
}),
});
if (!response.ok) throw new Error("Failed to get response");
const reader = response.body?.getReader();
const decoder = new TextDecoder();
if (!reader) throw new Error("No response body");
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value);
const lines = chunk.split("\n").filter(line => line.startsWith("data: "));
for (const line of lines) {
const data = line.replace("data: ", "");
if (data === "[DONE]") continue;
try {
const parsed = JSON.parse(data);
setMessages(prev =>
prev.map(msg =>
msg.id === assistantId
? { ...msg, content: msg.content + parsed.text }
: msg
)
);
} catch {
// Skip malformed chunks
}
}
}
} catch (error) {
setMessages(prev =>
prev.map(msg =>
msg.id === assistantId
? { ...msg, content: "Sorry, I encountered an error. Please try again." }
: msg
)
);
} finally {
setIsLoading(false);
}
};
return (
<>
{/* Chat Toggle Button */}
<button
onClick={() => setIsOpen(!isOpen)}
className="fixed bottom-6 right-6 w-14 h-14 bg-blue-600 rounded-full
shadow-lg flex items-center justify-center z-50
hover:bg-blue-700 transition-colors"
aria-label={isOpen ? "Close chat" : "Open chat"}
>
{isOpen ? "✕" : "💬"}
</button>
{/* Chat Window */}
{isOpen && (
<div className="fixed bottom-24 right-6 w-96 h-[500px] bg-gray-900
rounded-2xl shadow-2xl flex flex-col z-40 border border-gray-800">
{/* Header */}
<div className="p-4 border-b border-gray-800">
<h3 className="text-white font-semibold">AI Assistant</h3>
<p className="text-gray-400 text-sm">Ask me anything about web development</p>
</div>
{/* Messages */}
<div className="flex-1 overflow-y-auto p-4 space-y-4">
{messages.map(message => (
<div
key={message.id}
className={`flex ${message.role === "user" ? "justify-end" : "justify-start"}`}
>
<div
className={`max-w-[80%] rounded-xl px-4 py-2 text-sm ${
message.role === "user"
? "bg-blue-600 text-white"
: "bg-gray-800 text-gray-200"
}`}
>
{message.content || "●●●"}
</div>
</div>
))}
<div ref={messagesEndRef} />
</div>
{/* Input */}
<div className="p-4 border-t border-gray-800">
<form
onSubmit={(e) => {
e.preventDefault();
sendMessage();
}}
className="flex gap-2"
>
<input
ref={inputRef}
value={input}
onChange={e => setInput(e.target.value)}
placeholder="Type a message..."
className="flex-1 bg-gray-800 text-white rounded-lg px-4 py-2 text-sm
focus:outline-none focus:ring-2 focus:ring-blue-500"
disabled={isLoading}
/>
<button
type="submit"
disabled={isLoading || !input.trim()}
className="bg-blue-600 text-white px-4 py-2 rounded-lg text-sm
disabled:opacity-50 hover:bg-blue-700 transition-colors"
>
Send
</button>
</form>
</div>
</div>
)}
</>
);
}
Rate Limiting and Security
API Rate Limiting
// lib/rate-limit.ts
const rateLimit = new Map<string, { count: number; resetTime: number }>();
export function checkRateLimit(
ip: string,
maxRequests: number = 10,
windowMs: number = 60000
): boolean {
const now = Date.now();
const record = rateLimit.get(ip);
if (!record || now > record.resetTime) {
rateLimit.set(ip, { count: 1, resetTime: now + windowMs });
return true;
}
if (record.count >= maxRequests) {
return false;
}
record.count++;
return true;
}
Input Sanitization
export function sanitizeInput(input: string): string {
return input
.trim()
.slice(0, 1000) // Limit message length
.replace(/[<>]/g, ""); // Remove potential HTML
}
Conversation Memory
For longer conversations, implement a sliding window approach:
function trimConversation(messages: Message[], maxTokens: number = 4000): Message[] {
// Estimate tokens (rough approximation: 1 token ≈ 4 characters)
let totalChars = 0;
const trimmed: Message[] = [];
// Always keep the system message and latest messages
for (let i = messages.length - 1; i >= 0; i--) {
const msgChars = messages[i].content.length;
if (totalChars + msgChars > maxTokens * 4) break;
totalChars += msgChars;
trimmed.unshift(messages[i]);
}
return trimmed;
}
Predefined Questions
Add quick-action buttons for common queries:
const predefinedQuestions = [
"What services do you offer?",
"Tell me about your experience",
"How can I start a project?",
"What technologies do you use?",
];
{messages.length === 0 && (
<div className="grid grid-cols-2 gap-2 p-4">
{predefinedQuestions.map((question) => (
<button
key={question}
onClick={() => {
setInput(question);
sendMessage();
}}
className="text-left text-xs bg-gray-800 hover:bg-gray-700
text-gray-300 p-3 rounded-lg transition-colors"
>
{question}
</button>
))}
</div>
)}
Testing Your Chatbot
// __tests__/chat.test.ts
import { POST } from "@/app/api/chat/route";
import { NextRequest } from "next/server";
describe("Chat API", () => {
it("should return a streaming response", async () => {
const req = new NextRequest("http://localhost/api/chat", {
method: "POST",
body: JSON.stringify({
messages: [{ role: "user", content: "Hello" }],
}),
});
const response = await POST(req);
expect(response.status).toBe(200);
expect(response.headers.get("Content-Type")).toBe("text/event-stream");
});
it("should handle empty messages", async () => {
const req = new NextRequest("http://localhost/api/chat", {
method: "POST",
body: JSON.stringify({ messages: [] }),
});
const response = await POST(req);
expect(response.status).toBe(400);
});
});
Deployment Considerations
- Edge Runtime: Use Edge runtime for lower latency if your provider supports it
- Monitoring: Track response times, error rates, and user satisfaction
- Fallback responses: Always have a graceful degradation strategy
- Cost management: Set spending limits on your AI provider dashboard
- Content moderation: Implement content filtering for inappropriate inputs
Conclusion
Building a production-ready AI chatbot with Next.js and Google Gemini is surprisingly accessible. The key ingredients are proper streaming implementation, robust error handling, rate limiting, and a polished UI. Start with a simple implementation and iterate based on user feedback. The chatbot on my own portfolio uses exactly these patterns, and it's been a genuine value-add for visitor engagement. Remember, the best chatbot isn't the smartest one — it's the one that reliably helps users find what they need.