rim() || isStreaming) return;
// Reset error state
setError(null);
// Create user turn
const userTurn: ChatTurn = {
id: crypto.randomUUID(),
role: 'user',
content: userContent.trim(),
timestamp: Date.now(),
};
setTurns(prev => [...prev, userTurn]);
// Prepare assistant turn placeholder
const assistantTurnId = crypto.randomUUID();
const assistantTurn: ChatTurn = {
id: assistantTurnId,
role: 'assistant',
content: '',
timestamp: Date.now(),
};
setTurns(prev => [...prev, assistantTurn]);
setIsStreaming(true);
streamBufferRef.current = '';
// Cancel any existing request
if (abortControllerRef.current) {
abortControllerRef.current.abort();
}
abortControllerRef.current = new AbortController();
try {
const response = await fetch(config.endpoint, {
method: 'POST',
headers: {
'Authorization': `Bearer ${config.apiKey}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: config.model,
messages: [...turns, userTurn].map(t => ({
role: t.role,
content: t.content,
})),
stream: true,
max_tokens: config.maxTokens || 1024,
}),
signal: abortControllerRef.current.signal,
});
if (!response.ok) {
throw new Error(`Request failed with status ${response.status}`);
}
const reader = response.body?.getReader();
const decoder = new TextDecoder();
if (!reader) {
throw new Error('ReadableStream not supported');
}
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
for (const line of lines) {
if (line.startsWith('data: ')) {
const payload = line.slice(6);
if (payload === '[DONE]') continue;
try {
const json = JSON.parse(payload);
const deltaContent = json.choices?.[0]?.delta?.content;
if (deltaContent) {
streamBufferRef.current += deltaContent;
// Update state with accumulated content
setTurns(prev =>
prev.map(t =>
t.id === assistantTurnId
? { ...t, content: streamBufferRef.current }
: t
)
);
}
} catch (parseErr) {
// Ignore malformed JSON chunks common in streaming
console.warn('Stream parse warning:', parseErr);
}
}
}
}
} catch (err) {
if (err instanceof Error && err.name === 'AbortError') {
console.log('Stream aborted by user');
} else {
const message = err instanceof Error ? err.message : 'Unknown stream error';
setError(message);
// Remove failed assistant turn
setTurns(prev => prev.filter(t => t.id !== assistantTurnId));
}
} finally {
setIsStreaming(false);
abortControllerRef.current = null;
}
}, [turns, isStreaming, config]);
// Abort the in-flight request, if any. The stream's AbortError branch in
// submitQuery handles the resulting state cleanup, so nothing else is needed.
const cancelStream = useCallback(() => {
  abortControllerRef.current?.abort();
}, []);
return { turns, isStreaming, error, submitQuery, cancelStream };
}
```

#### 3. Implementation: The Chat Interface

The component focuses on rendering, auto-scrolling, and markdown handling.
```typescript
import React, { useEffect, useRef, useState } from 'react';
import ReactMarkdown from 'react-markdown';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { vscDarkPlus } from 'react-syntax-highlighter/dist/esm/styles/prism';
import { useConversationStream, ChatTurn } from './useConversationStream';
/**
 * Props for ChatDeck. `config` is forwarded verbatim to useConversationStream.
 *
 * NOTE(review): shipping an apiKey to the browser exposes it to end users —
 * confirm this is a restricted proxy token, or route requests through a backend.
 */
interface ChatDeckProps {
  config: {
    apiKey: string;
    endpoint: string;
    model: string;
    /** Optional completion cap; the hook falls back to 1024 when omitted. */
    maxTokens?: number;
  };
}
/**
 * Chat surface: renders the conversation, a streaming indicator, an error
 * banner, and the composer. Scrolling follows new content only while the
 * user is already pinned near the bottom of the message list.
 */
export function ChatDeck({ config }: ChatDeckProps) {
  const { turns, isStreaming, error, submitQuery, cancelStream } = useConversationStream(config);
  const [inputValue, setInputValue] = useState('');
  const messagesEndRef = useRef<HTMLDivElement>(null);
  const chatContainerRef = useRef<HTMLDivElement>(null);

  // Sticky-scroll flag: true while the viewport sits near the bottom.
  const shouldAutoScroll = useRef(true);

  // Track the user's scroll position so streaming updates never yank the
  // viewport away from older messages they scrolled up to read.
  useEffect(() => {
    const el = chatContainerRef.current;
    if (!el) return;
    const onScroll = () => {
      const distanceFromBottom = el.scrollHeight - el.scrollTop - el.clientHeight;
      // Within 50px of the bottom counts as "pinned" — keep following.
      shouldAutoScroll.current = distanceFromBottom < 50;
    };
    el.addEventListener('scroll', onScroll);
    return () => el.removeEventListener('scroll', onScroll);
  }, []);

  // Follow new content only when the user was already at the bottom.
  useEffect(() => {
    if (!shouldAutoScroll.current) return;
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  }, [turns, isStreaming]);

  const handleSend = () => {
    if (!inputValue.trim()) return;
    submitQuery(inputValue);
    setInputValue('');
  };

  // Enter submits; Shift+Enter inserts a newline as usual.
  const onComposerKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      handleSend();
    }
  };

  return (
    <div className="chat-container">
      <div ref={chatContainerRef} className="messages-area">
        {turns.map(turn => (
          <MessageBubble key={turn.id} turn={turn} />
        ))}
        {isStreaming && (
          <div className="typing-indicator">
            <span>Model is thinking...</span>
          </div>
        )}
        {/* Sentinel element that scrollIntoView targets. */}
        <div ref={messagesEndRef} />
      </div>
      {error && (
        <div className="error-banner">
          {error}
          <button onClick={() => window.location.reload()}>Retry</button>
        </div>
      )}
      <div className="input-area">
        <textarea
          value={inputValue}
          onChange={e => setInputValue(e.target.value)}
          onKeyDown={onComposerKeyDown}
          placeholder="Type your message..."
          disabled={isStreaming}
        />
        {isStreaming ? (
          <button onClick={cancelStream} className="stop-btn">Stop</button>
        ) : (
          <button onClick={handleSend} disabled={!inputValue.trim()}>Send</button>
        )}
      </div>
    </div>
  );
}
/**
 * Renders one conversation turn. Content is treated as markdown; fenced code
 * blocks are routed through react-syntax-highlighter, while inline code keeps
 * the default <code> rendering.
 *
 * Bug fix: the blinking cursor previously referenced `isStreaming`, which is
 * not defined in this component's scope (a compile error). An assistant turn
 * with empty content only exists while its stream is pending (the hook removes
 * it on failure), so the cursor is keyed off the turn itself instead.
 */
function MessageBubble({ turn }: { turn: ChatTurn }) {
  // Placeholder assistant turns have empty content until the first token lands.
  const awaitingFirstToken = turn.role === 'assistant' && turn.content === '';
  return (
    <div className={`message ${turn.role}`}>
      <div className="role-label">
        {turn.role === 'user' ? 'You' : 'Assistant'}
      </div>
      <div className="content">
        <ReactMarkdown
          components={{
            code({ className, children, ...props }) {
              // Fenced ```lang blocks arrive as className="language-lang".
              const match = /language-(\w+)/.exec(className || '');
              return match ? (
                <SyntaxHighlighter
                  style={vscDarkPlus}
                  language={match[1]}
                  PreTag="div"
                  {...props}
                >
                  {String(children).replace(/\n$/, '')}
                </SyntaxHighlighter>
              ) : (
                <code className={className} {...props}>
                  {children}
                </code>
              );
            },
          }}
        >
          {turn.content}
        </ReactMarkdown>
        {awaitingFirstToken && <span className="cursor-blink">▋</span>}
      </div>
    </div>
  );
}
```

## Pitfall Guide

- **Stream Abandonment and Memory Leaks**
  - Explanation: If a user navigates away or sends a new message while a stream is active, the previous fetch request continues consuming bandwidth and CPU.
  - Fix: Always use `AbortController`. Cancel the previous request before starting a new one, and abort on component unmount.
- **Excessive Re-renders During Streaming**
  - Explanation: Updating React state on every token can cause the UI to re-render hundreds of times per second, leading to jank and high CPU usage.
  - Fix: Accumulate content in a `useRef` and update state only when necessary. Alternatively, use a custom hook that batches updates or updates state every N tokens.
- **Auto-Scroll Jitter**
  - Explanation: Forcing scroll-to-bottom on every update interrupts the user if they are scrolling up to read previous messages.
  - Fix: Track scroll position. Only auto-scroll if the user is already near the bottom of the container.
- **Markdown Security Vulnerabilities**
  - Explanation: LLMs can generate malicious HTML or JavaScript within markdown blocks.
  - Fix: Use `rehype-sanitize` to strip dangerous attributes and tags. Restrict `react-markdown` plugins to safe extensions like `remark-gfm`.
- **Context Window Overflow**
  - Explanation: Sending the entire conversation history without limits can exceed the model's context window, causing API errors or degraded quality.
  - Fix: Implement a sliding window strategy. Trim older messages when the token count approaches the model's limit.
- **Ignoring Partial Stream Errors**
  - Explanation: Network drops can result in partial responses. The UI might display incomplete text without indicating an error.
  - Fix: Catch errors in the stream loop. If the stream ends unexpectedly, mark the turn with an error state or remove the incomplete message.
- **Blocking the Main Thread with Markdown Parsing**
  - Explanation: Parsing large markdown blocks synchronously can freeze the UI.
  - Fix: For very long responses, consider virtualizing the message list or using web workers for markdown parsing if performance degrades.
## Production Bundle

### Action Checklist

### Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Real-time UX Critical | Chunked Streaming | Lowest perceived latency; keeps users engaged. | Higher bandwidth usage; complex state management. |
| Simple Bot / Low Traffic | Batch Request | Simpler implementation; easier error handling. | Higher perceived latency; potential user drop-off. |
| Code-Heavy Responses | Streaming + Syntax Highlighting | Essential for readability of code blocks. | Requires react-syntax-highlighter; slightly heavier bundle. |
| Mobile / Low Bandwidth | Batch with Loading Skeleton | Reduces connection overhead; predictable UX. | May feel slower on high-latency networks. |
### Configuration Template

Use this template to configure the streaming hook for different environments.

```typescript
// chatConfig.ts
// Environment-specific settings consumed by useConversationStream.
export const CHAT_CONFIG = {
  production: {
    // NOTE(review): a REACT_APP_* env var is inlined into the client bundle at
    // build time, so this key is visible to end users — confirm it is safe to
    // expose, or proxy requests through a backend.
    apiKey: process.env.REACT_APP_LLM_API_KEY,
    endpoint: 'https://api.provider.com/v1/chat/completions',
    model: 'claude-3-5-sonnet-20241022',
    maxTokens: 2048, // larger completion cap for real traffic
  },
  development: {
    apiKey: 'dev-key-placeholder', // never commit a real key here
    endpoint: 'https://api.provider.com/v1/chat/completions',
    model: 'claude-3-5-sonnet-20241022',
    maxTokens: 512, // keep dev responses short and cheap
  },
};
```

## Quick Start Guide
1. **Install dependencies**: `npm install react-markdown react-syntax-highlighter`
2. **Create the hook**: copy the `useConversationStream` implementation into `hooks/useConversationStream.ts`.
3. **Create the component**: copy the `ChatDeck` implementation into `components/ChatDeck.tsx`.
4. **Wire up**: import `ChatDeck` in your app and pass the configuration object: `<ChatDeck config={CHAT_CONFIG.production} />`
5. **Test**: verify streaming works, auto-scroll behaves correctly, and markdown renders safely. Check the network tab for proper stream handling.