let attempt = 0; attempt <= this.maxRetries; attempt++) {
try {
const response = await fetch(${this.baseUrl}/chat/completions, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: Bearer ${this.apiKey},
},
body: JSON.stringify({
model: request.model ?? 'deepseek-r1',
messages: [{ role: 'user', content: request.prompt }],
max_tokens: request.maxTokens ?? 1024,
temperature: request.temperature ?? 0.7,
}),
signal: request.signal,
});
if (response.status === 429) {
const retryAfter = response.headers.get('Retry-After') ?? '1';
await new Promise((res) => setTimeout(res, Number(retryAfter) * 1000));
continue;
}
if (!response.ok) {
const errBody = await response.json().catch(() => ({}));
throw new Error(errBody.error?.message ?? `HTTP ${response.status}`);
}
const data = await response.json();
return {
id: data.id,
content: data.choices[0]?.message?.content ?? '',
usage: {
promptTokens: data.usage?.prompt_tokens ?? 0,
completionTokens: data.usage?.completion_tokens ?? 0,
totalTokens: data.usage?.total_tokens ?? 0,
},
};
} catch (err) {
lastError = err instanceof Error ? err : new Error(String(err));
if (attempt < this.maxRetries && !request.signal?.aborted) {
await new Promise((res) => setTimeout(res, 1000 * 2 ** attempt));
}
}
}
throw lastError ?? new Error('Request failed after retries');
}
}
**Architecture Rationale:**
- Class encapsulation centralizes retry logic, header management, and endpoint configuration.
- Exponential backoff, combined with honoring the server's `Retry-After` header, prevents retries from cascading into further 429 errors.
- `AbortSignal` propagation enables upstream cancellation without orphaned network requests.
- Explicit token mapping ensures cost tracking aligns with billing metadata.
### Step 2: State Orchestration Hook
React state must handle asynchronous lifecycles, prevent race conditions, and expose clean interfaces to the view layer. A custom hook abstracts the client while managing conversation history, loading states, and error boundaries.
```typescript
// hooks/useConversationEngine.ts
import { useState, useCallback, useRef } from 'react';
import { DeepSeekClient, ChatRequest, ChatResponse } from '../lib/deepseek-client';
/**
 * A single entry in the conversation history held by `useConversationEngine`.
 */
export interface ConversationMessage {
/**
 * Identifier for the message; the view uses it as the React list key.
 * User messages get a `crypto.randomUUID()` value, assistant messages reuse the API response id.
 */
id: string;
/** Who authored the message: the person typing or the model's reply. */
role: 'user' | 'assistant';
/** Message text as entered by the user or returned by the client. */
content: string;
/**
 * Total token usage reported for the response. Only assistant messages set this;
 * user messages leave it undefined.
 */
tokens?: number;
/** `Date.now()` value captured when the message object was created. */
timestamp: number;
}
/**
 * React hook that drives a single-turn chat conversation through a `DeepSeekClient`.
 *
 * It owns the message history, a processing flag, and the last error message, and
 * exposes `submit` to send a prompt and `reset` to cancel any in-flight request and
 * clear the conversation.
 *
 * Every request gets its own `AbortController`. State is only committed for the
 * request whose controller is still the active one, so a request that was aborted
 * by `reset` (or superseded by a later `submit`) can never append a stale reply,
 * surface a stale error, or clear the processing flag of a newer request.
 *
 * @param client - Client used to perform the chat completion call.
 * @returns The current `messages`, `isProcessing` flag, `error` text (or `null`),
 *          and the `submit` / `reset` callbacks.
 */
export function useConversationEngine(client: DeepSeekClient) {
  const [messages, setMessages] = useState<ConversationMessage[]>([]);
  const [isProcessing, setIsProcessing] = useState(false);
  const [error, setError] = useState<string | null>(null);
  // Controller of the request that currently "owns" the hook state, if any.
  const abortRef = useRef<AbortController | null>(null);

  const submit = useCallback(
    async (prompt: string) => {
      if (!prompt.trim() || isProcessing) return;

      // Cancel whatever might still be in flight before taking ownership.
      abortRef.current?.abort();
      const controller = new AbortController();
      abortRef.current = controller;

      // True only while this call is still the active, non-cancelled request.
      const isCurrent = () =>
        abortRef.current === controller && !controller.signal.aborted;

      const userMsg: ConversationMessage = {
        id: crypto.randomUUID(),
        role: 'user',
        content: prompt,
        timestamp: Date.now(),
      };
      setMessages((prev) => [...prev, userMsg]);
      setIsProcessing(true);
      setError(null);

      try {
        const request: ChatRequest = {
          prompt,
          signal: controller.signal,
        };
        const response = await client.chat(request);

        // The conversation may have been reset or superseded while awaiting.
        if (!isCurrent()) return;

        const assistantMsg: ConversationMessage = {
          id: response.id,
          role: 'assistant',
          content: response.content,
          tokens: response.usage.totalTokens,
          timestamp: Date.now(),
        };
        setMessages((prev) => [...prev, assistantMsg]);
      } catch (err) {
        // Failures of a cancelled or superseded request are not shown to the user.
        if (!isCurrent()) return;

        if (err instanceof Error) {
          if (err.name !== 'AbortError') {
            setError(err.message);
          }
        } else {
          // Non-Error throwables used to be dropped silently; surface them too.
          setError(String(err));
        }
      } finally {
        // Only the owning request may clear the flag; otherwise a stale request
        // finishing late would mark a newer in-flight request as done.
        if (abortRef.current === controller) {
          abortRef.current = null;
          setIsProcessing(false);
        }
      }
    },
    [client, isProcessing]
  );

  const reset = useCallback(() => {
    abortRef.current?.abort();
    // Drop ownership so the aborted request's cleanup cannot touch state.
    abortRef.current = null;
    setMessages([]);
    setError(null);
    setIsProcessing(false);
  }, []);

  return { messages, isProcessing, error, submit, reset };
}
```
**Architecture Rationale:**
- The `AbortController` reference prevents overlapping requests when users submit rapidly.
- `AbortError` filtering ensures intentional cancellations don't trigger the error UI.
- `crypto.randomUUID()` provides stable keys for user messages without relying on API response IDs.
- The `useCallback` dependency arrays keep `submit` and `reset` referentially stable between renders, so components that receive them are not re-rendered needlessly.
### Step 3: Render-Optimized View Component
The UI layer must handle scroll management, loading states, and error display without triggering layout thrashing. Separating message rendering from input handling improves maintainability.
// components/MessageConsole.tsx
import { useState, useRef, useEffect, FormEvent } from 'react';
import { useConversationEngine } from '../hooks/useConversationEngine';
import { DeepSeekClient } from '../lib/deepseek-client';
// Module-level client instance: created once when this module is imported and
// passed to the conversation hook by MessageConsole below.
// Falls back to the public DeepSeek endpoint and an empty API key when the
// corresponding env variables are not defined.
// NOTE(review): the key is read from a VITE_-prefixed variable, which Vite exposes
// to client-side code — confirm this is only used for internal tools, or route
// requests through a backend proxy instead.
const client = new DeepSeekClient({
baseUrl: import.meta.env.VITE_DEEPSEEK_BASE_URL ?? 'https://api.deepseek.com/v1',
apiKey: import.meta.env.VITE_DEEPSEEK_API_KEY ?? '',
});
/**
 * Chat console view: renders the conversation history, a processing placeholder,
 * the latest error, and a form for sending prompts or clearing the conversation.
 *
 * All conversation state comes from `useConversationEngine`; this component only
 * owns the text currently typed into the input and the scroll anchor.
 */
export function MessageConsole() {
  const { messages, isProcessing, error, submit, reset } = useConversationEngine(client);
  const [input, setInput] = useState('');
  const scrollAnchor = useRef<HTMLDivElement>(null);

  // Keep the newest message (or the "Processing..." placeholder) in view.
  useEffect(() => {
    scrollAnchor.current?.scrollIntoView({ behavior: 'smooth', block: 'end' });
  }, [messages, isProcessing]);

  const handleSend = (e: FormEvent) => {
    e.preventDefault();
    // Mirror the hook's own guard so the typed text is never cleared for a
    // submit that would be ignored.
    if (isProcessing || !input.trim()) return;
    // `submit` handles its own errors; `void` marks the promise as intentionally un-awaited.
    void submit(input);
    setInput('');
  };

  return (
    <div className="flex flex-col h-[600px] border rounded-lg bg-surface">
      <div className="flex-1 overflow-y-auto p-4 space-y-4">
        {messages.map((msg) => (
          <div
            key={msg.id}
            className={`p-3 rounded-lg max-w-[80%] ${
              msg.role === 'user' ? 'ml-auto bg-primary text-primary-foreground' : 'bg-muted'
            }`}
          >
            <p className="text-sm">{msg.content}</p>
            {/* Explicit numeric check: `msg.tokens && ...` would render a stray "0"
                when the usage count is zero. */}
            {typeof msg.tokens === 'number' && msg.tokens > 0 && (
              <span className="text-xs opacity-60 mt-1 block">
                {msg.tokens} tokens
              </span>
            )}
          </div>
        ))}
        {isProcessing && (
          <div className="p-3 rounded-lg bg-muted animate-pulse">Processing...</div>
        )}
        {error && (
          <div className="p-3 rounded-lg bg-destructive/10 text-destructive text-sm">
            {error}
          </div>
        )}
        {/* Invisible element at the end of the list used as the scroll target. */}
        <div ref={scrollAnchor} />
      </div>
      <form onSubmit={handleSend} className="p-3 border-t flex gap-2">
        <input
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Enter prompt..."
          disabled={isProcessing}
          className="flex-1 px-3 py-2 rounded border bg-background"
        />
        <button
          type="submit"
          disabled={isProcessing || !input.trim()}
          className="px-4 py-2 rounded bg-primary text-primary-foreground disabled:opacity-50"
        >
          Send
        </button>
        <button
          type="button"
          onClick={reset}
          disabled={isProcessing || messages.length === 0}
          className="px-4 py-2 rounded border disabled:opacity-50"
        >
          Clear
        </button>
      </form>
    </div>
  );
}
Architecture Rationale:
- Scroll anchor decouples DOM manipulation from state updates, preventing layout recalculation loops.
- Conditional rendering of token metadata keeps the UI clean while preserving observability.
- Form submission validation prevents empty or duplicate requests.
- Environment variable injection enables seamless local/production switching without code changes.
Pitfall Guide
1. Blocking the Main Thread with Synchronous Parsing
Explanation: Parsing large JSON responses or synchronously processing streaming chunks on the main thread freezes the UI, especially on mobile devices.
Fix: Use ReadableStream with chunked decoding, or offload heavy transformations to Web Workers. Keep UI updates batched via requestAnimationFrame or React's concurrent rendering.
2. Ignoring Request Cancellation
Explanation: Users often submit multiple prompts before the first response completes. Without cancellation, orphaned requests consume bandwidth, trigger race conditions, and corrupt state order.
Fix: Maintain a persistent AbortController reference. Call .abort() before each new request. Filter AbortError in catch blocks to avoid false error states.
3. Naive Retry Logic Without Backoff
Explanation: Immediate retries on 429 or 5xx responses amplify server load and guarantee repeated failures. Fixed delays ignore server-specified Retry-After headers.
Fix: Implement exponential backoff with jitter. Always parse and respect Retry-After. Cap maximum retries to prevent infinite loops.
4. Exposing API Keys in Client-Side Bundles
Explanation: Hardcoding or bundling API keys in frontend code allows extraction via browser devtools or source maps. This violates security best practices and risks quota exhaustion.
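Fix: Route requests through a backend proxy or serverless function and keep the key in a server-only environment variable. Do not store secrets in variables with a client-exposed prefix (VITE_, NEXT_PUBLIC_, etc.): those values are inlined into the browser bundle at build time. Reserve such variables for non-sensitive settings, and authenticate and rate-limit callers on the server.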
5. Token Drift from Untracked Context Windows
Explanation: Failing to track prompt and completion tokens leads to unexpected billing spikes and silent context window truncation. DeepSeek R1 supports extended contexts, but unbounded history grows linearly.
Fix: Store usage metadata per message. Implement sliding window truncation or summarization when token count exceeds thresholds. Log costs to analytics pipelines.
6. Race Conditions in Streaming State Updates
Explanation: Appending streaming chunks directly to state without atomic updates causes flickering, duplicated text, or lost characters when React batches renders.
Fix: Accumulate chunks in a mutable ref or local variable. Commit to React state only at stable boundaries (e.g., word breaks or periodic intervals). Use useRef for streaming buffers.
7. Over-Sanitizing Prompts and Responses
Explanation: Stripping all special characters or HTML entities breaks prompt engineering techniques, markdown formatting, and code generation capabilities.
Fix: Sanitize only for XSS vectors (`<script>`, `javascript:` URLs, inline event handlers). Preserve markdown, code blocks, and structural syntax. If model output is ever rendered as HTML, pass it through a dedicated HTML sanitizer such as DOMPurify rather than hand-written filters.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Low traffic, internal tool | Direct client-side fetch with env vars | Fastest implementation, minimal infra | Low (pay-per-token only) |
| Public-facing production app | Backend proxy + streaming | Prevents key leakage, enables rate limiting, adds auth | Medium (proxy compute + tokens) |
| High-frequency chatbot | Streaming + Web Worker parsing | Keeps main thread responsive, handles backpressure | Low (same token cost, better UX) |
| Enterprise compliance | Server-side orchestration + audit logging | Meets data residency, enables PII redaction | High (infra + compliance overhead) |
Configuration Template
# .env.local
VITE_DEEPSEEK_BASE_URL=https://api.deepseek.com/v1
VITE_DEEPSEEK_API_KEY=sk-your-key-here
VITE_MAX_RETRIES=3
VITE_DEFAULT_MODEL=deepseek-r1
VITE_MAX_TOKENS=1024
VITE_TEMPERATURE=0.7
// config/llm-config.ts
/**
 * Parses a numeric environment value.
 *
 * Falls back only when the value is missing, blank, or not a finite number, so an
 * explicit `0` (e.g. temperature 0 or zero retries) is preserved instead of being
 * replaced by the default as `Number(x) || fallback` would do.
 */
function envNumber(raw: unknown, fallback: number): number {
  if (typeof raw !== 'string' || raw.trim() === '') return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * LLM settings resolved from Vite environment variables, with defaults for every
 * optional value. `baseUrl` and `apiKey` are passed through as-is and are
 * undefined when the corresponding variable is not set.
 */
export const LLM_CONFIG = {
  baseUrl: import.meta.env.VITE_DEEPSEEK_BASE_URL,
  apiKey: import.meta.env.VITE_DEEPSEEK_API_KEY,
  maxRetries: envNumber(import.meta.env.VITE_MAX_RETRIES, 3),
  // `||` is intentional here: an empty model name is never valid.
  defaultModel: import.meta.env.VITE_DEFAULT_MODEL || 'deepseek-r1',
  maxTokens: envNumber(import.meta.env.VITE_MAX_TOKENS, 1024),
  temperature: envNumber(import.meta.env.VITE_TEMPERATURE, 0.7),
} as const;
Quick Start Guide
- Initialize Project: Create a React + TypeScript project using Vite or Next.js. Install dependencies:
npm install
- Add Configuration: Create `.env.local` with your DeepSeek API credentials and copy the `LLM_CONFIG` template.
- Implement Client & Hook: Place `DeepSeekClient` in `lib/` and `useConversationEngine` in `hooks/`. Ensure TypeScript strict mode is enabled.
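- Mount Component: Import `MessageConsole` into your main route. In the network tab, verify that each request carries the expected `Authorization` header and that the reply appears in the console once the response completes (the client shown here is non-streaming, so each reply arrives in a single response).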
- Validate Production Readiness: Test rapid submissions, network throttling, and error states. Confirm that `AbortController` cancels in-flight requests and that token metadata renders correctly.