Guia de Integração
Guia passo a passo para integrar o Urso Eco em sua aplicação. Exemplo completo com React, AudioContext e MediaRecorder.
Visão Geral
Este guia mostra como integrar o Urso Eco em uma aplicação web usando React. Cobriremos a configuração do WebSocket, captura de áudio via MediaRecorder/AudioContext, reprodução de áudio de resposta, tratamento de erros e estratégia de reconexão.
Pré-requisitos
- Chave de API Polar (pk-*)
- React 18+ com TypeScript
- Navegador moderno com suporte a WebSocket e MediaDevices
Passo 1: Configurar o WebSocket
Crie um hook customizado para gerenciar a conexão WebSocket:
// hooks/useUrsoEco.ts
import { useRef, useState, useCallback } from 'react';
// Options accepted by the useUrsoEco hook. All callbacks are optional.
interface UrsoEcoConfig {
  apiKey: string;
  persona?: string;
  voice?: string;
  // Incremental transcript text; isFinal marks the end of an utterance.
  onTextDelta?: (text: string, isFinal: boolean) => void;
  onError?: (error: string) => void;
  onSessionStart?: (sessionId: string) => void;
  onSessionEnd?: () => void;
  // Binary audio frames streamed back by the server (synthesized speech).
  // Wire this to useAudioPlayback().playAudioFrame — the original sample
  // called an undefined `playAudioFrame` here.
  onAudioFrame?: (audio: Blob) => void;
}

/**
 * React hook that owns the Urso Eco WebSocket session: connects and
 * authenticates, forwards server events to the caller's callbacks, sends
 * microphone audio upstream, and reconnects with exponential backoff when
 * the socket closes unexpectedly.
 */
export function useUrsoEco(config: UrsoEcoConfig) {
  const wsRef = useRef<WebSocket | null>(null);
  const [isConnected, setIsConnected] = useState(false);
  // NOTE(review): isListening is returned but never updated by this hook;
  // kept so the public interface stays unchanged.
  const [isListening, setIsListening] = useState(false);
  const reconnectAttemptsRef = useRef(0);

  // Dispatch a parsed JSON event from the server to the matching callback.
  const handleEvent = (msg: any) => {
    switch (msg.type) {
      case "session_start":
        setIsConnected(true);
        config.onSessionStart?.(msg.session_id);
        break;
      case "text_delta":
        config.onTextDelta?.(msg.text, msg.is_final);
        break;
      case "error":
        config.onError?.(msg.message);
        break;
      case "session_end":
        setIsConnected(false);
        config.onSessionEnd?.();
        break;
    }
  };

  const connect = useCallback(() => {
    const ws = new WebSocket("wss://api.polarai.com.br/v1/urso-eco/stream");
    ws.onopen = () => {
      // A successful connection resets the reconnect backoff.
      reconnectAttemptsRef.current = 0;
      ws.send(JSON.stringify({
        type: "auth",
        api_key: config.apiKey,
        persona: config.persona || "assistente_geral",
        config: {
          sample_rate: 16000,
          channels: 1,
          encoding: "pcm_s16le",
          voice: config.voice || "ana",
          language: "pt-BR",
          enable_transcription: true,
          enable_vad: true
        }
      }));
    };
    ws.onmessage = (event) => {
      if (event.data instanceof Blob) {
        // Binary frames are response audio; playback is the caller's job.
        config.onAudioFrame?.(event.data);
      } else {
        handleEvent(JSON.parse(event.data));
      }
    };
    ws.onclose = () => {
      setIsConnected(false);
      // Auto-reconnect only when the close was NOT initiated by disconnect()
      // (disconnect clears wsRef before closing) and retries remain.
      // Backoff: 1s, 2s, 4s, 8s, 16s, capped at 30s.
      if (wsRef.current === ws && reconnectAttemptsRef.current < 5) {
        const delay = Math.min(1000 * 2 ** reconnectAttemptsRef.current, 30000);
        reconnectAttemptsRef.current += 1;
        setTimeout(connect, delay);
      }
    };
    ws.onerror = () => {
      config.onError?.("Erro de conexão WebSocket");
    };
    wsRef.current = ws;
  }, [config]);

  const disconnect = useCallback(() => {
    const ws = wsRef.current;
    // Clear the ref first so onclose treats this close as intentional
    // and does not schedule a reconnect.
    wsRef.current = null;
    if (ws) {
      // Only sockets in the OPEN state may send; the original threw an
      // InvalidStateError when disconnecting a still-connecting socket.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify({ type: "end_session" }));
      }
      ws.close();
    }
    setIsConnected(false);
  }, []);

  // Send one frame of PCM16 microphone audio; silently dropped while the
  // socket is not open.
  const sendAudio = useCallback((audioData: ArrayBuffer) => {
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(audioData);
    }
  }, []);

  // Ask the server to stop speaking (barge-in).
  const interrupt = useCallback(() => {
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: "interrupt" }));
    }
  }, []);

  return {
    connect,
    disconnect,
    sendAudio,
    interrupt,
    isConnected,
    isListening
  };
}
Passo 2: Capturar Áudio do Microfone
// hooks/useAudioCapture.ts
import { useRef, useCallback, useState } from 'react';
/**
 * React hook that captures microphone audio and delivers it to the caller
 * as 16 kHz mono PCM16 ArrayBuffers (one per processor callback).
 *
 * NOTE(review): ScriptProcessorNode is deprecated; see the AudioWorklet tip
 * at the end of this guide for the modern replacement.
 */
export function useAudioCapture(onAudioData: (data: ArrayBuffer) => void) {
  const streamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const [isCapturing, setIsCapturing] = useState(false);

  const startCapture = useCallback(async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          // Browsers may treat sampleRate as a hint only; the AudioContext
          // below pins the graph to the 16 kHz rate the API expects.
          sampleRate: 16000,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });
      streamRef.current = stream;
      const audioContext = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioContext;
      const source = audioContext.createMediaStreamSource(stream);
      // Keep the source node so stopCapture can disconnect it (the
      // original leaked it).
      sourceRef.current = source;
      // 1024 samples = 64 ms of audio at 16 kHz per callback.
      const processor = audioContext.createScriptProcessor(1024, 1, 1);
      processorRef.current = processor;
      processor.onaudioprocess = (e) => {
        const float32Data = e.inputBuffer.getChannelData(0);
        const pcm16Data = convertFloat32ToPCM16(float32Data);
        onAudioData(pcm16Data.buffer);
      };
      source.connect(processor);
      // ScriptProcessor only fires while connected to the destination; it
      // writes nothing to its output buffer, so no audio is played back.
      processor.connect(audioContext.destination);
      setIsCapturing(true);
    } catch (error) {
      // Typically a permission denial from getUserMedia; rethrow so the
      // caller can surface it in the UI.
      console.error("Erro ao acessar microfone:", error);
      throw error;
    }
  }, [onAudioData]);

  const stopCapture = useCallback(() => {
    // Tear down the whole graph and null the refs so a later startCapture
    // starts from a clean state (the original left stale refs behind).
    sourceRef.current?.disconnect();
    sourceRef.current = null;
    processorRef.current?.disconnect();
    processorRef.current = null;
    void audioContextRef.current?.close();
    audioContextRef.current = null;
    streamRef.current?.getTracks().forEach(track => track.stop());
    streamRef.current = null;
    setIsCapturing(false);
  }, []);

  return { startCapture, stopCapture, isCapturing };
}
/**
 * Convert Web Audio float samples (range [-1, 1]) into 16-bit signed PCM.
 * Negative values scale by 0x8000 and positive by 0x7FFF so that both
 * extremes map exactly onto the Int16 range.
 */
function convertFloat32ToPCM16(float32Array: Float32Array): Int16Array {
  const out = new Int16Array(float32Array.length);
  float32Array.forEach((sample, i) => {
    const clamped = Math.min(1, Math.max(-1, sample));
    out[i] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  });
  return out;
}
Passo 3: Reproduzir Áudio de Resposta
// hooks/useAudioPlayback.ts
import { useRef, useCallback } from 'react';
/**
 * React hook that queues 24 kHz PCM16 audio frames from the server and
 * plays them back sequentially through a single AudioContext.
 */
export function useAudioPlayback() {
  const audioContextRef = useRef<AudioContext | null>(null);
  const queueRef = useRef<ArrayBuffer[]>([]);
  const isPlayingRef = useRef(false);
  // The frame currently playing, so stopPlayback can cut it off mid-frame.
  const currentSourceRef = useRef<AudioBufferSourceNode | null>(null);

  // Lazily create the playback context (24 kHz matches the server output).
  const getAudioContext = useCallback(() => {
    if (!audioContextRef.current) {
      audioContextRef.current = new AudioContext({ sampleRate: 24000 });
    }
    return audioContextRef.current;
  }, []);

  // Enqueue one binary frame; kick the drain loop if it is idle.
  const playAudioFrame = useCallback(async (blob: Blob) => {
    const arrayBuffer = await blob.arrayBuffer();
    queueRef.current.push(arrayBuffer);
    if (!isPlayingRef.current) {
      processQueue();
    }
  }, []);

  // Drain the queue one frame at a time; each frame is converted from
  // PCM16 to float samples and played to completion before the next.
  const processQueue = async () => {
    isPlayingRef.current = true;
    const ctx = getAudioContext();
    // Autoplay policies may leave the context suspended until a user
    // gesture; resume it before playing the first frame.
    if (ctx.state === "suspended") {
      await ctx.resume();
    }
    while (queueRef.current.length > 0) {
      const buffer = queueRef.current.shift()!;
      const pcm16 = new Int16Array(buffer);
      const float32 = new Float32Array(pcm16.length);
      for (let i = 0; i < pcm16.length; i++) {
        float32[i] = pcm16[i] / 0x8000;
      }
      const audioBuffer = ctx.createBuffer(1, float32.length, 24000);
      audioBuffer.getChannelData(0).set(float32);
      const source = ctx.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(ctx.destination);
      currentSourceRef.current = source;
      await new Promise<void>((resolve) => {
        // onended also fires after source.stop(), so stopPlayback cannot
        // leave this promise hanging.
        source.onended = () => resolve();
        source.start();
      });
    }
    currentSourceRef.current = null;
    isPlayingRef.current = false;
  };

  // Discard queued frames AND silence the frame currently playing — the
  // original only cleared the queue, letting the current frame finish,
  // which defeats its use for barge-in interruption.
  const stopPlayback = useCallback(() => {
    queueRef.current = [];
    currentSourceRef.current?.stop();
    currentSourceRef.current = null;
    isPlayingRef.current = false;
  }, []);

  return { playAudioFrame, stopPlayback };
}
Passo 4: Componente React Completo
// components/VoiceChat.tsx
import React, { useState, useRef } from 'react';
import { useUrsoEco } from '../hooks/useUrsoEco';
import { useAudioCapture } from '../hooks/useAudioCapture';
import { useAudioPlayback } from '../hooks/useAudioPlayback';
/**
 * Complete voice-chat component wiring the three hooks together:
 * WebSocket session (useUrsoEco), microphone capture (useAudioCapture)
 * and response playback (useAudioPlayback).
 *
 * NOTE(review): nothing in this sample feeds incoming server audio into
 * playAudioFrame — the transport hook must hand binary frames to it.
 */
export function VoiceChat() {
  const [transcript, setTranscript] = useState('');
  const [status, setStatus] = useState<'idle' | 'connecting' | 'connected' | 'error'>('idle');
  // Resolves the promise handleStart awaits once session_start arrives.
  const sessionReadyRef = useRef<(() => void) | null>(null);
  const { playAudioFrame, stopPlayback } = useAudioPlayback();
  const ursoEco = useUrsoEco({
    apiKey: process.env.NEXT_PUBLIC_POLAR_API_KEY!,
    persona: 'assistente_geral',
    voice: 'ana',
    onTextDelta: (text, isFinal) => {
      setTranscript(prev => prev + text);
      if (isFinal) setTranscript(prev => prev + '\n');
    },
    onSessionStart: (sessionId) => {
      setStatus('connected');
      // Unblock handleStart, which is waiting for the handshake.
      sessionReadyRef.current?.();
      sessionReadyRef.current = null;
      console.log('Sessão iniciada:', sessionId);
    },
    onSessionEnd: () => {
      setStatus('idle');
    },
    onError: (error) => {
      setStatus('error');
      console.error('Erro:', error);
    }
  });
  const audioCapture = useAudioCapture(ursoEco.sendAudio);

  const handleStart = async () => {
    setStatus('connecting');
    try {
      // Wait for the real session_start event instead of a fixed 1 s sleep
      // (the original raced the handshake); give up after 10 s.
      const sessionReady = new Promise<void>((resolve, reject) => {
        sessionReadyRef.current = resolve;
        setTimeout(() => reject(new Error('Timeout ao conectar')), 10000);
      });
      ursoEco.connect();
      await sessionReady;
      await audioCapture.startCapture();
    } catch (error) {
      // Microphone permission denial or connection timeout lands here.
      console.error('Erro ao iniciar conversa:', error);
      ursoEco.disconnect();
      setStatus('error');
    }
  };

  const handleStop = () => {
    audioCapture.stopCapture();
    stopPlayback();
    ursoEco.disconnect();
    setStatus('idle');
  };

  // Barge-in: silence local playback and tell the server to stop speaking.
  const handleInterrupt = () => {
    stopPlayback();
    ursoEco.interrupt();
  };

  return (
    <div className="voice-chat">
      <div className="status">
        Status: {status}
      </div>
      <div className="controls">
        {status === 'idle' ? (
          <button onClick={handleStart}>
            Iniciar Conversa
          </button>
        ) : (
          <>
            <button onClick={handleStop}>
              Encerrar
            </button>
            <button onClick={handleInterrupt}>
              Interromper
            </button>
          </>
        )}
      </div>
      <div className="transcript">
        <h3>Transcrição</h3>
        <pre>{transcript}</pre>
      </div>
    </div>
  );
}
Passo 5: Tratamento de Erros
// Erros comuns e como tratá-los
/**
 * Dispatch a structured server error to the appropriate recovery strategy.
 *
 * @param error - error event from the server ({ code, message, retry_after }).
 * @param reconnect - callback that re-establishes the session. Optional with
 *   a no-op default so existing call sites keep working; the original
 *   referenced an undefined global `reconnect`.
 */
function handleWebSocketError(error: any, reconnect: () => void = () => {}) {
  switch (error.code) {
    case 'auth_failed':
      // Invalid API key — retrying will not help; fix the credentials.
      console.error('API key inválida. Verifique sua chave pk-*');
      break;
    case 'rate_limited': {
      // Braces give retryAfter its own block scope: a lexical declaration
      // directly inside a case clause is shared with the other cases.
      const retryAfter = error.retry_after || 30;
      setTimeout(() => reconnect(), retryAfter * 1000);
      break;
    }
    case 'quota_exceeded':
      // Out of credits — user action required, no automatic retry.
      console.error('Créditos insuficientes. Recarregue sua conta.');
      break;
    case 'session_timeout':
      // Session expired — reconnect immediately.
      reconnect();
      break;
    default:
      console.error('Erro desconhecido:', error.message);
      reconnect();
  }
}
Passo 6: Estratégia de Reconexão
// utils/reconnect.ts
/**
 * Exponential-backoff helper for WebSocket reconnection.
 * Delays grow as baseDelay * 2^attempt plus up to 1 s of random jitter,
 * capped at 30 s, for at most maxAttempts tries.
 */
class ReconnectManager {
  private attempts = 0;

  // Limits are now injectable; the defaults preserve the original
  // hard-coded behaviour (5 attempts, 1 s base delay).
  constructor(
    private readonly maxAttempts = 5,
    private readonly baseDelay = 1000 // milliseconds
  ) {}

  // Backoff schedule with the defaults: 1s, 2s, 4s, 8s, 16s (+ jitter).
  getDelay(): number {
    const delay = this.baseDelay * Math.pow(2, this.attempts);
    // Jitter spreads clients out to avoid a thundering herd of
    // simultaneous reconnections.
    const jitter = Math.random() * 1000;
    return Math.min(delay + jitter, 30000); // hard cap at 30 s
  }

  shouldRetry(): boolean {
    return this.attempts < this.maxAttempts;
  }

  /** Wait out the backoff delay, then invoke the caller's connect function. */
  async reconnect(connectFn: () => void): Promise<void> {
    if (!this.shouldRetry()) {
      console.error('Número máximo de tentativas atingido');
      return;
    }
    const delay = this.getDelay();
    console.log(`Reconectando em ${delay}ms (tentativa ${this.attempts + 1})`);
    await new Promise(resolve => setTimeout(resolve, delay));
    this.attempts++;
    connectFn();
  }

  /** Call after a successful connection so future backoff starts fresh. */
  reset(): void {
    this.attempts = 0;
  }
}
Dicas de Performance
- Tamanho do buffer: Use 1024 samples (64 ms a 16 kHz) para o ScriptProcessor. Buffers menores reduzem latência mas aumentam carga de CPU.
- Echo cancellation: Sempre habilite echoCancellation no getUserMedia para evitar feedback.
- Codec Opus: Para conexões com banda limitada, considere usar encoding "opus" na configuração.
- Web Workers: Para processamento intensivo de áudio, considere usar um AudioWorklet em vez de ScriptProcessor (depreciado).
- Permissões: Solicite permissão de microfone apenas quando o usuário clicar para iniciar, não no carregamento da página.