Polar

Guia de Integração

Guia passo a passo para integrar o Urso Eco em sua aplicação. Exemplo completo com React, AudioContext e MediaRecorder.

Visão Geral

Este guia mostra como integrar o Urso Eco em uma aplicação web usando React. Cobriremos a configuração do WebSocket, captura de áudio via MediaRecorder/AudioContext, reprodução de áudio de resposta, tratamento de erros e estratégia de reconexão.

Pré-requisitos

  • Chave de API Polar (pk-*)
  • React 18+ com TypeScript
  • Navegador moderno com suporte a WebSocket e MediaDevices

Passo 1: Configurar o WebSocket

Crie um hook customizado para gerenciar a conexão WebSocket:

// hooks/useUrsoEco.ts
import { useRef, useState, useCallback } from 'react';

/**
 * Options for the useUrsoEco hook.
 */
interface UrsoEcoConfig {
  /** Polar API key (pk-*), sent in the auth handshake. */
  apiKey: string;
  /** Persona name; defaults to "assistente_geral" when omitted. */
  persona?: string;
  /** Voice name; defaults to "ana" when omitted. */
  voice?: string;
  /** Incremental transcript text; isFinal marks the end of an utterance. */
  onTextDelta?: (text: string, isFinal: boolean) => void;
  /** Binary audio frame from the server — hand it to an audio playback sink. */
  onAudioFrame?: (audio: Blob) => void;
  /** Human-readable error description. */
  onError?: (error: string) => void;
  /** Invoked with the server-assigned session id once the session starts. */
  onSessionStart?: (sessionId: string) => void;
  /** Invoked when the server ends the session. */
  onSessionEnd?: () => void;
}

/**
 * React hook that manages a streaming WebSocket session with Urso Eco.
 *
 * Performs the auth handshake on open, dispatches JSON server events to the
 * callbacks in `config`, forwards binary audio frames to `onAudioFrame`, and
 * auto-reconnects with exponential backoff on unexpected closes. Calling
 * disconnect() suppresses the auto-reconnect.
 */
export function useUrsoEco(
  config: UrsoEcoConfig & { onAudioFrame?: (audio: Blob) => void }
) {
  const wsRef = useRef<WebSocket | null>(null);
  const [isConnected, setIsConnected] = useState(false);
  // NOTE(review): isListening is exposed but never updated here — confirm
  // whether a server event is supposed to drive it.
  const [isListening, setIsListening] = useState(false);
  const audioContextRef = useRef<AudioContext | null>(null);
  // Reconnection bookkeeping: backoff counter plus a flag so an intentional
  // disconnect() never schedules a reconnect.
  const reconnectAttemptsRef = useRef(0);
  const closedByUserRef = useRef(false);

  const connect = useCallback(() => {
    closedByUserRef.current = false;
    const ws = new WebSocket("wss://api.polarai.com.br/v1/urso-eco/stream");

    ws.onopen = () => {
      reconnectAttemptsRef.current = 0; // connection succeeded: reset backoff
      ws.send(JSON.stringify({
        type: "auth",
        api_key: config.apiKey,
        persona: config.persona || "assistente_geral",
        config: {
          sample_rate: 16000,
          channels: 1,
          encoding: "pcm_s16le",
          voice: config.voice || "ana",
          language: "pt-BR",
          enable_transcription: true,
          enable_vad: true
        }
      }));
    };

    ws.onmessage = (event) => {
      if (event.data instanceof Blob) {
        // Binary frames carry synthesized audio; playback is the caller's job.
        config.onAudioFrame?.(event.data);
      } else {
        handleEvent(JSON.parse(event.data));
      }
    };

    ws.onclose = () => {
      setIsConnected(false);
      if (!closedByUserRef.current) {
        scheduleReconnect();
      }
    };

    ws.onerror = () => {
      config.onError?.("Erro de conexão WebSocket");
    };

    wsRef.current = ws;
  }, [config]);

  // Exponential backoff: 1s, 2s, 4s ... capped at 30s, at most 5 attempts.
  const scheduleReconnect = () => {
    if (reconnectAttemptsRef.current >= 5) {
      config.onError?.("Número máximo de tentativas de reconexão atingido");
      return;
    }
    const delay = Math.min(1000 * 2 ** reconnectAttemptsRef.current, 30000);
    reconnectAttemptsRef.current++;
    setTimeout(connect, delay);
  };

  // Routes a parsed JSON server event to the matching config callback.
  const handleEvent = (msg: any) => {
    switch (msg.type) {
      case "session_start":
        setIsConnected(true);
        config.onSessionStart?.(msg.session_id);
        break;
      case "text_delta":
        config.onTextDelta?.(msg.text, msg.is_final);
        break;
      case "error":
        config.onError?.(msg.message);
        break;
      case "session_end":
        setIsConnected(false);
        config.onSessionEnd?.();
        break;
    }
  };

  const disconnect = useCallback(() => {
    closedByUserRef.current = true; // suppress auto-reconnect on this close
    const ws = wsRef.current;
    if (ws) {
      // Only OPEN sockets accept sends; a CONNECTING socket would throw.
      if (ws.readyState === WebSocket.OPEN) {
        ws.send(JSON.stringify({ type: "end_session" }));
      }
      ws.close();
      wsRef.current = null;
    }
    setIsConnected(false);
  }, []);

  // Streams one raw PCM16 frame to the server; drops it silently if the
  // socket is not OPEN (e.g. during a reconnect window).
  const sendAudio = useCallback((audioData: ArrayBuffer) => {
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(audioData);
    }
  }, []);

  // Asks the server to stop the in-flight response (barge-in).
  const interrupt = useCallback(() => {
    if (wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(JSON.stringify({ type: "interrupt" }));
    }
  }, []);

  return {
    connect,
    disconnect,
    sendAudio,
    interrupt,
    isConnected,
    isListening
  };
}

Passo 2: Capturar Áudio do Microfone

// hooks/useAudioCapture.ts
import { useRef, useCallback, useState } from 'react';

/**
 * React hook that captures microphone audio as 16 kHz mono PCM16 and delivers
 * each ~64 ms frame to `onAudioData` as an ArrayBuffer of int16 samples.
 *
 * NOTE: ScriptProcessorNode is deprecated — an AudioWorklet is the modern
 * replacement for production use.
 */
export function useAudioCapture(onAudioData: (data: ArrayBuffer) => void) {
  const streamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  const [isCapturing, setIsCapturing] = useState(false);

  const stopCapture = useCallback(() => {
    // Tear down in reverse order of construction and null every ref so a
    // later startCapture() starts from a clean slate instead of reusing a
    // closed AudioContext.
    processorRef.current?.disconnect();
    processorRef.current = null;
    audioContextRef.current?.close();
    audioContextRef.current = null;
    streamRef.current?.getTracks().forEach(track => track.stop());
    streamRef.current = null;
    setIsCapturing(false);
  }, []);

  const startCapture = useCallback(async () => {
    // Guard: a second start while capturing would leak the previous stream.
    if (streamRef.current) return;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: 16000,
          channelCount: 1,
          echoCancellation: true, // avoids feedback from speaker playback
          noiseSuppression: true,
          autoGainControl: true
        }
      });

      streamRef.current = stream;

      const audioContext = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioContext;

      const source = audioContext.createMediaStreamSource(stream);
      // 1024 samples at 16 kHz = 64 ms per frame.
      const processor = audioContext.createScriptProcessor(1024, 1, 1);
      processorRef.current = processor;

      processor.onaudioprocess = (e) => {
        const float32Data = e.inputBuffer.getChannelData(0);
        const pcm16Data = convertFloat32ToPCM16(float32Data);
        onAudioData(pcm16Data.buffer);
      };

      source.connect(processor);
      // ScriptProcessor must be connected to a destination to fire events.
      processor.connect(audioContext.destination);
      setIsCapturing(true);
    } catch (error) {
      console.error("Erro ao acessar microfone:", error);
      stopCapture(); // release anything partially acquired before rethrowing
      throw error;
    }
  }, [onAudioData, stopCapture]);

  return { startCapture, stopCapture, isCapturing };
}

/**
 * Converts Web Audio float samples (nominally in [-1, 1]) to signed 16-bit
 * PCM. Out-of-range values are clamped before scaling; negative samples map
 * onto [-32768, 0) and non-negative onto [0, 32767].
 */
function convertFloat32ToPCM16(float32Array: Float32Array): Int16Array {
  const out = new Int16Array(float32Array.length);
  float32Array.forEach((sample, i) => {
    const clamped = Math.min(1, Math.max(-1, sample));
    out[i] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  });
  return out;
}

Passo 3: Reproduzir Áudio de Resposta

// hooks/useAudioPlayback.ts
import { useRef, useCallback } from 'react';

/**
 * React hook that queues incoming 24 kHz mono PCM16 frames and plays them
 * back sequentially through a lazily-created AudioContext.
 */
export function useAudioPlayback() {
  const audioContextRef = useRef<AudioContext | null>(null);
  const queueRef = useRef<ArrayBuffer[]>([]);
  const isPlayingRef = useRef(false);
  // The source currently playing, so stopPlayback() can cut it off mid-chunk.
  const currentSourceRef = useRef<AudioBufferSourceNode | null>(null);

  const getAudioContext = useCallback(() => {
    // Lazy creation: most browsers require a user gesture before audio starts.
    if (!audioContextRef.current) {
      audioContextRef.current = new AudioContext({ sampleRate: 24000 });
    }
    return audioContextRef.current;
  }, []);

  const playAudioFrame = useCallback(async (blob: Blob) => {
    const arrayBuffer = await blob.arrayBuffer();
    queueRef.current.push(arrayBuffer);

    // Start the drain loop only if one is not already running.
    if (!isPlayingRef.current) {
      processQueue();
    }
  }, []);

  // Drains the queue one chunk at a time, awaiting each chunk's end before
  // starting the next so frames never overlap.
  const processQueue = async () => {
    isPlayingRef.current = true;
    const ctx = getAudioContext();

    while (queueRef.current.length > 0) {
      const buffer = queueRef.current.shift()!;
      // Frames are int16 LE; an odd byteLength would make this throw —
      // assumed even per the pcm_s16le stream config (TODO confirm with API).
      const pcm16 = new Int16Array(buffer);
      if (pcm16.length === 0) continue; // createBuffer(…, 0, …) would throw

      const float32 = new Float32Array(pcm16.length);
      for (let i = 0; i < pcm16.length; i++) {
        float32[i] = pcm16[i] / 0x8000; // scale int16 into [-1, 1)
      }

      const audioBuffer = ctx.createBuffer(1, float32.length, 24000);
      audioBuffer.getChannelData(0).set(float32);

      const source = ctx.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(ctx.destination);
      currentSourceRef.current = source;

      // Resolve when this chunk finishes (or is stopped by stopPlayback).
      await new Promise<void>((resolve) => {
        source.onended = () => resolve();
        source.start();
      });
      currentSourceRef.current = null;
    }

    isPlayingRef.current = false;
  };

  const stopPlayback = useCallback(() => {
    // Drop queued frames AND silence the frame currently playing. stop()
    // fires onended, which lets processQueue() unwind on its own and reset
    // isPlayingRef.
    queueRef.current = [];
    try {
      currentSourceRef.current?.stop();
    } catch {
      // Source already ended — nothing to stop.
    }
    currentSourceRef.current = null;
  }, []);

  return { playAudioFrame, stopPlayback };
}

Passo 4: Componente React Completo

// components/VoiceChat.tsx
import React, { useState, useRef } from 'react';
import { useUrsoEco } from '../hooks/useUrsoEco';
import { useAudioCapture } from '../hooks/useAudioCapture';
import { useAudioPlayback } from '../hooks/useAudioPlayback';

/**
 * Full voice-chat UI: wires the Urso Eco WebSocket hook to microphone
 * capture and response playback, and renders status, controls and the
 * running transcript.
 */
export function VoiceChat() {
  const [transcript, setTranscript] = useState('');
  const [status, setStatus] = useState<'idle' | 'connecting' | 'connected' | 'error'>('idle');
  // NOTE(review): playAudioFrame is obtained here but never handed to
  // useUrsoEco, so server audio frames are not actually played in this
  // example — confirm how the hook is meant to receive them.
  const { playAudioFrame, stopPlayback } = useAudioPlayback();

  const ursoEco = useUrsoEco({
    // NOTE(review): NEXT_PUBLIC_* values are embedded in the client bundle
    // and visible to end users — confirm that pk-* keys are safe to expose.
    apiKey: process.env.NEXT_PUBLIC_POLAR_API_KEY!,
    persona: 'assistente_geral',
    voice: 'ana',
    // Append streamed text; a newline separates finished utterances.
    onTextDelta: (text, isFinal) => {
      setTranscript(prev => prev + text);
      if (isFinal) setTranscript(prev => prev + '\n');
    },
    onSessionStart: (sessionId) => {
      setStatus('connected');
      console.log('Sessão iniciada:', sessionId);
    },
    onSessionEnd: () => {
      setStatus('idle');
    },
    onError: (error) => {
      setStatus('error');
      console.error('Erro:', error);
    }
  });

  // Every captured PCM frame is streamed straight to the WebSocket.
  const audioCapture = useAudioCapture(ursoEco.sendAudio);

  const handleStart = async () => {
    setStatus('connecting');
    ursoEco.connect();

    // Wait for the connection to settle.
    // NOTE(review): a fixed 1s delay is a race — prefer starting capture in
    // response to the session_start event instead.
    await new Promise(resolve => setTimeout(resolve, 1000));
    await audioCapture.startCapture();
  };

  // Stop capture first so no frames are sent into a closing socket.
  const handleStop = () => {
    audioCapture.stopCapture();
    stopPlayback();
    ursoEco.disconnect();
    setStatus('idle');
  };

  // Barge-in: silence local playback and tell the server to stop responding.
  const handleInterrupt = () => {
    stopPlayback();
    ursoEco.interrupt();
  };

  return (
    <div className="voice-chat">
      <div className="status">
        Status: {status}
      </div>

      <div className="controls">
        {status === 'idle' ? (
          <button onClick={handleStart}>
            Iniciar Conversa
          </button>
        ) : (
          <>
            <button onClick={handleStop}>
              Encerrar
            </button>
            <button onClick={handleInterrupt}>
              Interromper
            </button>
          </>
        )}
      </div>

      <div className="transcript">
        <h3>Transcrição</h3>
        <pre>{transcript}</pre>
      </div>
    </div>
  );
}

Passo 5: Tratamento de Erros

// Erros comuns e como tratá-los
/**
 * Maps an Urso Eco error event to a recovery action.
 *
 * @param error     Error payload; expected shape { code?, message?, retry_after? }.
 * @param reconnect Invoked when the error is recoverable by reconnecting
 *                  (defaults to a no-op so callers may omit it).
 */
function handleWebSocketError(
  error: { code?: string; message?: string; retry_after?: number },
  reconnect: () => void = () => {}
) {
  switch (error.code) {
    case 'auth_failed':
      // Invalid API key — not recoverable; fix credentials.
      console.error('API key inválida. Verifique sua chave pk-*');
      break;

    case 'rate_limited': {
      // Throttled: honor the server-advertised interval, then retry.
      // ?? (not ||) so an explicit retry_after of 0 is respected.
      const retryAfter = error.retry_after ?? 30;
      setTimeout(reconnect, retryAfter * 1000);
      break;
    }

    case 'quota_exceeded':
      // Out of credits — not recoverable client-side.
      console.error('Créditos insuficientes. Recarregue sua conta.');
      break;

    case 'session_timeout':
      // Session expired — open a fresh one.
      reconnect();
      break;

    default:
      console.error('Erro desconhecido:', error.message);
      reconnect();
  }
}

Passo 6: Estratégia de Reconexão

// utils/reconnect.ts
/**
 * Tracks reconnection attempts and applies exponential backoff with jitter.
 * Call reset() after a successful connection so the next failure starts the
 * backoff schedule from the beginning.
 */
class ReconnectManager {
  private attemptsMade = 0;
  private readonly maxAttempts = 5;
  private readonly baseDelayMs = 1000; // 1 second

  /** Delay before the next attempt: 1s, 2s, 4s, 8s, 16s plus up to 1s jitter, capped at 30s. */
  getDelay(): number {
    const exponential = this.baseDelayMs * 2 ** this.attemptsMade;
    // Random jitter desynchronizes clients (avoids the thundering herd).
    return Math.min(exponential + Math.random() * 1000, 30000);
  }

  /** Whether another attempt is still allowed. */
  shouldRetry(): boolean {
    return this.attemptsMade < this.maxAttempts;
  }

  /** Waits out the backoff delay, bumps the counter, then invokes connectFn. */
  async reconnect(connectFn: () => void): Promise<void> {
    if (!this.shouldRetry()) {
      console.error('Número máximo de tentativas atingido');
      return;
    }

    const delay = this.getDelay();
    console.log(`Reconectando em ${delay}ms (tentativa ${this.attemptsMade + 1})`);

    await new Promise<void>((resolve) => setTimeout(resolve, delay));
    this.attemptsMade += 1;
    connectFn();
  }

  /** Clears the attempt counter (call after a successful connection). */
  reset(): void {
    this.attemptsMade = 0;
  }
}

Dicas de Performance

  • Tamanho do buffer: Use 1024 samples (64ms a 16kHz) para o ScriptProcessor. Buffers menores reduzem latência mas aumentam carga de CPU.
  • Echo cancellation: Sempre habilite echoCancellation no getUserMedia para evitar feedback.
  • Codec Opus: Para conexões com banda limitada, considere usar encoding "opus" na configuração.
  • Web Workers: Para processamento intensivo de áudio, considere usar um AudioWorklet em vez de ScriptProcessor (depreciado).
  • Permissões: Solicite permissão de microfone apenas quando o usuário clicar para iniciar, não no carregamento da página.

On this page