// Voice mode — record with MediaRecorder, transcribe through the backend
// /api/transcribe endpoint, generate the reply with the model, and speak it
// back with browser SpeechSynthesis.
//
// Push-to-talk: press the mic button to start recording, press again to stop
// and send. Surfaces a readable error if the mic is denied, the page is served
// from file://, or transcription fails.
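//
// Usage sketch (prop names come from this file; the sample values below are
// placeholders, not verified GOAL_PROTOCOLS keys or theme colors):
//
//   <VoiceMode
//     accent="#8ef0c2"                                         // any CSS color string
//     goal={null}                                              // or a key of GOAL_PROTOCOLS
//     profile={{ weight: 80, age: 34, experience: 'beginner' }}
//     onClose={() => setVoiceOpen(false)}                      // setVoiceOpen: hypothetical parent state setter
//   />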

function VoiceMode({ accent, goal, profile, onClose }) {
  const protocol = goal ? GOAL_PROTOCOLS[goal] : null;
  const [phase, setPhase] = React.useState('idle'); // idle | listening | thinking | speaking
  const [transcript, setTranscript] = React.useState('');
  const [agentText, setAgentText] = React.useState(
    protocol
      ? `Ready when you are. We're working on ${protocol.label.toLowerCase()}. What would you like to know?`
      : `Hi, I'm Pepagent. Tell me what you'd like to improve and I'll design a peptide protocol.`
  );
  const [muted, setMuted] = React.useState(false);
  const [error, setError] = React.useState('');

  const mediaStreamRef = React.useRef(null);
  const recorderRef = React.useRef(null);
  const chunksRef = React.useRef([]);
  const historyRef = React.useRef([]);
  const utteranceRef = React.useRef(null);
  const audioCtxRef = React.useRef(null);

  // Pick the most natural-sounding voice the browser exposes. Voices are
  // loaded asynchronously, so this may return null on the first call —
  // we re-call it inside speak() each time.
  const pickBestVoice = () => {
    const voices = window.speechSynthesis.getVoices();
    if (!voices.length) return null;

    // Priority tiers — tier 1 = best, tier N = last resort.
    const tiers = [
      // Microsoft "Natural" online neural voices (Edge / Win11)
      (v) => /Microsoft.*(Natural|Online).*English/i.test(v.name) && /en[-_]US/i.test(v.lang),
      (v) => /Aria|Jenny|Guy|Davis|Tony|Sara|Nancy/.test(v.name) && /Natural|Online/i.test(v.name),
      // Google natural voices
      (v) => /Google.*US.*English/i.test(v.name),
      (v) => /Google/i.test(v.name) && /en[-_]US/i.test(v.lang),
      // Apple premium voices
      (v) => /(Samantha|Ava|Allison|Susan|Tom|Alex).*Premium/i.test(v.name),
      (v) => /(Samantha|Ava|Allison)/i.test(v.name),
      // Anything advertising itself as neural
      (v) => /neural/i.test(v.name),
      // Fallback: any en-US local voice that ISN'T David or Zira (the robotic Windows defaults)
      (v) => /en[-_]US/i.test(v.lang) && !/David|Zira/i.test(v.name),
      // Any English
      (v) => /^en/i.test(v.lang),
      // First voice
      () => true,
    ];
    for (const test of tiers) {
      const match = voices.find(test);
      if (match) return match;
    }
    return voices[0];
  };

  // Voices load async in Chrome — kick the loader so they're ready when needed.
  React.useEffect(() => {
    if (window.speechSynthesis && window.speechSynthesis.getVoices().length === 0) {
      window.speechSynthesis.onvoiceschanged = () => {
        // Trigger a re-pick on next speak call. No state change needed.
      };
    }
  }, []);

  // Speak an agent line aloud. The voice is re-picked on every call because
  // browsers load the voice list asynchronously (see pickBestVoice above).
  const speak = React.useCallback((text) => {
    if (muted || !window.speechSynthesis) {
      setPhase('idle');
      return;
    }
    window.speechSynthesis.cancel();
    const u = new SpeechSynthesisUtterance(text);
    u.rate = 1.0;
    u.pitch = 1.0;
    u.volume = 1.0;
    const voice = pickBestVoice();
    if (voice) {
      u.voice = voice;
      u.lang = voice.lang;
      console.log('[Pepagent] TTS voice:', voice.name, voice.lang);
    }
    u.onstart = () => setPhase('speaking');
    u.onend = () => setPhase('idle');
    u.onerror = () => setPhase('idle');
    utteranceRef.current = u;
    window.speechSynthesis.speak(u);
  }, [muted]);

  // Greet on mount
  React.useEffect(() => {
    const t = setTimeout(() => speak(agentText), 400);
    return () => clearTimeout(t);
  }, []);

  // Cleanup on unmount
  React.useEffect(() => {
    return () => {
      try { recorderRef.current?.stop(); } catch (e) {}
      mediaStreamRef.current?.getTracks().forEach((t) => t.stop());
      try { audioCtxRef.current?.close(); } catch (e) {}
      window.speechSynthesis.cancel();
    };
  }, []);

  // Pick a mime type the browser supports
  const pickMime = () => {
    const candidates = ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', 'audio/ogg;codecs=opus'];
    for (const c of candidates) {
      if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported(c)) return c;
    }
    return '';
  };

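  // Start push-to-talk: request the mic, route it through a Web Audio gain
  // boost, and begin recording with MediaRecorder. Failures surface in the
  // `error` state rather than throwing.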
  const startListening = async () => {
    setError('');
    setTranscript('');
    window.speechSynthesis.cancel();
    if (location.protocol === 'file:') {
      setError("Camera/mic don't work when opened as a file. Run a local server (see README) and visit http://localhost:8000.");
      return;
    }
    if (!navigator.mediaDevices?.getUserMedia) {
      setError("This browser doesn't expose microphone APIs.");
      return;
    }
    try {
      const rawStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          // Disable browser-level auto-gain — we apply our own in Web Audio
          // for a controllable, larger boost. Browser AGC tends to UNDER-shoot
          // for soft speakers and Deepgram then trims them as silence.
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: false,
          sampleRate: 48000,
          channelCount: 1,
        },
      });
      mediaStreamRef.current = rawStream;

      // Apply a Web Audio gain boost so quiet speech reaches Deepgram loud
      // enough to be transcribed. We feed the boosted output back into a
      // MediaStream that the MediaRecorder records from.
      let recordStream = rawStream;
      try {
        const AC = window.AudioContext || window.webkitAudioContext;
        if (AC) {
          const audioCtx = new AC();
          audioCtxRef.current = audioCtx;
          const source = audioCtx.createMediaStreamSource(rawStream);
          const gain = audioCtx.createGain();
          gain.gain.value = 4.0; // 4× boost — helps quiet mics; very loud input may clip.
          const dest = audioCtx.createMediaStreamDestination();
          source.connect(gain);
          gain.connect(dest);
          recordStream = dest.stream;
        }
      } catch (audioErr) {
        // Fall back to the raw stream if Web Audio fails for any reason.
        console.warn('[Pepagent] gain boost unavailable, using raw stream:', audioErr);
      }

      const mime = pickMime();
      const opts = mime ? { mimeType: mime, audioBitsPerSecond: 64000 } : { audioBitsPerSecond: 64000 };
      const rec = new MediaRecorder(recordStream, opts);
      chunksRef.current = [];
      rec.ondataavailable = (e) => { if (e.data && e.data.size > 0) chunksRef.current.push(e.data); };
      rec.onstop = async () => {
        const blob = new Blob(chunksRef.current, { type: rec.mimeType || 'audio/webm' });
        mediaStreamRef.current?.getTracks().forEach((t) => t.stop());
        mediaStreamRef.current = null;
        try { await audioCtxRef.current?.close(); } catch (e) {}
        audioCtxRef.current = null;
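        // A blob this small can't contain speech — treat it as an accidental tap.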
        if (blob.size < 800) {
          setPhase('idle');
          return;
        }
        await transcribeAndReply(blob);
      };
      rec.start(250); // emit chunks every 250ms — produces a well-formed container even on short clips
      recorderRef.current = rec;
      setPhase('listening');
    } catch (e) {
      console.warn('[Pepagent] mic failed:', e);
      if (e.name === 'NotAllowedError' || e.name === 'SecurityError') {
        setError("Microphone access denied. Check your browser's site permissions.");
      } else if (e.name === 'NotFoundError') {
        setError("No microphone found. Make sure one is connected.");
      } else {
        setError(e.message || 'Could not start recording.');
      }
      setPhase('idle');
    }
  };

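  // Stop recording; the recorder's onstop handler sends the captured audio.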
  const stopListening = () => {
    const rec = recorderRef.current;
    if (rec && rec.state !== 'inactive') {
      try { rec.stop(); } catch (e) {}
    }
    setPhase('thinking');
  };

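  // POST the recorded audio to the backend and turn the result into the next
  // agent reply. The endpoint is expected to return JSON like { text: "..." },
  // with { error } or { detail } on failure — that is what the code below reads.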
  const transcribeAndReply = async (audioBlob) => {
    setPhase('thinking');
    let text = '';
    try {
      console.log('[Pepagent] sending audio:', audioBlob.size, 'bytes, type:', audioBlob.type);
      const res = await fetch('/api/transcribe', {
        method: 'POST',
        headers: { 'Content-Type': audioBlob.type || 'audio/webm' },
        body: audioBlob,
      });
      if (!res.ok) {
        let detail = '';
        try { const j = await res.json(); detail = j.error || j.detail || ''; } catch (e) {}
        throw new Error(`Transcribe ${res.status}: ${detail || 'request failed'}`);
      }
      const j = await res.json();
      text = j.text || '';
      console.log('[Pepagent] transcript:', JSON.stringify(text), 'full response:', j);
    } catch (e) {
      console.warn('[Pepagent] transcription failed:', e);
      setError('Transcription failed: ' + (e.message || 'unknown error'));
      setPhase('idle');
      return;
    }
    const trimmed = (text || '').trim();
    if (!trimmed) {
      setError("I couldn't hear any words. Speak a bit louder or closer to the mic?");
      setPhase('idle');
      return;
    }
    setTranscript(text);
    historyRef.current.push({ role: 'user', content: text });
    await respond();
  };

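  // Ask the model for the next line using the full conversation so far, then
  // render and speak it. Prefers window.claude.complete when available,
  // otherwise falls back to window.geminiText.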
  const respond = async () => {
    const sysContext = `You are Pepagent, a knowledgeable but cautious AI peptide protocol advisor speaking out loud over voice. Keep replies SHORT — 2 to 3 sentences maximum, never more. Speak conversationally, like a thoughtful coach. No markdown, no bullet points, no headers. Only mention "consult a clinician" when the user asks about dosing, side effects, or starting a protocol — not on every reply.

CRITICAL: This is a multi-turn conversation. Read the full transcript below and remember what the user has already told you. Do NOT re-ask questions they have already answered. If they've already given you their goal, weight, age, or experience level, build on that — don't ask again. Move the conversation forward.

${protocol ? `Current user goal: ${protocol.label}. Recommended stack: ${protocol.stack.join(' + ')}. Duration: ${protocol.duration}. Summary: ${protocol.summary}` : 'No active protocol yet — help the user pick one.'}
${profile ? `User profile: ${profile.weight}kg, ${profile.age} years old, ${profile.experience} experience.` : ''}

Available peptides you know about: BPC-157, TB-500, CJC-1295, Ipamorelin, GHK-Cu, Tesamorelin, Semax, Selank, Epitalon. Reference real dosing, half-lives, and mechanisms when relevant.

Once you have enough info (goal, body stats, experience), propose a concrete peptide stack with doses and frequency. Don't keep gathering forever — be decisive.`;

    // Build a compact transcript Gemini can read in one prompt.
    const transcriptText = historyRef.current
      .map((m) => (m.role === 'user' ? `USER: ${m.content}` : `PEPAGENT: ${m.content}`))
      .join('\n');

    const fullPrompt = `${sysContext}\n\n=== CONVERSATION SO FAR ===\n${transcriptText}\n=== END CONVERSATION ===\n\nReply now as PEPAGENT. Output only what you would say next — no labels, no quotes, just the spoken words.`;

    try {
      let reply;
      if (window.claude?.complete) {
        // Claude supports proper multi-turn — use it natively.
        const messages = historyRef.current.map((m) => ({
          role: m.role === 'user' ? 'user' : 'assistant',
          content: m.content,
        }));
        // Prepend the system context as the first user turn (Claude helper doesn't accept system role here).
        messages.unshift({ role: 'user', content: sysContext + '\n\n(Begin the conversation now. Greet briefly.)' });
        reply = await window.claude.complete({ messages });
      } else {
        reply = await window.geminiText(fullPrompt);
      }
      const clean = String(reply || '').trim();
      if (!clean) throw new Error('Empty reply');
      historyRef.current.push({ role: 'assistant', content: clean });
      setAgentText(clean);
      setTranscript('');
      speak(clean);
    } catch (e) {
      console.warn('[Pepagent] reply failed:', e);
      const fallback = "I'm having trouble connecting right now. Try again in a moment.";
      setAgentText(fallback);
      speak(fallback);
      setPhase('idle');
    }
  };

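  // Tear down the recorder, mic stream, and any in-flight speech before handing control back.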
  const close = () => {
    try { recorderRef.current?.stop(); } catch (e) {}
    mediaStreamRef.current?.getTracks().forEach((t) => t.stop());
    window.speechSynthesis.cancel();
    onClose();
  };

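  // Map the conversation phase onto the Orb's visual state and intensity;
  // 'speaking' reuses the 'thinking' orb state.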
  const orbState = phase === 'listening' ? 'listening' : phase === 'thinking' || phase === 'speaking' ? 'thinking' : 'idle';
  const orbI = phase === 'listening' ? 0.85 : phase === 'speaking' ? 0.75 : phase === 'thinking' ? 0.6 : 0.35;

  return (
    <div className="voice-overlay" data-screen-label="Voice">
      <div className="voice-orb-wrap">
        <Orb size={360} intensity={orbI} color={accent} state={orbState} />
      </div>

      <div className="voice-status">
        <span className="rail-dot" style={{ background: accent }} />
        {phase === 'listening' && 'LISTENING…'}
        {phase === 'thinking' && 'THINKING…'}
        {phase === 'speaking' && 'SPEAKING…'}
        {phase === 'idle' && 'TAP TO TALK'}
      </div>

      <div className="voice-caption">
        {transcript ? (
          <>
            <span className="you">YOU</span>
            "{transcript}"
          </>
        ) : (
          <>
            <span className="agent">PEPAGENT</span>
            {agentText}
          </>
        )}
      </div>

      {error && (
        <div style={{ fontFamily: "'JetBrains Mono',monospace", fontSize: 11, color: '#b87000', textAlign: 'center', maxWidth: 500 }}>
          {error}
        </div>
      )}

      <div className="voice-controls">
        <button
          className="voice-btn"
          onClick={() => {
            // Cancel any in-flight speech when muting; keep the state updater free of side effects.
            if (!muted) window.speechSynthesis.cancel();
            setMuted((m) => !m);
          }}
          title={muted ? 'Unmute agent' : 'Mute agent'}
        >
          {muted ? (
            <svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="currentColor" strokeWidth="1.5"><path d="M11 5L6 9H3v6h3l5 4V5zM17 9l4 6M21 9l-4 6" strokeLinecap="round" strokeLinejoin="round"/></svg>
          ) : (
            <svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="currentColor" strokeWidth="1.5"><path d="M11 5L6 9H3v6h3l5 4V5zM15 9a4 4 0 0 1 0 6M18 6a8 8 0 0 1 0 12" strokeLinecap="round" strokeLinejoin="round"/></svg>
          )}
        </button>

        <button
          className={"voice-btn primary " + (phase === 'listening' ? 'on' : '')}
          onClick={() => phase === 'listening' ? stopListening() : startListening()}
          disabled={phase === 'thinking'}
          title="Tap to talk"
        >
          <svg viewBox="0 0 24 24" width="28" height="28" fill="none" stroke="currentColor" strokeWidth="1.6">
            <rect x="9" y="3" width="6" height="12" rx="3" />
            <path d="M5 11a7 7 0 0 0 14 0M12 18v3M9 21h6" strokeLinecap="round" />
          </svg>
        </button>

        <button className="voice-btn danger" onClick={close} title="End call">
          <svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="currentColor" strokeWidth="1.6">
            <path d="M3 11c4-4 14-4 18 0l-2 3-3-1v-3a10 10 0 0 0-8 0v3l-3 1-2-3z" strokeLinejoin="round" />
          </svg>
        </button>
      </div>

      <div className="voice-tip">
        TAP MIC · SPEAK · TAP AGAIN TO SEND · POWERED BY GEMINI
      </div>
    </div>
  );
}

window.VoiceMode = VoiceMode;
