"use strict";
require("dotenv").config();
const ari = require("ari-client");
const WebSocket = require("ws");
const dgram = require("dgram");
const fs = require("fs");
const path = require("path");
// ─── Konfigurace ────────────────────────────────────────────────────────────
// Short wall-clock stamp for log lines: the "HH:MM:SS.mmm" part of the current UTC ISO timestamp.
const ts = () => {
  const iso = new Date().toISOString();
  return iso.substring(11, 23);
};
// Runtime configuration. Environment variables (loaded via dotenv) override the
// defaults below; the model and voice are fixed in code.
const CFG = {
  ariUrl: process.env.ARI_URL || "http://127.0.0.1:8088",
  ariUser: process.env.ARI_USER || "ai-demo-user",
  // NOTE(review): hard-coded fallback credential — consider requiring ARI_PASS explicitly.
  ariPass: process.env.ARI_PASS || "tajne-heslo",
  // Explicit radix so a value such as "0x10" is never interpreted as hexadecimal.
  rtpPort: Number.parseInt(process.env.RTP_PORT || "12000", 10),
  bridgeHost: process.env.BRIDGE_HOST || "127.0.0.1",
  openaiKey: process.env.OPENAI_API_KEY || "",
  transcriptDir: process.env.TRANSCRIPT_DIR || "/cesta-k-transkriptum",
  openaiModel: "gpt-4o-realtime-preview",
  // NOTE(review): confirm the chosen voice against the Realtime API's supported
  // voice list; the original comment listed alloy | echo | fable | onyx | nova | shimmer.
  openaiVoice: "alloy",
  transferExt: process.env.TRANSFER_EXT || "100", // dialplan extension used for call transfer
};
// Both legs carry G.711 A-law at 8 kHz (Asterisk ExternalMedia "alaw", OpenAI "g711_alaw"), so the live audio path needs no resampling.
// Sample rates in Hz: A-law at 8 kHz on the Asterisk leg, g711_alaw at 8 kHz on the OpenAI leg.
const ASTERISK_SAMPLE_RATE = 8000;
const OPENAI_SAMPLE_RATE = 8000;
// Length of one RTP frame in milliseconds.
const FRAME_DURATION_MS = 20;
// Samples contained in one frame on each leg.
const ASTERISK_FRAME_SAMPLES = (ASTERISK_SAMPLE_RATE / 1000) * FRAME_DURATION_MS;
const OPENAI_FRAME_SAMPLES = (OPENAI_SAMPLE_RATE / 1000) * FRAME_DURATION_MS;
// Frame size rounded down to a whole number of 3-sample groups, and that size in bytes.
// NOTE(review): the factor 2 assumes 16-bit samples, whereas A-law uses one byte per
// sample; confirm whether these two constants are still needed.
const OPENAI_CHUNK_SAMPLES = OPENAI_FRAME_SAMPLES - (OPENAI_FRAME_SAMPLES % 3);
const OPENAI_CHUNK_BYTES = 2 * OPENAI_CHUNK_SAMPLES;
// Size of the fixed RTP header in bytes.
const RTP_HEADER_SIZE = 12;
// ─── Instrukce pro AI ────────────────────────────────────────────────────────
// System prompt (Czech) passed as `instructions` in the OpenAI session.update message.
// The upper-case lines and the "=== ... ===" marker are placeholders to be filled in
// before deployment. The hand-over sentence containing "Přepojím" matters: handleCall
// scans the assistant transcript for "přepoj" to trigger the call transfer.
const AI_INSTRUCTIONS = `
Jsi hlasový asistent ve společnosti XYZ. Mluvíš VÝHRADNĚ česky.
Chování:
- Přivítej volajícího: "Dobrý den, dovolali jste se do společnosti XYZ, s čím vám mohu pomoci?"
- Odpovídej stručně, max 2-3 věty. Nezaplňuj ticho.
- Jsi přátelský, profesionální, věcný.
- Pokud se tě ptají na produkt, popis je níže.
- Když se tě zeptají kdo jsi, řekni: "Jsem AI asistent určen výhradně pro === DOPLŇTE ÚČEL ===."
O společnosti XYZ:
- PÁR VĚT O SPOLEČNOSTI
O našich produktech:
- PÁR VĚT O PRODUKTECH
Pokud volající chce mluvit s člověkem nebo nevíš odpověď:
- Řekni: "Přepojím vás na kolegu, okamžik prosím." a ukonči odpověď.
`.trim();
// ─── Resample: 16kHz → 24kHz (upsample 2:3 s lineární interpolací) ──────────
/**
 * Upsamples 16-bit little-endian PCM from 16 kHz to 24 kHz (ratio 2:3) with
 * linear interpolation.
 *
 * Output sample k lies at source position k * 2/3, so every pair of input
 * samples (s0, s1) followed by s2 yields three outputs at source offsets
 * 0, 2/3 and 4/3: s0, lerp(s0, s1, 2/3) and lerp(s1, s2, 1/3). The previous
 * version interpolated at offsets 0, 1/3 and 2/3, which stretched the first
 * two thirds of each pair and then jumped, distorting the waveform.
 *
 * @param {Buffer} buf16 - PCM16LE samples at 16 kHz.
 * @returns {Buffer} PCM16LE samples at 24 kHz; three output samples per
 *   complete input pair (a trailing unpaired sample produces no output of its
 *   own and is only used as look-ahead).
 */
function resample16to24(buf16) {
  const srcSamples = Math.floor(buf16.length / 2);
  const groups = Math.floor(srcSamples / 2);
  const out = Buffer.alloc(groups * 3 * 2);
  // Reads a source sample, holding the last one when the look-ahead runs past the end.
  const sampleAt = (idx) => buf16.readInt16LE(Math.min(idx, srcSamples - 1) * 2);
  for (let g = 0; g < groups; g++) {
    const s0 = sampleAt(g * 2);
    const s1 = sampleAt(g * 2 + 1);
    const s2 = sampleAt(g * 2 + 2);
    const base = g * 6; // byte offset of this group's three output samples
    // Interpolated values lie between two int16 inputs, so no clamping is needed.
    out.writeInt16LE(s0, base);
    out.writeInt16LE(Math.round(s0 + ((s1 - s0) * 2) / 3), base + 2);
    out.writeInt16LE(Math.round(s1 + (s2 - s1) / 3), base + 4);
  }
  return out;
}
// ─── Resample: 24kHz → 16kHz (decimace 3:2 s anti-aliasing) ─────────────────
/**
 * Downsamples 16-bit little-endian PCM from 24 kHz to 16 kHz (ratio 3:2).
 *
 * Every complete group of three input samples (a, b, c) produces two outputs:
 * round((2a + b) / 3) and round((b + 2c) / 3). Incomplete trailing groups are
 * dropped.
 *
 * NOTE(review): these weights place the outputs at source offsets 1/3 and 5/3,
 * which is not an even 1.5-sample spacing — confirm that this is intended.
 *
 * @param {Buffer} buf24 - PCM16LE samples at 24 kHz.
 * @returns {Buffer} PCM16LE samples at 16 kHz.
 */
function resample24to16(buf24) {
  const clamp16 = (value) => Math.min(32767, Math.max(-32768, value));
  const tripletCount = Math.floor(buf24.length / 6);
  const result = Buffer.alloc(tripletCount * 4);
  for (let t = 0, src = 0, dst = 0; t < tripletCount; t++, src += 6, dst += 4) {
    const a = buf24.readInt16LE(src);
    const b = buf24.readInt16LE(src + 2);
    const c = buf24.readInt16LE(src + 4);
    result.writeInt16LE(clamp16(Math.round((2 * a + b) / 3)), dst);
    result.writeInt16LE(clamp16(Math.round((b + 2 * c) / 3)), dst + 2);
  }
  return result;
}
// ─── RTP packet builder ───────────────────────────────────────────────────────
/**
 * Prepends a fixed RTP header to a payload.
 *
 * The header is written with version 2, no padding, no extension, no CSRC
 * entries, marker bit clear and payload type 8 (G.711 A-law).
 *
 * @param {Buffer} payload - Encoded audio for this packet.
 * @param {number} seqNum - Sequence number; only the low 16 bits are used.
 * @param {number} timestamp - RTP timestamp; coerced to unsigned 32 bits.
 * @param {number} ssrc - Synchronisation source id; coerced to unsigned 32 bits.
 * @returns {Buffer} Header followed by the payload.
 */
function buildRtpPacket(payload, seqNum, timestamp, ssrc) {
  const rtpHeader = Buffer.alloc(RTP_HEADER_SIZE);
  // Each write returns the offset just past the bytes it wrote.
  let offset = 0;
  offset = rtpHeader.writeUInt8(0x80, offset); // V=2, P=0, X=0, CC=0
  offset = rtpHeader.writeUInt8(8, offset); // M=0, PT=8
  offset = rtpHeader.writeUInt16BE(seqNum & 0xFFFF, offset);
  offset = rtpHeader.writeUInt32BE(timestamp >>> 0, offset);
  rtpHeader.writeUInt32BE(ssrc >>> 0, offset);
  return Buffer.concat([rtpHeader, payload]);
}
// ─── Transcript helper ────────────────────────────────────────────────────────
/**
 * Writes the conversation of one call to a UTF-8 text file in CFG.transcriptDir.
 *
 * The directory is created when missing. The file name is built from the
 * current ISO timestamp (":" and "." replaced by "-", cut to seconds) and the
 * call id. Failures are logged and never thrown.
 *
 * @param {string} callId - Short call identifier used in the file name and header.
 * @param {Array<{role: string, text: string}>} entries - Transcript lines in order.
 */
function saveTranscript(callId, entries) {
  try {
    fs.mkdirSync(CFG.transcriptDir, { recursive: true });
    const stamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const target = path.join(CFG.transcriptDir, `${stamp}_${callId}.txt`);
    const rule = "─".repeat(60);
    const body = entries.map(({ role, text }) => `[${role.toUpperCase()}] ${text}`);
    const header = [
      `Hovor: ${callId}`,
      `Datum: ${new Date().toLocaleString("cs-CZ")}`,
    ];
    fs.writeFileSync(target, [...header, rule, ...body, rule].join("\n"), "utf8");
    console.log(`[transcript] Uložen: ${target}`);
  } catch (err) {
    console.error("[transcript] Chyba při ukládání:", err.message);
  }
}
// ─── Hlavní session pro jeden hovor ──────────────────────────────────────────
/**
 * Opens a UDP socket bound to `port` on all IPv4 interfaces.
 *
 * dgram reports bind failures through the 'error' event; the callback passed
 * to bind() only fires on success and never receives an error argument.
 *
 * @param {number} port - Port to bind; 0 lets the OS choose a free port.
 * @returns {Promise<import("dgram").Socket>} Resolves with the bound socket,
 *   rejects with the bind error.
 */
function openRtpSocket(port) {
  return new Promise((resolve, reject) => {
    const socket = dgram.createSocket("udp4");
    const onBindError = (err) => {
      try { socket.close(); } catch { /* socket never became usable */ }
      reject(err);
    };
    socket.once("error", onBindError);
    try {
      socket.bind(port, "0.0.0.0", () => {
        socket.removeListener("error", onBindError);
        resolve(socket);
      });
    } catch (err) {
      // Synchronous failures, e.g. an invalid port value.
      socket.removeListener("error", onBindError);
      onBindError(err);
    }
  });
}

/**
 * Runs one call: answers it, wires Asterisk ExternalMedia RTP to the OpenAI
 * Realtime API over a WebSocket, paces the AI audio back to Asterisk in 20 ms
 * RTP frames, and tears everything down when the call ends.
 *
 * Resolves once the call is fully set up (or was abandoned during setup);
 * the call itself continues through event handlers. Unexpected setup errors
 * are rethrown after the partially created resources have been released and
 * the caller has been hung up.
 *
 * @param {object} ariClient - Connected ari-client instance.
 * @param {object} channel - ARI channel of the incoming call.
 * @returns {Promise<void>}
 */
async function handleCall(ariClient, channel) {
  const callId = channel.id.slice(0, 8);
  console.log(`\n[${callId}] Nový hovor: ${channel.caller?.number || "unknown"}`);
  const transcript = [];

  // ── Lifecycle bookkeeping ────────────────────────────────────────────────
  let active = true;
  // Release callbacks for every resource acquired so far, run newest-first.
  const disposers = [];
  const dispose = async () => {
    while (disposers.length > 0) {
      const release = disposers.pop();
      // Teardown is best-effort: Asterisk may already have destroyed the
      // bridge or channel by the time we get here.
      try { await release(); } catch { /* resource already gone */ }
    }
  };
  const cleanup = async (reason) => {
    if (active) {
      active = false;
      console.log(`${ts()} [${callId}] Hovor ukončen: ${reason}`);
      if (transcript.length > 0) {
        saveTranscript(callId, transcript);
      }
    }
    await dispose();
  };
  // True when the call ended while setup was awaiting; releases whatever was
  // created in the meantime.
  const abortedDuringSetup = async () => {
    if (active) return false;
    await dispose();
    return true;
  };

  // Registered before the first await so a hangup during setup is not missed.
  channel.on("StasisEnd", () => { void cleanup("StasisEnd"); });
  channel.on("ChannelHangupRequest", () => { void cleanup("HangupRequest"); });

  let udpSocket;
  let extChannel;
  let asteriskRtpHost;
  let asteriskRtpPort;
  try {
    // ── 1. Answer the call ─────────────────────────────────────────────────
    await channel.answer();
    console.log(`[${callId}] Hovor přijat`);
    if (await abortedDuringSetup()) return;

    // ── 2. Open the UDP socket that receives RTP from Asterisk ─────────────
    try {
      udpSocket = await openRtpSocket(CFG.rtpPort);
    } catch (err) {
      if (err.code !== "EADDRINUSE") throw err;
      // Another call already holds the configured port; use an OS-assigned one.
      udpSocket = await openRtpSocket(0);
    }
    disposers.push(() => udpSocket.close());
    udpSocket.on("error", (err) => {
      console.error(`[${callId}] UDP socket error:`, err.message);
    });
    const localRtpPort = udpSocket.address().port;
    console.log(`[${callId}] UDP RTP socket naslouchá na portu ${localRtpPort}`);
    if (await abortedDuringSetup()) return;

    // ── 3. Create the ExternalMedia channel in Asterisk ────────────────────
    try {
      extChannel = await ariClient.channels.externalMedia({
        app: "asterisk-ai",
        external_host: `${CFG.bridgeHost}:${localRtpPort}`,
        format: "alaw",
        direction: "both",
      });
    } catch (err) {
      console.error(`[${callId}] ExternalMedia selhalo:`, err.message);
      active = false;
      await dispose();
      await channel.hangup().catch(() => {});
      return;
    }
    disposers.push(() => extChannel.hangup());
    console.log(`[${callId}] ExternalMedia kanál: ${extChannel.id}`);
    if (await abortedDuringSetup()) return;

    // Where Asterisk expects our RTP: taken from the channel variables in the
    // creation response, or queried separately when they are absent.
    asteriskRtpHost = extChannel.channelvars?.UNICASTRTP_LOCAL_ADDRESS || CFG.bridgeHost;
    asteriskRtpPort = Number.parseInt(extChannel.channelvars?.UNICASTRTP_LOCAL_PORT || "0", 10);
    if (!asteriskRtpPort && extChannel.id) {
      const lookupVar = (variable) => ariClient.channels
        .getChannelVar({ channelId: extChannel.id, variable })
        .catch(() => null);
      const [portVar, addrVar] = await Promise.all([
        lookupVar("UNICASTRTP_LOCAL_PORT"),
        lookupVar("UNICASTRTP_LOCAL_ADDRESS"),
      ]);
      if (portVar) asteriskRtpPort = Number.parseInt(portVar.value, 10);
      if (addrVar?.value) asteriskRtpHost = addrVar.value;
      if (await abortedDuringSetup()) return;
    }
    if (!asteriskRtpPort) {
      console.warn(`[${callId}] Nepodařilo se zjistit RTP port Asterisku, zvuk AI nebude odesílán`);
    }

    // ── 4. Create the bridge and add both channels ─────────────────────────
    const bridge = await ariClient.bridges.create({ type: "mixing" });
    disposers.push(() => bridge.destroy());
    await bridge.addChannel({ channel: [channel.id, extChannel.id] });
    console.log(`[${callId}] Bridge vytvořen: ${bridge.id}`);
    if (await abortedDuringSetup()) return;
  } catch (err) {
    // Unexpected setup failure: release what exists, drop the caller, and let
    // the caller of handleCall log the error.
    active = false;
    await dispose();
    await channel.hangup().catch(() => {});
    throw err;
  }

  // ── 5. Connect the WebSocket to the OpenAI Realtime API ───────────────────
  const openaiWs = new WebSocket(
    `wss://api.openai.com/v1/realtime?model=${CFG.openaiModel}`,
    {
      headers: {
        Authorization: `Bearer ${CFG.openaiKey}`,
        "OpenAI-Beta": "realtime=v1",
      },
    }
  );
  disposers.push(() => openaiWs.close());

  // Outgoing RTP stream state.
  let rtpSeq = Math.floor(Math.random() * 65535);
  let rtpTimestamp = Math.floor(Math.random() * 0xFFFFFFFF);
  const rtpSsrc = Math.floor(Math.random() * 0xFFFFFFFF);
  const RTP_FRAME_BYTES = 160; // 20 ms of A-law at 8 kHz, one byte per sample
  const JITTER_PREFILL = 6; // frames buffered before playback of a reply starts
  const PREFILL_BYTES = RTP_FRAME_BYTES * JITTER_PREFILL;
  const SILENCE_FRAME = Buffer.alloc(RTP_FRAME_BYTES, 0xD5); // A-law silence

  // Playback state for the AI reply currently being spoken.
  let audioBuffer = Buffer.alloc(0); // AI audio not yet sent to Asterisk
  let flushMode = false; // OpenAI has finished sending audio for this reply
  let aiSpeaking = false; // while true, caller audio is not forwarded
  let transferPending = false; // transfer the call once the reply has played
  let sessionReady = false; // session.update has been acknowledged
  let jitterReady = false; // prefill reached, frames are being sent
  let bufferWasFilled = false; // this reply has produced audio

  const sendRtpFrame = (payload) => {
    const pkt = buildRtpPacket(payload, rtpSeq, rtpTimestamp, rtpSsrc);
    rtpSeq = (rtpSeq + 1) & 0xFFFF;
    // The timestamp advances by samples per frame, equal to bytes for A-law.
    rtpTimestamp = (rtpTimestamp + RTP_FRAME_BYTES) >>> 0;
    udpSocket.send(pkt, asteriskRtpPort, asteriskRtpHost);
  };

  // Ends the current AI turn: caller audio flows again and a pending transfer
  // is carried out.
  const finishTurn = () => {
    aiSpeaking = false;
    jitterReady = false;
    flushMode = false;
    bufferWasFilled = false;
    if (!transferPending) return;
    transferPending = false;
    console.log(`${ts()} [${callId}] Přepojuji na ${CFG.transferExt}`);
    channel.continueInDialplan({
      context: "default",
      extension: CFG.transferExt,
      priority: 1,
    }).catch(err => console.error(`[${callId}] Transfer error:`, err.message));
  };

  // ── RTP output pacing timer (one packet every 20 ms) ─────────────────────
  const pacingTimer = setInterval(() => {
    if (!active || !asteriskRtpPort) return;
    if (!jitterReady) {
      // Wait for the prefill, unless the reply is already complete: a reply
      // shorter than the prefill would otherwise never be played.
      if (!flushMode && audioBuffer.length < PREFILL_BYTES) return;
      jitterReady = true;
    }
    if (audioBuffer.length >= RTP_FRAME_BYTES) {
      sendRtpFrame(audioBuffer.subarray(0, RTP_FRAME_BYTES));
      audioBuffer = audioBuffer.subarray(RTP_FRAME_BYTES);
      return;
    }
    if (flushMode && bufferWasFilled) {
      // End of the reply: send the remainder padded with silence, then finish.
      if (audioBuffer.length > 0) {
        const lastFrame = Buffer.alloc(RTP_FRAME_BYTES, 0xD5);
        audioBuffer.copy(lastFrame, 0);
        audioBuffer = Buffer.alloc(0);
        sendRtpFrame(lastFrame);
      }
      finishTurn();
      return;
    }
    // Underrun in the middle of a reply: keep the RTP stream going with silence.
    if (bufferWasFilled) sendRtpFrame(SILENCE_FRAME);
  }, 20);
  disposers.push(() => clearInterval(pacingTimer));

  // ── OpenAI WS events ──────────────────────────────────────────────────────
  openaiWs.on("open", () => {
    console.log(`[${callId}] OpenAI WS připojen`);
    openaiWs.send(JSON.stringify({
      type: "session.update",
      session: {
        modalities: ["audio", "text"],
        instructions: AI_INSTRUCTIONS,
        voice: CFG.openaiVoice,
        input_audio_format: "g711_alaw",
        output_audio_format: "g711_alaw",
        input_audio_transcription: {
          model: "whisper-1",
          language: "cs",
        },
        turn_detection: {
          type: "server_vad",
          threshold: 0.9,
          prefix_padding_ms: 200,
          silence_duration_ms: 700,
        },
        temperature: 0.7,
        max_response_output_tokens: 4096,
      },
    }));
    const silence = Buffer.alloc(160, 0xD5);
    openaiWs.send(JSON.stringify({
      type: "input_audio_buffer.append",
      audio: silence.toString("base64"),
    }));
  });

  openaiWs.on("message", (raw) => {
    if (!active) return;
    let evt;
    try { evt = JSON.parse(raw); } catch { return; }
    switch (evt.type) {
      case "response.audio.delta": {
        if (!evt.delta) break;
        const chunk = Buffer.from(evt.delta, "base64");
        audioBuffer = Buffer.concat([audioBuffer, chunk]);
        bufferWasFilled = true;
        break;
      }
      case "response.created": {
        aiSpeaking = true;
        if (openaiWs.readyState === WebSocket.OPEN) {
          openaiWs.send(JSON.stringify({ type: "input_audio_buffer.clear" }));
        }
        break;
      }
      case "response.audio.done": {
        flushMode = true;
        break;
      }
      case "response.done": {
        // Safety net for replies that end without response.audio.done or
        // without any audio (failed, cancelled, text-only): without it
        // aiSpeaking would stay true and the caller would never be heard again.
        if (bufferWasFilled) {
          flushMode = true; // let the pacing timer drain what is buffered
        } else {
          finishTurn();
        }
        break;
      }
      case "response.output_item.done": {
        const content = evt.item?.content;
        if (content) {
          const textBlock = content.find(c => c.type === "audio" && c.transcript);
          const text = textBlock?.transcript || "";
          if (text) {
            console.log(`${ts()} [${callId}] AI: ${text}`);
            transcript.push({ role: "assistant", text });
            // "přepoj" is the common stem of přepojím / přepojuji.
            if (text.toLowerCase().includes("přepoj")) {
              transferPending = true;
              console.log(`${ts()} [${callId}] Transfer pending → ${CFG.transferExt}`);
            }
          }
        }
        break;
      }
      case "conversation.item.input_audio_transcription.completed": {
        const text = evt.transcript || "";
        if (text && text.trim()) {
          console.log(`${ts()} [${callId}] Volající: ${text}`);
          transcript.push({ role: "caller", text });
        }
        break;
      }
      case "session.created":
        console.log(`${ts()} [${callId}] Session: ${evt.type}`);
        break;
      case "session.updated":
        console.log(`${ts()} [${callId}] Session: ${evt.type}`);
        // Greet only once, even if the session is updated again later.
        if (!sessionReady && openaiWs.readyState === WebSocket.OPEN) {
          openaiWs.send(JSON.stringify({
            type: "response.create",
            response: {
              modalities: ["audio", "text"],
              instructions: "Pozdrav volajícího. Řekni: Dobrý den, dovolali jste se do společnosti XYZ.",
            }
          }));
        }
        sessionReady = true;
        break;
      case "error":
        console.error(`[${callId}] OpenAI chyba:`, evt.error?.message);
        break;
      case "input_audio_buffer.speech_started":
        console.log(`${ts()} [${callId}] !! SPEECH_STARTED (VAD)`);
        break;
      case "input_audio_buffer.speech_stopped":
        console.log(`${ts()} [${callId}] speech_stopped`);
        break;
      case "response.cancelled":
        console.log(`${ts()} [${callId}] !! RESPONSE CANCELLED`);
        break;
    }
  });

  openaiWs.on("error", (err) => {
    console.error(`[${callId}] OpenAI WS error:`, err.message);
  });
  // NOTE(review): when the OpenAI connection drops, the call stays up in
  // silence until the caller hangs up or the watchdog below fires.
  openaiWs.on("close", () => {
    console.log(`[${callId}] OpenAI WS uzavřen`);
  });

  // ── 6. Audio from Asterisk → OpenAI ───────────────────────────────────────
  udpSocket.on("message", (msg) => {
    if (!active || !sessionReady || openaiWs.readyState !== WebSocket.OPEN) return;
    if (msg.length <= RTP_HEADER_SIZE) return;
    // Caller audio is dropped while the AI is speaking (no barge-in).
    if (aiSpeaking) return;
    // Assumes a plain 12-byte RTP header (no CSRC entries or header extension).
    const alawData = msg.subarray(RTP_HEADER_SIZE);
    openaiWs.send(JSON.stringify({
      type: "input_audio_buffer.append",
      audio: alawData.toString("base64"),
    }));
  });

  // ── 7. Watchdog: end the call after 10 minutes ────────────────────────────
  const watchdogTimer = setTimeout(() => {
    if (!active) return;
    void cleanup("timeout");
    channel.hangup().catch(() => {});
  }, 10 * 60 * 1000);
  disposers.push(() => clearTimeout(watchdogTimer));
}
// ─── Start ARI klienta ────────────────────────────────────────────────────────
// Connect to Asterisk ARI, print the start-up banner and hand every new
// Stasis channel to handleCall. A failed connection terminates the process.
ari.connect(CFG.ariUrl, CFG.ariUser, CFG.ariPass, (err, client) => {
  if (err) {
    console.error("Nelze se připojit k Asterisk ARI:", err.message);
    process.exit(1);
  }

  const banner = [
    "Asterisk - AI Bridge spuštěn",
    ` ARI: ${CFG.ariUrl}`,
    ` RTP: ${CFG.bridgeHost}:${CFG.rtpPort}`,
    ` Modely: ${CFG.openaiModel} / whisper-1`,
    "─".repeat(50),
  ];
  for (const line of banner) {
    console.log(line);
  }

  const onStasisStart = (evt, channel) => {
    // ExternalMedia channels enter the same Stasis app; they are not calls.
    const isExternalMedia = channel.name.startsWith("UnicastRTP");
    if (isExternalMedia) {
      return;
    }
    handleCall(client, channel).catch((callErr) => {
      console.error(`[${channel.id.slice(0,8)}] Neočekávaná chyba:`, callErr.message);
    });
  };

  client.on("StasisStart", onStasisStart);
  client.start("asterisk-ai");
});
// Last-resort handler: log uncaught exceptions instead of letting the process exit.
function logUncaughtException(err) {
  console.error("Uncaught exception:", err);
}
process.on("uncaughtException", logUncaughtException);