Compare commits
31 Commits
opus-DRED-
...
opus-DRED
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c95255d31b | ||
|
|
99c0173590 | ||
|
|
953ab71392 | ||
|
|
662b14a2af | ||
|
|
b830f29e66 | ||
|
|
d5c298d0b5 | ||
|
|
4090206909 | ||
|
|
086a74782f | ||
|
|
09259cd6b8 | ||
|
|
75bc72a884 | ||
|
|
6aa52accef | ||
|
|
d0c17317ea | ||
|
|
5799d18aee | ||
|
|
46c9ee1be3 | ||
|
|
b53eae9192 | ||
|
|
a3f54566d4 | ||
|
|
76e9fe5e43 | ||
|
|
b0a89d4f39 | ||
|
|
abc96e8887 | ||
|
|
3a6ae61f8d | ||
|
|
4c536d256b | ||
|
|
b0ec9ff4ab | ||
|
|
5855533a39 | ||
|
|
ed09c2e8cc | ||
|
|
f44306cc17 | ||
|
|
0b821585ab | ||
|
|
faec332a8c | ||
|
|
fe9ae276dc | ||
|
|
4fbf6770c4 | ||
|
|
30a893a73f | ||
|
|
d46f3b1deb |
49
Cargo.lock
generated
49
Cargo.lock
generated
@@ -119,26 +119,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "audiopus"
|
||||
version = "0.3.0-rc.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab55eb0e56d7c6de3d59f544e5db122d7725ec33be6a276ee8241f3be6473955"
|
||||
dependencies = [
|
||||
"audiopus_sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "audiopus_sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62314a1546a2064e033665d658e88c620a62904be945f8147e6b16c3db9f8651"
|
||||
dependencies = [
|
||||
"cmake",
|
||||
"log",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
@@ -389,6 +369,12 @@ version = "3.20.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
@@ -2125,6 +2111,24 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
|
||||
|
||||
[[package]]
|
||||
name = "opusic-c"
|
||||
version = "1.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9486eb5a1a735bf56430b5b44e21157be30ac9fcc17999ba309981b8bd90d2ff"
|
||||
dependencies = [
|
||||
"opusic-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opusic-sys"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc3280fe5b6f97ac1a35a0ac003e2fb0b92f8e4bdf2b2057e1bf9b87acca5696"
|
||||
dependencies = [
|
||||
"cmake",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.6.1"
|
||||
@@ -4309,9 +4313,11 @@ dependencies = [
|
||||
name = "wzp-codec"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"audiopus",
|
||||
"bytemuck",
|
||||
"codec2",
|
||||
"nnnoiseless",
|
||||
"opusic-c",
|
||||
"opusic-sys",
|
||||
"rand 0.8.5",
|
||||
"tracing",
|
||||
"wzp-proto",
|
||||
@@ -4370,6 +4376,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"axum 0.7.9",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"dirs",
|
||||
"futures-util",
|
||||
"prometheus",
|
||||
|
||||
@@ -35,7 +35,14 @@ quinn = "0.11"
|
||||
raptorq = "2"
|
||||
|
||||
# Codec
|
||||
audiopus = "0.3.0-rc.0"
|
||||
# opusic-c: high-level safe bindings over libopus 1.5.2 (encoder side).
|
||||
# opusic-sys: raw FFI for the decoder side — we build our own DecoderHandle
|
||||
# because opusic-c::Decoder.inner is pub(crate) and cannot be reached for the
|
||||
# Phase 3 DRED reconstruction path. See docs/PRD-dred-integration.md.
|
||||
# Pinned exactly (no caret) for reproducible libopus 1.5.2 across the fleet.
|
||||
opusic-c = { version = "=1.5.5", default-features = false, features = ["bundled", "dred"] }
|
||||
opusic-sys = { version = "=0.6.0", default-features = false, features = ["bundled"] }
|
||||
bytemuck = "1"
|
||||
codec2 = "0.3"
|
||||
|
||||
# Crypto
|
||||
|
||||
@@ -46,6 +46,14 @@ class DebugReporter(private val context: Context) {
|
||||
val zipFile = File(context.cacheDir, "wzp_debug_${timestamp}.zip")
|
||||
|
||||
ZipOutputStream(BufferedOutputStream(FileOutputStream(zipFile))).use { zos ->
|
||||
// Phase 4: extract DRED / classical PLC counters from the
|
||||
// stats JSON so they're visible in the meta preamble at a
|
||||
// glance, not buried in the trailing JSON dump.
|
||||
val dredReconstructions = extractLongField(finalStatsJson, "dred_reconstructions")
|
||||
val classicalPlc = extractLongField(finalStatsJson, "classical_plc_invocations")
|
||||
val framesDecoded = extractLongField(finalStatsJson, "frames_decoded")
|
||||
val fecRecovered = extractLongField(finalStatsJson, "fec_recovered")
|
||||
|
||||
// 1. Call metadata
|
||||
val meta = buildString {
|
||||
appendLine("=== WZ Phone Debug Report ===")
|
||||
@@ -58,6 +66,18 @@ class DebugReporter(private val context: Context) {
|
||||
appendLine("Device: ${android.os.Build.MANUFACTURER} ${android.os.Build.MODEL}")
|
||||
appendLine("Android: ${android.os.Build.VERSION.RELEASE} (API ${android.os.Build.VERSION.SDK_INT})")
|
||||
appendLine()
|
||||
appendLine("=== Loss Recovery ===")
|
||||
appendLine("Frames decoded: $framesDecoded")
|
||||
appendLine("DRED reconstructions: $dredReconstructions (Opus neural recovery)")
|
||||
appendLine("Classical PLC: $classicalPlc (fallback)")
|
||||
appendLine("RaptorQ FEC recovered: $fecRecovered (Codec2 only)")
|
||||
if (framesDecoded > 0) {
|
||||
val dredPct = 100.0 * dredReconstructions / framesDecoded
|
||||
val plcPct = 100.0 * classicalPlc / framesDecoded
|
||||
appendLine("DRED rate: ${"%.2f".format(dredPct)}%")
|
||||
appendLine("Classical PLC rate: ${"%.2f".format(plcPct)}%")
|
||||
}
|
||||
appendLine()
|
||||
appendLine("=== Final Stats ===")
|
||||
appendLine(finalStatsJson)
|
||||
}
|
||||
@@ -195,4 +215,28 @@ class DebugReporter(private val context: Context) {
|
||||
FileInputStream(file).use { it.copyTo(zos) }
|
||||
zos.closeEntry()
|
||||
}
|
||||
|
||||
/**
 * Tiny JSON field extractor — pulls an integer value for a top-level
 * field like `"dred_reconstructions":42`. We don't want to pull in a
 * full JSON parser just for the debug preamble, and the CallStats
 * output is a flat record with well-known field names.
 *
 * Matching rules:
 * - The quoted field name must be followed by a colon; whitespace is
 *   tolerated on either side of the colon, so both compact and
 *   pretty-printed output are accepted.
 * - An occurrence of the quoted name that is *not* followed by a colon
 *   (e.g. the name appearing as a string value) is skipped and the
 *   search continues.
 * - The value is an optional leading `-` followed by digits; scanning
 *   stops at the first other character.
 *
 * This is a textual scan, not a parser: it does not understand nesting
 * or string escaping.
 *
 * @param json  the stats JSON text to scan
 * @param field the unquoted field name to look up
 * @return the parsed value, or 0 if the field is missing, has no
 *         integer value, or does not fit in a [Long]
 */
private fun extractLongField(json: String, field: String): Long {
    val quotedKey = "\"$field\""
    var searchFrom = 0
    while (true) {
        val keyAt = json.indexOf(quotedKey, searchFrom)
        if (keyAt < 0) return 0L

        var cursor = keyAt + quotedKey.length
        // Whitespace between the closing quote and the colon.
        while (cursor < json.length && json[cursor].isWhitespace()) cursor++

        if (cursor < json.length && json[cursor] == ':') {
            cursor++
            // Whitespace between the colon and the value.
            while (cursor < json.length && json[cursor].isWhitespace()) cursor++

            val start = cursor
            // A sign is only meaningful as the first character of the number.
            if (cursor < json.length && json[cursor] == '-') cursor++
            while (cursor < json.length && json[cursor].isDigit()) cursor++

            // toLongOrNull covers the empty, lone "-" and overflow cases without exceptions.
            return json.substring(start, cursor).toLongOrNull() ?: 0L
        }

        // The quoted name was not used as a key here; keep looking further along.
        searchFrom = keyAt + 1
    }
}
|
||||
}
|
||||
|
||||
97
android/app/src/main/java/com/wzp/engine/SignalManager.kt
Normal file
97
android/app/src/main/java/com/wzp/engine/SignalManager.kt
Normal file
@@ -0,0 +1,97 @@
|
||||
package com.wzp.engine
|
||||
|
||||
import org.json.JSONObject
|
||||
|
||||
/**
 * Long-lived signalling channel used for direct 1:1 calls.
 *
 * Wraps an opaque native handle returned by the `wzp_android` library; a
 * handle value of `0L` means "not connected". The manager is independent of
 * WzpEngine, so it can be kept across calls.
 *
 * Lifecycle: connect() → [placeCall/answerCall] → destroy()
 */
class SignalManager {

    /** Opaque native handle; `0L` while no connection is held. */
    private var handle: Long = 0L

    /** `true` while a non-zero native handle is held. */
    val isConnected: Boolean
        get() = handle != 0L

    /**
     * Connects to [relay] and registers for direct calls using [seedHex].
     *
     * MUST be invoked on a thread with a large stack (8MB). Blocks briefly
     * while the QUIC connect + register round-trip completes.
     *
     * @return `true` if a connection is held afterwards; calling this while
     *         already connected is a no-op that returns `true`.
     */
    fun connect(relay: String, seedHex: String): Boolean {
        if (!isConnected) {
            handle = nativeSignalConnect(relay, seedHex)
        }
        return isConnected
    }

    /**
     * Returns the current signal state parsed from the native JSON snapshot.
     * Non-blocking.
     *
     * A default [SignalState] is returned when not connected, when the native
     * side returns `null`, or when the JSON cannot be parsed.
     */
    fun getState(): SignalState {
        if (!isConnected) return SignalState()
        val raw = nativeSignalGetState(handle) ?: return SignalState()
        return try {
            val parsed = JSONObject(raw)
            SignalState(
                status = parsed.optString("status", "idle"),
                fingerprint = parsed.optString("fingerprint", ""),
                incomingCallId = parsed.stringOrNull("incoming_call_id"),
                incomingCallerFp = parsed.stringOrNull("incoming_caller_fp"),
                incomingCallerAlias = parsed.stringOrNull("incoming_caller_alias"),
                callSetupRelay = parsed.stringOrNull("call_setup_relay"),
                callSetupRoom = parsed.stringOrNull("call_setup_room"),
                callSetupId = parsed.stringOrNull("call_setup_id"),
            )
        } catch (e: Exception) {
            // Best effort: an unreadable snapshot is reported as the default state.
            SignalState()
        }
    }

    /**
     * Places a direct call to the peer identified by [targetFp].
     *
     * @return `-1` when not connected, otherwise the native result code.
     */
    fun placeCall(targetFp: String): Int =
        if (isConnected) nativeSignalPlaceCall(handle, targetFp) else -1

    /**
     * Answers the incoming call [callId].
     *
     * @param mode 0=Reject, 1=AcceptTrusted, 2=AcceptGeneric
     * @return `-1` when not connected, otherwise the native result code.
     */
    fun answerCall(callId: String, mode: Int = 2): Int =
        if (isConnected) nativeSignalAnswerCall(handle, callId, mode) else -1

    /** Sends a hangup signal; does nothing when not connected. */
    fun hangup() {
        if (!isConnected) return
        nativeSignalHangup(handle)
    }

    /** Releases the native handle and marks the manager as disconnected. */
    fun destroy() {
        if (!isConnected) return
        nativeSignalDestroy(handle)
        handle = 0L
    }

    /** Reads [key] as a string, mapping an absent or JSON-null entry to `null`. */
    private fun JSONObject.stringOrNull(key: String): String? =
        if (isNull(key)) null else optString(key)

    // JNI entry points implemented by the wzp_android native library.
    private external fun nativeSignalConnect(relay: String, seed: String): Long
    private external fun nativeSignalGetState(handle: Long): String?
    private external fun nativeSignalPlaceCall(handle: Long, targetFp: String): Int
    private external fun nativeSignalAnswerCall(handle: Long, callId: String, mode: Int): Int
    private external fun nativeSignalHangup(handle: Long)
    private external fun nativeSignalDestroy(handle: Long)

    companion object {
        init {
            System.loadLibrary("wzp_android")
        }
    }
}
|
||||
|
||||
/**
 * Immutable snapshot of the signal connection.
 *
 * Every property has a default, so `SignalState()` represents the idle,
 * nothing-pending state.
 *
 * @property status connection status string; defaults to `"idle"`.
 * @property fingerprint fingerprint string reported with the state; empty by default.
 * @property incomingCallId id of a pending incoming call, or `null` if none.
 * @property incomingCallerFp fingerprint of the incoming caller, or `null`.
 * @property incomingCallerAlias alias of the incoming caller, or `null`.
 * @property callSetupRelay relay address reported for call setup, or `null`.
 * @property callSetupRoom room name reported for call setup, or `null`.
 * @property callSetupId call id reported for call setup, or `null`.
 */
data class SignalState(
    val status: String = "idle",
    val fingerprint: String = "",
    val incomingCallId: String? = null,
    val incomingCallerFp: String? = null,
    val incomingCallerAlias: String? = null,
    val callSetupRelay: String? = null,
    val callSetupRoom: String? = null,
    val callSetupId: String? = null,
)
|
||||
@@ -159,6 +159,18 @@ class WzpEngine(private val callback: WzpCallback) {
|
||||
private external fun nativeWriteAudioDirect(handle: Long, buffer: java.nio.ByteBuffer, sampleCount: Int): Int
|
||||
private external fun nativeReadAudioDirect(handle: Long, buffer: java.nio.ByteBuffer, maxSamples: Int): Int
|
||||
private external fun nativeDestroy(handle: Long)
|
||||
|
||||
companion object {
|
||||
init { System.loadLibrary("wzp_android") }
|
||||
|
||||
/** Get the identity fingerprint for a seed hex. No engine needed. */
|
||||
@JvmStatic
|
||||
private external fun nativeGetFingerprint(seedHex: String): String?
|
||||
|
||||
/** Compute the full identity fingerprint (xxxx:xxxx:...) from a seed hex string. */
|
||||
@JvmStatic
|
||||
fun getFingerprint(seedHex: String): String = nativeGetFingerprint(seedHex) ?: ""
|
||||
}
|
||||
private external fun nativePingRelay(handle: Long, relay: String): String?
|
||||
private external fun nativeStartSignaling(handle: Long, relay: String, seed: String, token: String, alias: String): Int
|
||||
private external fun nativePlaceCall(handle: Long, targetFp: String): Int
|
||||
@@ -208,11 +220,6 @@ class WzpEngine(private val callback: WzpCallback) {
|
||||
return nativeAnswerCall(nativeHandle, callId, mode)
|
||||
}
|
||||
|
||||
companion object {
|
||||
init {
|
||||
System.loadLibrary("wzp_android")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Integer constants matching the Rust [CallState] enum ordinals. */
|
||||
|
||||
@@ -141,9 +141,9 @@ class CallViewModel : ViewModel(), WzpCallback {
|
||||
private val _targetFingerprint = MutableStateFlow("")
|
||||
val targetFingerprint: StateFlow<String> = _targetFingerprint.asStateFlow()
|
||||
|
||||
/** Signal connection state: 0=idle, 5=registered, 6=ringing, 7=incoming */
|
||||
private val _signalState = MutableStateFlow(0)
|
||||
val signalState: StateFlow<Int> = _signalState.asStateFlow()
|
||||
/** Signal state string: "idle", "registered", "ringing", "incoming", "setup" */
|
||||
private val _signalState = MutableStateFlow("idle")
|
||||
val signalState: StateFlow<String> = _signalState.asStateFlow()
|
||||
|
||||
/** Incoming call info */
|
||||
private val _incomingCallId = MutableStateFlow<String?>(null)
|
||||
@@ -155,32 +155,80 @@ class CallViewModel : ViewModel(), WzpCallback {
|
||||
private val _incomingCallerAlias = MutableStateFlow<String?>(null)
|
||||
val incomingCallerAlias: StateFlow<String?> = _incomingCallerAlias.asStateFlow()
|
||||
|
||||
/** Separate signal manager (persistent, survives calls) */
|
||||
private var signalManager: com.wzp.engine.SignalManager? = null
|
||||
private var signalPollJob: Job? = null
|
||||
|
||||
fun setCallMode(mode: Int) { _callMode.value = mode }
|
||||
fun setTargetFingerprint(fp: String) { _targetFingerprint.value = fp }
|
||||
|
||||
/** Register on relay for direct calls */
|
||||
fun registerForCalls() {
|
||||
if (engine == null) {
|
||||
engine = WzpEngine(this).also { it.init() }
|
||||
}
|
||||
val serverIdx = _selectedServer.value
|
||||
val serverList = _servers.value
|
||||
if (serverIdx >= serverList.size) return
|
||||
|
||||
val relay = serverList[serverIdx].address
|
||||
val seed = _seedHex.value
|
||||
val alias = _alias.value
|
||||
|
||||
viewModelScope.launch(Dispatchers.IO) {
|
||||
var seed = _seedHex.value
|
||||
// Generate seed if empty (fresh install or cleared storage)
|
||||
if (seed.isEmpty()) {
|
||||
val newSeed = ByteArray(32).also { java.security.SecureRandom().nextBytes(it) }
|
||||
seed = newSeed.joinToString("") { "%02x".format(it) }
|
||||
_seedHex.value = seed
|
||||
settings?.saveSeedHex(seed)
|
||||
Log.i(TAG, "generated new identity seed")
|
||||
}
|
||||
val resolvedRelay = resolveToIp(relay) ?: relay
|
||||
val result = engine?.startSignaling(resolvedRelay, seed, "", alias)
|
||||
if (result == 0) {
|
||||
_signalState.value = 5 // Registered
|
||||
startStatsPolling()
|
||||
|
||||
// nativeSignalConnect has JNI overhead — must be on a thread with enough stack.
|
||||
// Dispatchers.IO threads overflow. Use explicit Java Thread.
|
||||
Thread(null, {
|
||||
try {
|
||||
val mgr = com.wzp.engine.SignalManager()
|
||||
val ok = mgr.connect(resolvedRelay, seed)
|
||||
viewModelScope.launch {
|
||||
if (ok) {
|
||||
signalManager = mgr
|
||||
startSignalPolling()
|
||||
} else {
|
||||
_errorMessage.value = "Failed to register on relay"
|
||||
}
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
viewModelScope.launch {
|
||||
_errorMessage.value = "Register error: ${e.message}"
|
||||
}
|
||||
}
|
||||
}, "wzp-signal-init", 8 * 1024 * 1024).start()
|
||||
}
|
||||
|
||||
/**
 * (Re)starts the coroutine that polls the signal manager every 500ms and
 * mirrors its state into the signal / incoming-call flows.
 *
 * Any previously running poll job is cancelled first, so at most one poller
 * is active. While the reported status is "setup" and both a relay and a
 * room are present, the media call is started via startCallInternal.
 */
private fun startSignalPolling() {
    signalPollJob?.cancel()
    signalPollJob = viewModelScope.launch {
        while (isActive) {
            val mgr = signalManager
            if (mgr != null && mgr.isConnected) {
                val snapshot = mgr.getState()

                // Publish the latest snapshot to the UI-facing flows.
                _signalState.value = snapshot.status
                _incomingCallId.value = snapshot.incomingCallId
                _incomingCallerFp.value = snapshot.incomingCallerFp
                _incomingCallerAlias.value = snapshot.incomingCallerAlias

                // Auto-connect to the media room once call setup is reported.
                // NOTE(review): this fires on every poll while status stays "setup";
                // confirm startCallInternal tolerates repeated invocation.
                val relay = snapshot.callSetupRelay
                val room = snapshot.callSetupRoom
                if (snapshot.status == "setup" && relay != null && room != null) {
                    Log.i(TAG, "CallSetup: connecting to $relay room $room")
                    startCallInternal(relay, room)
                }
            }
            delay(500L)
        }
    }
}
|
||||
|
||||
/** Cancels the signal poll job, if one is running, and forgets it. */
private fun stopSignalPolling() {
    val running = signalPollJob
    signalPollJob = null
    running?.cancel()
}
|
||||
|
||||
/** Place a direct call to the target fingerprint */
|
||||
@@ -190,24 +238,28 @@ class CallViewModel : ViewModel(), WzpCallback {
|
||||
_errorMessage.value = "Enter a fingerprint to call"
|
||||
return
|
||||
}
|
||||
engine?.placeCall(target)
|
||||
_signalState.value = 6 // Ringing
|
||||
signalManager?.placeCall(target)
|
||||
}
|
||||
|
||||
/** Answer an incoming direct call */
|
||||
fun answerIncomingCall(mode: Int = 2) {
|
||||
val callId = _incomingCallId.value ?: return
|
||||
engine?.answerCall(callId, mode)
|
||||
signalManager?.answerCall(callId, mode)
|
||||
}
|
||||
|
||||
/** Reject an incoming direct call */
|
||||
fun rejectIncomingCall() {
|
||||
val callId = _incomingCallId.value ?: return
|
||||
engine?.answerCall(callId, 0) // 0 = Reject
|
||||
_signalState.value = 5 // Back to registered
|
||||
_incomingCallId.value = null
|
||||
_incomingCallerFp.value = null
|
||||
_incomingCallerAlias.value = null
|
||||
signalManager?.answerCall(callId, 0)
|
||||
}
|
||||
|
||||
/**
 * Hangs up a direct call: the media engine is torn down, while the signal
 * manager itself is kept so the signal connection stays alive.
 */
fun hangupDirectCall() {
    // Send the hangup over the signal connection first.
    signalManager?.hangup()

    // Stop and release the media engine, then drop the reference.
    engine?.stopCall()
    engine?.destroy()
    engine = null

    // Record that no engine is initialized any more.
    engineInitialized = false
}
|
||||
|
||||
companion object {
|
||||
@@ -685,30 +737,10 @@ class CallViewModel : ViewModel(), WzpCallback {
|
||||
val s = CallStats.fromJson(json)
|
||||
lastCallDuration = s.durationSecs
|
||||
_stats.value = s
|
||||
// Only update callState from media engine stats (not signal)
|
||||
if (s.state != 0) {
|
||||
_callState.value = s.state
|
||||
}
|
||||
// Track signal state changes for direct calling
|
||||
if (s.state in 5..7) {
|
||||
_signalState.value = s.state
|
||||
}
|
||||
// Incoming call detection
|
||||
if (s.state == 7) { // IncomingCall
|
||||
_incomingCallId.value = s.incomingCallId
|
||||
_incomingCallerFp.value = s.incomingCallerFp
|
||||
_incomingCallerAlias.value = s.incomingCallerAlias
|
||||
}
|
||||
// CallSetup: auto-connect to media room
|
||||
if (s.state == 1 && s.incomingCallId != null && s.incomingCallId.contains("|")) {
|
||||
// Format: "relay_addr|room_name"
|
||||
val parts = s.incomingCallId.split("|", limit = 2)
|
||||
if (parts.size == 2) {
|
||||
val mediaRelay = parts[0]
|
||||
val mediaRoom = parts[1]
|
||||
Log.i(TAG, "CallSetup: connecting to $mediaRelay room $mediaRoom")
|
||||
startCallInternal(mediaRelay, mediaRoom)
|
||||
}
|
||||
}
|
||||
if (s.state == 2 && !audioStarted) {
|
||||
startAudio()
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package com.wzp.ui.call
|
||||
|
||||
import androidx.compose.foundation.background
|
||||
import androidx.compose.foundation.clickable
|
||||
import androidx.compose.ui.text.style.TextAlign
|
||||
import androidx.compose.foundation.layout.Arrangement
|
||||
import androidx.compose.foundation.layout.Box
|
||||
import androidx.compose.foundation.layout.Column
|
||||
@@ -166,7 +165,7 @@ fun InCallScreen(
|
||||
color = Color.White
|
||||
)
|
||||
Text(
|
||||
text = "ENCRYPTED VOICE",
|
||||
text = "ENCRYPTED VOICE \u2022 direct-call-v1",
|
||||
style = MaterialTheme.typography.labelSmall.copy(letterSpacing = 3.sp),
|
||||
color = TextDim
|
||||
)
|
||||
@@ -220,7 +219,7 @@ fun InCallScreen(
|
||||
|
||||
// Mode toggle: Room vs Direct Call
|
||||
val callMode by viewModel.callMode.collectAsState()
|
||||
val signalState by viewModel.signalState.collectAsState()
|
||||
val signalState by viewModel.signalState.collectAsState() // "idle"/"registered"/"ringing"/etc
|
||||
val targetFp by viewModel.targetFingerprint.collectAsState()
|
||||
val incomingCallId by viewModel.incomingCallId.collectAsState()
|
||||
val incomingCallerFp by viewModel.incomingCallerFp.collectAsState()
|
||||
@@ -310,7 +309,7 @@ fun InCallScreen(
|
||||
}
|
||||
} else {
|
||||
// ── Direct call mode ──
|
||||
if (signalState < 5) {
|
||||
if (signalState == "idle") {
|
||||
// Not registered yet
|
||||
SectionLabel("ALIAS")
|
||||
OutlinedTextField(
|
||||
@@ -334,7 +333,7 @@ fun InCallScreen(
|
||||
color = Color.White
|
||||
)
|
||||
}
|
||||
} else if (signalState == 5) {
|
||||
} else if (signalState == "registered" || signalState == "incoming") {
|
||||
// Registered — show dial pad
|
||||
Text(
|
||||
"\u2705 Registered — waiting for calls",
|
||||
@@ -404,8 +403,7 @@ fun InCallScreen(
|
||||
color = Color.White
|
||||
)
|
||||
}
|
||||
} else if (signalState == 6) {
|
||||
// Ringing
|
||||
} else if (signalState == "ringing") {
|
||||
Text(
|
||||
"\uD83D\uDD14 Ringing...",
|
||||
color = Yellow,
|
||||
@@ -413,11 +411,10 @@ fun InCallScreen(
|
||||
textAlign = TextAlign.Center,
|
||||
modifier = Modifier.fillMaxWidth()
|
||||
)
|
||||
} else if (signalState == 7) {
|
||||
// Incoming call (state 7 also handled above in registered view)
|
||||
} else if (signalState == "setup") {
|
||||
Text(
|
||||
"\uD83D\uDCDE Incoming call...",
|
||||
color = Green,
|
||||
"Connecting to call...",
|
||||
color = Accent,
|
||||
style = MaterialTheme.typography.titleMedium,
|
||||
textAlign = TextAlign.Center,
|
||||
modifier = Modifier.fillMaxWidth()
|
||||
@@ -432,14 +429,16 @@ fun InCallScreen(
|
||||
|
||||
Spacer(modifier = Modifier.height(20.dp))
|
||||
|
||||
// Identity
|
||||
val fp = if (seedHex.length >= 16) seedHex.take(16) else ""
|
||||
// Identity — compute real fingerprint from seed
|
||||
val fullFp = remember(seedHex) {
|
||||
if (seedHex.length >= 64) com.wzp.engine.WzpEngine.getFingerprint(seedHex) else ""
|
||||
}
|
||||
Row(verticalAlignment = Alignment.CenterVertically) {
|
||||
if (fp.isNotEmpty()) {
|
||||
Identicon(fingerprint = seedHex, size = 28.dp)
|
||||
if (fullFp.isNotEmpty()) {
|
||||
Identicon(fingerprint = fullFp, size = 28.dp)
|
||||
Spacer(modifier = Modifier.width(8.dp))
|
||||
CopyableFingerprint(
|
||||
fingerprint = fp.chunked(4).joinToString(":"),
|
||||
fingerprint = fullFp,
|
||||
style = MaterialTheme.typography.bodySmall.copy(fontFamily = FontFamily.Monospace),
|
||||
color = TextDim
|
||||
)
|
||||
|
||||
@@ -14,8 +14,10 @@ use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use bytes::Bytes;
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{debug, error, info, warn};
|
||||
use wzp_codec::AdaptiveDecoder;
|
||||
use wzp_codec::agc::AutoGainControl;
|
||||
use wzp_codec::dred_ffi::{DredDecoderHandle, DredState};
|
||||
use wzp_crypto::{KeyExchange, WarzoneKeyExchange};
|
||||
use wzp_fec::{RaptorQFecDecoder, RaptorQFecEncoder};
|
||||
use wzp_proto::{
|
||||
@@ -201,7 +203,6 @@ impl WzpEngine {
|
||||
/// Returns JSON `{"rtt_ms":N,"server_fingerprint":"hex"}` or error.
|
||||
pub fn ping_relay(&self, address: &str) -> Result<String, anyhow::Error> {
|
||||
let addr: SocketAddr = address.parse()?;
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
@@ -245,154 +246,7 @@ impl WzpEngine {
|
||||
}
|
||||
|
||||
/// Start persistent signaling connection for direct calls.
|
||||
/// Spawns a background task that maintains the `_signal` connection.
|
||||
pub fn start_signaling(
|
||||
&mut self,
|
||||
relay_addr: &str,
|
||||
seed_hex: &str,
|
||||
token: Option<&str>,
|
||||
alias: Option<&str>,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
use wzp_proto::{MediaTransport, SignalMessage};
|
||||
|
||||
let addr: SocketAddr = relay_addr.parse()?;
|
||||
let seed = if seed_hex.is_empty() {
|
||||
wzp_crypto::Seed::generate()
|
||||
} else {
|
||||
wzp_crypto::Seed::from_hex(seed_hex).map_err(|e| anyhow::anyhow!(e))?
|
||||
};
|
||||
let identity = seed.derive_identity();
|
||||
let pub_id = identity.public_identity();
|
||||
let identity_pub = *pub_id.signing.as_bytes();
|
||||
let fp = pub_id.fingerprint.to_string();
|
||||
let token = token.map(|s| s.to_string());
|
||||
let alias = alias.map(|s| s.to_string());
|
||||
let state = self.state.clone();
|
||||
let seed_bytes = seed.0;
|
||||
|
||||
info!(fingerprint = %fp, relay = %addr, "starting signaling");
|
||||
|
||||
// Create runtime for signaling (separate from call runtime)
|
||||
let rt = tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(1)
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
let signal_state = state.clone();
|
||||
rt.spawn(async move {
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
let bind: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
let endpoint = match wzp_transport::create_endpoint(bind, None) {
|
||||
Ok(e) => e,
|
||||
Err(e) => { error!("signal endpoint: {e}"); return; }
|
||||
};
|
||||
let client_cfg = wzp_transport::client_config();
|
||||
let conn = match wzp_transport::connect(&endpoint, addr, "_signal", client_cfg).await {
|
||||
Ok(c) => c,
|
||||
Err(e) => { error!("signal connect: {e}"); return; }
|
||||
};
|
||||
let transport = std::sync::Arc::new(wzp_transport::QuinnTransport::new(conn));
|
||||
|
||||
// Auth if token provided
|
||||
if let Some(ref tok) = token {
|
||||
let _ = transport.send_signal(&SignalMessage::AuthToken { token: tok.clone() }).await;
|
||||
}
|
||||
|
||||
// Register presence
|
||||
let _ = transport.send_signal(&SignalMessage::RegisterPresence {
|
||||
identity_pub,
|
||||
signature: vec![],
|
||||
alias: alias.clone(),
|
||||
}).await;
|
||||
|
||||
// Wait for ack
|
||||
match transport.recv_signal().await {
|
||||
Ok(Some(SignalMessage::RegisterPresenceAck { success: true, .. })) => {
|
||||
info!(fingerprint = %fp, "signal: registered");
|
||||
let mut stats = signal_state.stats.lock().unwrap();
|
||||
stats.state = crate::stats::CallState::Registered;
|
||||
}
|
||||
other => {
|
||||
error!("signal registration failed: {other:?}");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Signal recv loop
|
||||
loop {
|
||||
if !signal_state.running.load(Ordering::Relaxed) {
|
||||
break;
|
||||
}
|
||||
match transport.recv_signal().await {
|
||||
Ok(Some(SignalMessage::CallRinging { call_id })) => {
|
||||
info!(call_id = %call_id, "signal: ringing");
|
||||
let mut stats = signal_state.stats.lock().unwrap();
|
||||
stats.state = crate::stats::CallState::Ringing;
|
||||
}
|
||||
Ok(Some(SignalMessage::DirectCallOffer { caller_fingerprint, caller_alias, call_id, .. })) => {
|
||||
info!(from = %caller_fingerprint, call_id = %call_id, "signal: incoming call");
|
||||
let mut stats = signal_state.stats.lock().unwrap();
|
||||
stats.state = crate::stats::CallState::IncomingCall;
|
||||
stats.incoming_call_id = Some(call_id);
|
||||
stats.incoming_caller_fp = Some(caller_fingerprint);
|
||||
stats.incoming_caller_alias = caller_alias;
|
||||
}
|
||||
Ok(Some(SignalMessage::DirectCallAnswer { call_id, accept_mode, .. })) => {
|
||||
info!(call_id = %call_id, mode = ?accept_mode, "signal: call answered");
|
||||
}
|
||||
Ok(Some(SignalMessage::CallSetup { call_id, room, relay_addr })) => {
|
||||
info!(call_id = %call_id, room = %room, relay = %relay_addr, "signal: call setup");
|
||||
// Connect to media room via the existing start_call mechanism
|
||||
// Store the room info so Kotlin can call startCall with it
|
||||
let mut stats = signal_state.stats.lock().unwrap();
|
||||
stats.state = crate::stats::CallState::Connecting;
|
||||
// Store call setup info for Kotlin to pick up
|
||||
stats.incoming_call_id = Some(format!("{relay_addr}|{room}"));
|
||||
}
|
||||
Ok(Some(SignalMessage::Hangup { reason })) => {
|
||||
info!(reason = ?reason, "signal: call ended by remote");
|
||||
let mut stats = signal_state.stats.lock().unwrap();
|
||||
stats.state = crate::stats::CallState::Closed;
|
||||
stats.incoming_call_id = None;
|
||||
stats.incoming_caller_fp = None;
|
||||
stats.incoming_caller_alias = None;
|
||||
}
|
||||
Ok(Some(_)) => {}
|
||||
Ok(None) => {
|
||||
info!("signal: connection closed");
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("signal recv error: {e}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut stats = signal_state.stats.lock().unwrap();
|
||||
stats.state = crate::stats::CallState::Closed;
|
||||
});
|
||||
|
||||
self.tokio_runtime = Some(rt);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Place a direct call to a target fingerprint via the signal connection.
|
||||
pub fn place_call(&self, target_fingerprint: &str) -> Result<(), anyhow::Error> {
|
||||
let _ = self.state.command_tx.send(EngineCommand::PlaceCall {
|
||||
target_fingerprint: target_fingerprint.to_string(),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Answer an incoming direct call.
|
||||
pub fn answer_call(&self, call_id: &str, mode: wzp_proto::CallAcceptMode) -> Result<(), anyhow::Error> {
|
||||
let _ = self.state.command_tx.send(EngineCommand::AnswerCall {
|
||||
call_id: call_id.to_string(),
|
||||
accept_mode: mode,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
// Signal methods (start_signaling, place_call, answer_call) moved to signal_mgr.rs
|
||||
|
||||
pub fn set_mute(&self, muted: bool) {
|
||||
self.state.muted.store(muted, Ordering::Relaxed);
|
||||
@@ -456,7 +310,6 @@ async fn run_call(
|
||||
alias: Option<&str>,
|
||||
state: Arc<EngineState>,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
|
||||
let bind_addr: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
let endpoint = wzp_transport::create_endpoint(bind_addr, None)?;
|
||||
@@ -530,9 +383,12 @@ async fn run_call(
|
||||
stats.state = CallState::Active;
|
||||
}
|
||||
|
||||
// Initialize codec (Opus or Codec2 based on profile)
|
||||
// Initialize codec (Opus or Codec2 based on profile).
|
||||
// Phase 3c: decoder is a concrete AdaptiveDecoder (not Box<dyn
|
||||
// AudioDecoder>) so the recv task can call reconstruct_from_dred on
|
||||
// gaps detected via sequence tracking.
|
||||
let mut encoder = wzp_codec::create_encoder(profile);
|
||||
let mut decoder = wzp_codec::create_decoder(profile);
|
||||
let mut decoder = AdaptiveDecoder::new(profile).expect("failed to create adaptive decoder");
|
||||
|
||||
// Initialize FEC encoder/decoder
|
||||
let mut fec_enc = wzp_fec::create_encoder(&profile);
|
||||
@@ -665,6 +521,19 @@ async fn run_call(
|
||||
t_opus_us += t0.elapsed().as_micros() as u64;
|
||||
let encoded = &encode_buf[..encoded_len];
|
||||
|
||||
// Phase 2: Opus tiers bypass RaptorQ (DRED handles loss recovery
|
||||
// at the codec layer). Codec2 tiers keep RaptorQ unchanged.
|
||||
let is_opus = current_profile.codec.is_opus();
|
||||
let (hdr_fec_block, hdr_fec_symbol, hdr_fec_ratio) = if is_opus {
|
||||
(0u8, 0u8, 0u8)
|
||||
} else {
|
||||
(
|
||||
block_id,
|
||||
frame_in_block,
|
||||
MediaHeader::encode_fec_ratio(current_profile.fec_ratio),
|
||||
)
|
||||
};
|
||||
|
||||
// Build source packet
|
||||
let s = seq.fetch_add(1, Ordering::Relaxed);
|
||||
let t = ts.fetch_add(frame_samples as u32, Ordering::Relaxed);
|
||||
@@ -675,11 +544,11 @@ async fn run_call(
|
||||
is_repair: false,
|
||||
codec_id: current_profile.codec,
|
||||
has_quality_report: false,
|
||||
fec_ratio_encoded: MediaHeader::encode_fec_ratio(current_profile.fec_ratio),
|
||||
fec_ratio_encoded: hdr_fec_ratio,
|
||||
seq: s,
|
||||
timestamp: t,
|
||||
fec_block: block_id,
|
||||
fec_symbol: frame_in_block,
|
||||
fec_block: hdr_fec_block,
|
||||
fec_symbol: hdr_fec_symbol,
|
||||
reserved: 0,
|
||||
csrc_count: 0,
|
||||
},
|
||||
@@ -709,14 +578,16 @@ async fn run_call(
|
||||
t_send_us += t0.elapsed().as_micros() as u64;
|
||||
frames_sent += 1;
|
||||
|
||||
// Feed encoded frame to FEC encoder
|
||||
// Codec2-only: feed RaptorQ and emit repair packets when the
|
||||
// block is full. Opus tiers skip this entire block — DRED
|
||||
// (enabled in Phase 1) provides codec-layer loss recovery.
|
||||
let t0 = Instant::now();
|
||||
if !is_opus {
|
||||
if let Err(e) = fec_enc.add_source_symbol(encoded) {
|
||||
warn!("fec add_source error: {e}");
|
||||
}
|
||||
frame_in_block += 1;
|
||||
|
||||
// When block is full, generate repair packets
|
||||
if frame_in_block >= current_profile.frames_per_block {
|
||||
match fec_enc.generate_repair(current_profile.fec_ratio) {
|
||||
Ok(repairs) => {
|
||||
@@ -767,6 +638,7 @@ async fn run_call(
|
||||
block_id = block_id.wrapping_add(1);
|
||||
frame_in_block = 0;
|
||||
}
|
||||
}
|
||||
t_fec_us += t0.elapsed().as_micros() as u64;
|
||||
t_frames += 1;
|
||||
|
||||
@@ -808,7 +680,27 @@ async fn run_call(
|
||||
let mut last_stats_log = Instant::now();
|
||||
let mut quality_ctrl = AdaptiveQualityController::new();
|
||||
let mut last_peer_codec: Option<CodecId> = None;
|
||||
info!("recv task started (Opus + RaptorQ FEC)");
|
||||
|
||||
// Phase 3c: DRED reconstruction state. Unlike the desktop
|
||||
// CallDecoder (which sits behind a jitter buffer that emits
|
||||
// Missing signals), engine.rs reads packets directly from the
|
||||
// transport and decodes straight into the playout ring. Gap
|
||||
// detection is therefore done via sequence-number tracking:
|
||||
// when a packet arrives with seq > expected_seq, the frames in
|
||||
// between are missing and we attempt to reconstruct them via
|
||||
// DRED before decoding the newly-arrived packet.
|
||||
let mut dred_decoder =
|
||||
DredDecoderHandle::new().expect("opus_dred_decoder_create failed");
|
||||
let mut dred_parse_scratch =
|
||||
DredState::new().expect("opus_dred_alloc failed (scratch)");
|
||||
let mut last_good_dred =
|
||||
DredState::new().expect("opus_dred_alloc failed (good state)");
|
||||
let mut last_good_dred_seq: Option<u16> = None;
|
||||
let mut expected_seq: Option<u16> = None;
|
||||
let mut dred_reconstructions: u64 = 0;
|
||||
let mut classical_plc_invocations: u64 = 0;
|
||||
|
||||
info!("recv task started (Opus + DRED + Codec2/RaptorQ)");
|
||||
loop {
|
||||
if !state.running.load(Ordering::Relaxed) {
|
||||
break;
|
||||
@@ -850,14 +742,21 @@ async fn run_call(
|
||||
let is_repair = pkt.header.is_repair;
|
||||
let pkt_block = pkt.header.fec_block;
|
||||
let pkt_symbol = pkt.header.fec_symbol;
|
||||
let pkt_is_opus = pkt.header.codec_id.is_opus();
|
||||
|
||||
// Feed every packet (source + repair) to FEC decoder
|
||||
// Phase 2: Opus packets bypass RaptorQ entirely — DRED
|
||||
// (enabled Phase 1) handles codec-layer loss recovery,
|
||||
// and feeding these symbols into the RaptorQ decoder
|
||||
// would accumulate block_id=0 duplicates that never
|
||||
// decode. Codec2 packets still feed RaptorQ.
|
||||
if !pkt_is_opus {
|
||||
let _ = fec_dec.add_symbol(
|
||||
pkt_block,
|
||||
pkt_symbol,
|
||||
is_repair,
|
||||
&pkt.payload,
|
||||
);
|
||||
}
|
||||
|
||||
// Source packets: decode directly
|
||||
if !is_repair && pkt.header.codec_id != CodecId::ComfortNoise {
|
||||
@@ -880,6 +779,13 @@ async fn run_call(
|
||||
};
|
||||
info!(from = ?decoder.codec_id(), to = ?pkt.header.codec_id, "recv: switching decoder");
|
||||
let _ = decoder.set_profile(switch_profile);
|
||||
// Profile switch invalidates the cached DRED
|
||||
// state because samples_available is measured
|
||||
// in the old profile's sample rate. Reset the
|
||||
// tracking so we don't try to reconstruct with
|
||||
// stale offsets.
|
||||
last_good_dred_seq = None;
|
||||
expected_seq = None;
|
||||
}
|
||||
// Track peer codec for UI display
|
||||
if last_peer_codec != Some(pkt.header.codec_id) {
|
||||
@@ -888,6 +794,109 @@ async fn run_call(
|
||||
stats.peer_codec = format!("{:?}", pkt.header.codec_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3c: Opus path — parse DRED state out of
|
||||
// the current packet FIRST so last_good_dred
|
||||
// reflects the freshest available reconstruction
|
||||
// source, then attempt gap recovery against it
|
||||
// BEFORE decoding this packet's audio. Ordering
|
||||
// matters because the playout ring is FIFO — gap
|
||||
// samples must be written before this packet's
|
||||
// samples, which come next.
|
||||
if pkt_is_opus {
|
||||
// Update DRED state from the current packet.
|
||||
match dred_decoder.parse_into(&mut dred_parse_scratch, &pkt.payload) {
|
||||
Ok(available) if available > 0 => {
|
||||
std::mem::swap(
|
||||
&mut dred_parse_scratch,
|
||||
&mut last_good_dred,
|
||||
);
|
||||
last_good_dred_seq = Some(pkt.header.seq);
|
||||
}
|
||||
Ok(_) => {
|
||||
// Packet carried no DRED — keep cached state.
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("DRED parse error (ignored): {e}");
|
||||
}
|
||||
}
|
||||
|
||||
// Detect and fill gap from last-expected to this packet.
|
||||
const MAX_GAP_FRAMES: u16 = 16;
|
||||
if let Some(expected) = expected_seq {
|
||||
let gap = pkt.header.seq.wrapping_sub(expected);
|
||||
if gap > 0 && gap <= MAX_GAP_FRAMES {
|
||||
let current_profile_frame_samples =
|
||||
(48_000 * profile.frame_duration_ms as i32) / 1000;
|
||||
let available = last_good_dred.samples_available();
|
||||
let pcm_slice_len =
|
||||
current_profile_frame_samples as usize;
|
||||
|
||||
for gap_idx in 0..gap {
|
||||
let missing_seq = expected.wrapping_add(gap_idx);
|
||||
// Offset from the DRED anchor (last_good_dred_seq)
|
||||
// back to the missing seq, in samples. Skip if
|
||||
// the anchor is not ahead of missing (defensive).
|
||||
let offset_samples = match last_good_dred_seq {
|
||||
Some(anchor) => {
|
||||
let delta = anchor.wrapping_sub(missing_seq);
|
||||
if delta == 0 || delta > MAX_GAP_FRAMES {
|
||||
-1 // skip DRED, use PLC
|
||||
} else {
|
||||
delta as i32 * current_profile_frame_samples
|
||||
}
|
||||
}
|
||||
None => -1,
|
||||
};
|
||||
|
||||
let reconstructed = if offset_samples > 0
|
||||
&& offset_samples <= available
|
||||
{
|
||||
decoder
|
||||
.reconstruct_from_dred(
|
||||
&last_good_dred,
|
||||
offset_samples,
|
||||
&mut decode_buf[..pcm_slice_len],
|
||||
)
|
||||
.ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match reconstructed {
|
||||
Some(samples) => {
|
||||
playout_agc.process_frame(
|
||||
&mut decode_buf[..samples],
|
||||
);
|
||||
state
|
||||
.playout_ring
|
||||
.write(&decode_buf[..samples]);
|
||||
dred_reconstructions += 1;
|
||||
frames_decoded += 1;
|
||||
}
|
||||
None => {
|
||||
// Fall through to classical PLC.
|
||||
if let Ok(samples) =
|
||||
decoder.decode_lost(&mut decode_buf)
|
||||
{
|
||||
playout_agc
|
||||
.process_frame(&mut decode_buf[..samples]);
|
||||
state
|
||||
.playout_ring
|
||||
.write(&decode_buf[..samples]);
|
||||
classical_plc_invocations += 1;
|
||||
frames_decoded += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Advance the expected-seq tracker for the next arrival.
|
||||
expected_seq = Some(pkt.header.seq.wrapping_add(1));
|
||||
}
|
||||
|
||||
match decoder.decode(&pkt.payload, &mut decode_buf) {
|
||||
Ok(samples) => {
|
||||
playout_agc.process_frame(&mut decode_buf[..samples]);
|
||||
@@ -899,12 +908,21 @@ async fn run_call(
|
||||
if let Ok(samples) = decoder.decode_lost(&mut decode_buf) {
|
||||
playout_agc.process_frame(&mut decode_buf[..samples]);
|
||||
state.playout_ring.write(&decode_buf[..samples]);
|
||||
// This is a decode-error fallback (not a
|
||||
// detected gap), so count it as PLC.
|
||||
classical_plc_invocations += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try FEC recovery
|
||||
// Codec2-only: try FEC recovery and expire old blocks.
|
||||
// Opus packets skip both — the Phase 2 Opus path has no
|
||||
// RaptorQ state to query or clean up. The `fec_recovered`
|
||||
// counter is now effectively Codec2-only, which is
|
||||
// correct because DRED reconstructions are counted
|
||||
// separately in `dred_reconstructions` (Phase 3c telemetry).
|
||||
if !pkt_is_opus {
|
||||
if let Ok(Some(recovered_frames)) = fec_dec.try_decode(pkt_block) {
|
||||
fec_recovered += recovered_frames.len() as u64;
|
||||
if fec_recovered % 50 == 1 {
|
||||
@@ -921,10 +939,13 @@ async fn run_call(
|
||||
if pkt_block > 3 {
|
||||
fec_dec.expire_before(pkt_block.wrapping_sub(3));
|
||||
}
|
||||
}
|
||||
|
||||
let mut stats = state.stats.lock().unwrap();
|
||||
stats.frames_decoded = frames_decoded;
|
||||
stats.fec_recovered = fec_recovered;
|
||||
stats.dred_reconstructions = dred_reconstructions;
|
||||
stats.classical_plc_invocations = classical_plc_invocations;
|
||||
drop(stats);
|
||||
|
||||
// Periodic stats every 5 seconds
|
||||
@@ -932,6 +953,8 @@ async fn run_call(
|
||||
info!(
|
||||
frames_decoded,
|
||||
fec_recovered,
|
||||
dred_reconstructions,
|
||||
classical_plc_invocations,
|
||||
recv_errors,
|
||||
max_recv_gap_ms,
|
||||
playout_avail = state.playout_ring.available(),
|
||||
|
||||
@@ -77,6 +77,9 @@ pub unsafe extern "system" fn Java_com_wzp_engine_WzpEngine_nativeInit(
|
||||
) -> jlong {
|
||||
let result = panic::catch_unwind(|| {
|
||||
init_logging();
|
||||
// Install rustls crypto provider ONCE on the main thread.
|
||||
// Must not be called per-thread — conflicts with Android's system libcrypto.so TLS keys.
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
let handle = Box::new(EngineHandle {
|
||||
engine: WzpEngine::new(),
|
||||
});
|
||||
@@ -360,88 +363,149 @@ pub unsafe extern "system" fn Java_com_wzp_engine_WzpEngine_nativePingRelay<'a>(
|
||||
.unwrap_or(JObject::null().into_raw())
|
||||
}
|
||||
|
||||
/// Get the identity fingerprint for a seed hex string.
|
||||
/// Returns the full fingerprint (xxxx:xxxx:...) or empty string on error.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_WzpEngine_nativeGetFingerprint<'a>(
|
||||
mut env: JNIEnv<'a>,
|
||||
_class: JClass,
|
||||
seed_hex_j: JString,
|
||||
) -> jstring {
|
||||
let seed_hex: String = env.get_string(&seed_hex_j).map(|s| s.into()).unwrap_or_default();
|
||||
let fp = if seed_hex.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
match wzp_crypto::Seed::from_hex(&seed_hex) {
|
||||
Ok(seed) => {
|
||||
let id = seed.derive_identity();
|
||||
id.public_identity().fingerprint.to_string()
|
||||
}
|
||||
Err(_) => String::new(),
|
||||
}
|
||||
};
|
||||
env.new_string(&fp)
|
||||
.map(|s| s.into_raw())
|
||||
.unwrap_or(JObject::null().into_raw())
|
||||
}
|
||||
|
||||
// ── Direct calling JNI functions ──
|
||||
|
||||
/// Start persistent signaling connection to relay for direct calls.
|
||||
/// Returns 0 on success, -1 on error.
|
||||
// ── SignalManager JNI functions ──
|
||||
|
||||
/// Opaque handle for SignalManager (separate from EngineHandle).
|
||||
struct SignalHandle {
|
||||
mgr: crate::signal_mgr::SignalManager,
|
||||
}
|
||||
|
||||
unsafe fn signal_ref(handle: jlong) -> &'static SignalHandle {
|
||||
unsafe { &*(handle as *const SignalHandle) }
|
||||
}
|
||||
|
||||
/// Connect to relay for signaling. Returns handle (jlong) or 0 on error.
|
||||
/// Blocks up to 10s waiting for the internal signal thread to connect.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_WzpEngine_nativeStartSignaling<'a>(
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_SignalManager_nativeSignalConnect<'a>(
|
||||
mut env: JNIEnv<'a>,
|
||||
_class: JClass,
|
||||
handle: jlong,
|
||||
relay_addr_j: JString,
|
||||
seed_hex_j: JString,
|
||||
token_j: JString,
|
||||
alias_j: JString,
|
||||
) -> jint {
|
||||
let result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
|
||||
let h = unsafe { handle_ref(handle) };
|
||||
let relay_addr: String = env.get_string(&relay_addr_j).map(|s| s.into()).unwrap_or_default();
|
||||
let seed_hex: String = env.get_string(&seed_hex_j).map(|s| s.into()).unwrap_or_default();
|
||||
let token: String = env.get_string(&token_j).map(|s| s.into()).unwrap_or_default();
|
||||
let alias: String = env.get_string(&alias_j).map(|s| s.into()).unwrap_or_default();
|
||||
relay_j: JString,
|
||||
seed_j: JString,
|
||||
) -> jlong {
|
||||
info!("nativeSignalConnect: entered");
|
||||
let relay: String = env.get_string(&relay_j).map(|s| s.into()).unwrap_or_default();
|
||||
let seed: String = env.get_string(&seed_j).map(|s| s.into()).unwrap_or_default();
|
||||
info!(relay = %relay, seed_len = seed.len(), "nativeSignalConnect: parsed strings");
|
||||
|
||||
h.engine.start_signaling(
|
||||
&relay_addr,
|
||||
&seed_hex,
|
||||
if token.is_empty() { None } else { Some(&token) },
|
||||
if alias.is_empty() { None } else { Some(&alias) },
|
||||
)
|
||||
}));
|
||||
|
||||
match result {
|
||||
Ok(Ok(())) => 0,
|
||||
Ok(Err(e)) => { error!("start_signaling failed: {e}"); -1 }
|
||||
Err(_) => { error!("start_signaling panicked"); -1 }
|
||||
// start() spawns an internal thread (connect+register+recv, ONE runtime, never dropped).
|
||||
// Blocks up to 10s waiting for the connect+register to complete.
|
||||
match crate::signal_mgr::SignalManager::start(&relay, &seed) {
|
||||
Ok(mgr) => {
|
||||
let handle = Box::new(SignalHandle { mgr });
|
||||
Box::into_raw(handle) as jlong
|
||||
}
|
||||
Err(e) => {
|
||||
error!("signal connect failed: {e}");
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Place a direct call to a target fingerprint.
|
||||
/// Returns 0 on success, -1 on error.
|
||||
/// Get signal state as JSON string.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_WzpEngine_nativePlaceCall<'a>(
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_SignalManager_nativeSignalGetState<'a>(
|
||||
mut env: JNIEnv<'a>,
|
||||
_class: JClass,
|
||||
handle: jlong,
|
||||
target_fp_j: JString,
|
||||
) -> jint {
|
||||
let result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
|
||||
let h = unsafe { handle_ref(handle) };
|
||||
let target: String = env.get_string(&target_fp_j).map(|s| s.into()).unwrap_or_default();
|
||||
h.engine.place_call(&target)
|
||||
}));
|
||||
) -> jstring {
|
||||
if handle == 0 { return JObject::null().into_raw(); }
|
||||
let h = signal_ref(handle);
|
||||
let json = h.mgr.get_state_json();
|
||||
env.new_string(&json)
|
||||
.map(|s| s.into_raw())
|
||||
.unwrap_or(JObject::null().into_raw())
|
||||
}
|
||||
|
||||
match result {
|
||||
Ok(Ok(())) => 0,
|
||||
Ok(Err(e)) => { error!("place_call failed: {e}"); -1 }
|
||||
Err(_) => { error!("place_call panicked"); -1 }
|
||||
/// Place a direct call.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_SignalManager_nativeSignalPlaceCall<'a>(
|
||||
mut env: JNIEnv<'a>,
|
||||
_class: JClass,
|
||||
handle: jlong,
|
||||
target_j: JString,
|
||||
) -> jint {
|
||||
if handle == 0 { return -1; }
|
||||
let h = signal_ref(handle);
|
||||
let target: String = env.get_string(&target_j).map(|s| s.into()).unwrap_or_default();
|
||||
match h.mgr.place_call(&target) {
|
||||
Ok(()) => 0,
|
||||
Err(e) => { error!("place_call: {e}"); -1 }
|
||||
}
|
||||
}
|
||||
|
||||
/// Answer an incoming direct call.
|
||||
/// mode: 0=Reject, 1=AcceptTrusted, 2=AcceptGeneric
|
||||
/// Answer an incoming call.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_WzpEngine_nativeAnswerCall<'a>(
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_SignalManager_nativeSignalAnswerCall<'a>(
|
||||
mut env: JNIEnv<'a>,
|
||||
_class: JClass,
|
||||
handle: jlong,
|
||||
call_id_j: JString,
|
||||
mode: jint,
|
||||
) -> jint {
|
||||
let result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
|
||||
let h = unsafe { handle_ref(handle) };
|
||||
if handle == 0 { return -1; }
|
||||
let h = signal_ref(handle);
|
||||
let call_id: String = env.get_string(&call_id_j).map(|s| s.into()).unwrap_or_default();
|
||||
let accept_mode = match mode {
|
||||
0 => wzp_proto::CallAcceptMode::Reject,
|
||||
1 => wzp_proto::CallAcceptMode::AcceptTrusted,
|
||||
_ => wzp_proto::CallAcceptMode::AcceptGeneric,
|
||||
};
|
||||
h.engine.answer_call(&call_id, accept_mode)
|
||||
}));
|
||||
|
||||
match result {
|
||||
Ok(Ok(())) => 0,
|
||||
Ok(Err(e)) => { error!("answer_call failed: {e}"); -1 }
|
||||
Err(_) => { error!("answer_call panicked"); -1 }
|
||||
match h.mgr.answer_call(&call_id, accept_mode) {
|
||||
Ok(()) => 0,
|
||||
Err(e) => { error!("answer_call: {e}"); -1 }
|
||||
}
|
||||
}
|
||||
|
||||
/// Send hangup signal.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_SignalManager_nativeSignalHangup(
|
||||
_env: JNIEnv,
|
||||
_class: JClass,
|
||||
handle: jlong,
|
||||
) {
|
||||
if handle == 0 { return; }
|
||||
let h = signal_ref(handle);
|
||||
h.mgr.hangup();
|
||||
}
|
||||
|
||||
/// Destroy the signal manager and free resources.
|
||||
#[unsafe(no_mangle)]
|
||||
pub unsafe extern "system" fn Java_com_wzp_engine_SignalManager_nativeSignalDestroy(
|
||||
_env: JNIEnv,
|
||||
_class: JClass,
|
||||
handle: jlong,
|
||||
) {
|
||||
if handle == 0 { return; }
|
||||
let h = signal_ref(handle);
|
||||
h.mgr.stop();
|
||||
// Reclaim the Box
|
||||
let _ = unsafe { Box::from_raw(handle as *mut SignalHandle) };
|
||||
}
|
||||
|
||||
@@ -14,5 +14,6 @@ pub mod audio_ring;
|
||||
pub mod commands;
|
||||
pub mod engine;
|
||||
pub mod pipeline;
|
||||
pub mod signal_mgr;
|
||||
pub mod stats;
|
||||
pub mod jni_bridge;
|
||||
|
||||
288
crates/wzp-android/src/signal_mgr.rs
Normal file
288
crates/wzp-android/src/signal_mgr.rs
Normal file
@@ -0,0 +1,288 @@
|
||||
//! Persistent signal connection manager for direct 1:1 calls.
|
||||
//!
|
||||
//! Separate from the media engine — survives across calls.
|
||||
//! Connects to relay via `_signal` SNI, registers presence,
|
||||
//! and handles call signaling (offer/answer/setup/hangup).
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use tracing::{error, info, warn};
|
||||
use wzp_proto::{MediaTransport, SignalMessage};
|
||||
|
||||
/// Signal connection status.
|
||||
#[derive(Clone, Debug, Default, serde::Serialize)]
|
||||
pub struct SignalState {
|
||||
pub status: String, // "connecting", "idle", "registered", "ringing", "incoming", "setup"
|
||||
pub fingerprint: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub incoming_call_id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub incoming_caller_fp: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub incoming_caller_alias: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub call_setup_relay: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub call_setup_room: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub call_setup_id: Option<String>,
|
||||
}
|
||||
|
||||
/// Manages a persistent `_signal` QUIC connection to a relay.
|
||||
pub struct SignalManager {
|
||||
transport: Arc<wzp_transport::QuinnTransport>,
|
||||
state: Arc<Mutex<SignalState>>,
|
||||
running: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl SignalManager {
|
||||
/// Create SignalManager and start connect+register+recv on a background thread.
|
||||
/// Blocks the caller for up to 10s until connect+register succeeds. The internal thread never exits (it parks once the recv loop ends).
|
||||
/// CRITICAL: tokio runtime must never be dropped on Android (libcrypto TLS conflict).
|
||||
pub fn start(relay_addr: &str, seed_hex: &str) -> Result<Self, anyhow::Error> {
|
||||
let addr: SocketAddr = relay_addr.parse()?;
|
||||
let seed = if seed_hex.is_empty() {
|
||||
wzp_crypto::Seed::generate()
|
||||
} else {
|
||||
wzp_crypto::Seed::from_hex(seed_hex).map_err(|e| anyhow::anyhow!(e))?
|
||||
};
|
||||
let identity = seed.derive_identity();
|
||||
let pub_id = identity.public_identity();
|
||||
let identity_pub = *pub_id.signing.as_bytes();
|
||||
let fp = pub_id.fingerprint.to_string();
|
||||
|
||||
let state = Arc::new(Mutex::new(SignalState {
|
||||
status: "connecting".into(),
|
||||
fingerprint: fp.clone(),
|
||||
..Default::default()
|
||||
}));
|
||||
let running = Arc::new(AtomicBool::new(true));
|
||||
|
||||
// Channel to receive transport after connect succeeds
|
||||
let (transport_tx, transport_rx) = std::sync::mpsc::channel();
|
||||
|
||||
let bg_state = Arc::clone(&state);
|
||||
let bg_running = Arc::clone(&running);
|
||||
let ret_state = Arc::clone(&state);
|
||||
let ret_running = Arc::clone(&running);
|
||||
|
||||
// ONE thread, ONE runtime, NEVER dropped.
|
||||
// Connect + register + recv loop all happen here.
|
||||
std::thread::Builder::new()
|
||||
.name("wzp-signal".into())
|
||||
.stack_size(4 * 1024 * 1024)
|
||||
.spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.expect("tokio runtime");
|
||||
|
||||
rt.block_on(async move {
|
||||
info!(fingerprint = %fp, relay = %addr, "signal: connecting");
|
||||
|
||||
let bind: SocketAddr = "0.0.0.0:0".parse().unwrap();
|
||||
let endpoint = match wzp_transport::create_endpoint(bind, None) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
error!("signal endpoint: {e}");
|
||||
bg_state.lock().unwrap().status = "idle".into();
|
||||
return;
|
||||
}
|
||||
};
|
||||
let client_cfg = wzp_transport::client_config();
|
||||
let conn = match wzp_transport::connect(&endpoint, addr, "_signal", client_cfg).await {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
error!("signal connect: {e}");
|
||||
bg_state.lock().unwrap().status = "idle".into();
|
||||
return;
|
||||
}
|
||||
};
|
||||
let transport = Arc::new(wzp_transport::QuinnTransport::new(conn));
|
||||
|
||||
// Register
|
||||
if let Err(e) = transport.send_signal(&SignalMessage::RegisterPresence {
|
||||
identity_pub, signature: vec![], alias: None,
|
||||
}).await {
|
||||
error!("signal register: {e}");
|
||||
bg_state.lock().unwrap().status = "idle".into();
|
||||
return;
|
||||
}
|
||||
|
||||
match transport.recv_signal().await {
|
||||
Ok(Some(SignalMessage::RegisterPresenceAck { success: true, .. })) => {
|
||||
info!(fingerprint = %fp, "signal: registered");
|
||||
bg_state.lock().unwrap().status = "registered".into();
|
||||
// Send transport to caller
|
||||
let _ = transport_tx.send(transport.clone());
|
||||
}
|
||||
other => {
|
||||
error!("signal registration failed: {other:?}");
|
||||
bg_state.lock().unwrap().status = "idle".into();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Recv loop — runs forever
|
||||
loop {
|
||||
if !running.load(Ordering::Relaxed) { break; }
|
||||
|
||||
match transport.recv_signal().await {
|
||||
Ok(Some(SignalMessage::CallRinging { call_id })) => {
|
||||
info!(call_id = %call_id, "signal: ringing");
|
||||
let mut s = state.lock().unwrap();
|
||||
s.status = "ringing".into();
|
||||
}
|
||||
Ok(Some(SignalMessage::DirectCallOffer { caller_fingerprint, caller_alias, call_id, .. })) => {
|
||||
info!(from = %caller_fingerprint, call_id = %call_id, "signal: incoming call");
|
||||
let mut s = state.lock().unwrap();
|
||||
s.status = "incoming".into();
|
||||
s.incoming_call_id = Some(call_id);
|
||||
s.incoming_caller_fp = Some(caller_fingerprint);
|
||||
s.incoming_caller_alias = caller_alias;
|
||||
}
|
||||
Ok(Some(SignalMessage::DirectCallAnswer { call_id, accept_mode, .. })) => {
|
||||
info!(call_id = %call_id, mode = ?accept_mode, "signal: call answered");
|
||||
}
|
||||
Ok(Some(SignalMessage::CallSetup { call_id, room, relay_addr })) => {
|
||||
info!(call_id = %call_id, room = %room, relay = %relay_addr, "signal: call setup");
|
||||
let mut s = state.lock().unwrap();
|
||||
s.status = "setup".into();
|
||||
s.call_setup_relay = Some(relay_addr);
|
||||
s.call_setup_room = Some(room);
|
||||
s.call_setup_id = Some(call_id);
|
||||
}
|
||||
Ok(Some(SignalMessage::Hangup { reason })) => {
|
||||
info!(reason = ?reason, "signal: hangup");
|
||||
let mut s = state.lock().unwrap();
|
||||
s.status = "registered".into();
|
||||
s.incoming_call_id = None;
|
||||
s.incoming_caller_fp = None;
|
||||
s.incoming_caller_alias = None;
|
||||
s.call_setup_relay = None;
|
||||
s.call_setup_room = None;
|
||||
s.call_setup_id = None;
|
||||
}
|
||||
Ok(Some(_)) => {}
|
||||
Ok(None) => {
|
||||
info!("signal: connection closed");
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("signal recv error: {e}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bg_state.lock().unwrap().status = "idle".into();
|
||||
}); // block_on
|
||||
|
||||
// Runtime intentionally NOT dropped — lives until thread exits.
|
||||
// This prevents ring/libcrypto TLS cleanup conflict on Android.
|
||||
// The thread is parked here forever (block_on returned = connection lost).
|
||||
std::thread::park();
|
||||
})?; // thread spawn
|
||||
|
||||
// Wait for transport (up to 10s)
|
||||
let transport = transport_rx.recv_timeout(std::time::Duration::from_secs(10))
|
||||
.map_err(|_| anyhow::anyhow!("signal connect timeout — check relay address"))?;
|
||||
|
||||
Ok(Self { transport, state: ret_state, running: ret_running })
|
||||
}
|
||||
|
||||
/// Get current state (non-blocking).
|
||||
pub fn get_state(&self) -> SignalState {
|
||||
self.state.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
/// Get state as JSON string.
|
||||
pub fn get_state_json(&self) -> String {
|
||||
serde_json::to_string(&self.get_state()).unwrap_or_else(|_| "{}".into())
|
||||
}
|
||||
|
||||
/// Place a direct call.
|
||||
pub fn place_call(&self, target_fp: &str) -> Result<(), anyhow::Error> {
|
||||
let fp = self.state.lock().unwrap().fingerprint.clone();
|
||||
let target = target_fp.to_string();
|
||||
let call_id = format!("{:016x}", std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH).unwrap().as_nanos());
|
||||
let transport = self.transport.clone();
|
||||
|
||||
// Send on a small thread (async send needs a runtime)
|
||||
std::thread::Builder::new()
|
||||
.name("wzp-call-send".into())
|
||||
.spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all().build().expect("rt");
|
||||
rt.block_on(async {
|
||||
let _ = transport.send_signal(&SignalMessage::DirectCallOffer {
|
||||
caller_fingerprint: fp,
|
||||
caller_alias: None,
|
||||
target_fingerprint: target,
|
||||
call_id,
|
||||
identity_pub: [0u8; 32],
|
||||
ephemeral_pub: [0u8; 32],
|
||||
signature: vec![],
|
||||
supported_profiles: vec![wzp_proto::QualityProfile::GOOD],
|
||||
}).await;
|
||||
});
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Answer an incoming call.
|
||||
pub fn answer_call(&self, call_id: &str, mode: wzp_proto::CallAcceptMode) -> Result<(), anyhow::Error> {
|
||||
let call_id = call_id.to_string();
|
||||
let transport = self.transport.clone();
|
||||
|
||||
std::thread::Builder::new()
|
||||
.name("wzp-answer-send".into())
|
||||
.spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all().build().expect("rt");
|
||||
rt.block_on(async {
|
||||
let _ = transport.send_signal(&SignalMessage::DirectCallAnswer {
|
||||
call_id,
|
||||
accept_mode: mode,
|
||||
identity_pub: None,
|
||||
ephemeral_pub: None,
|
||||
signature: None,
|
||||
chosen_profile: Some(wzp_proto::QualityProfile::GOOD),
|
||||
}).await;
|
||||
});
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send hangup.
|
||||
pub fn hangup(&self) {
|
||||
let transport = self.transport.clone();
|
||||
let state = self.state.clone();
|
||||
std::thread::spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all().build().expect("rt");
|
||||
rt.block_on(async {
|
||||
let _ = transport.send_signal(&SignalMessage::Hangup {
|
||||
reason: wzp_proto::HangupReason::Normal,
|
||||
}).await;
|
||||
});
|
||||
let mut s = state.lock().unwrap();
|
||||
s.status = "registered".into();
|
||||
s.incoming_call_id = None;
|
||||
s.incoming_caller_fp = None;
|
||||
s.incoming_caller_alias = None;
|
||||
s.call_setup_relay = None;
|
||||
s.call_setup_room = None;
|
||||
s.call_setup_id = None;
|
||||
});
|
||||
}
|
||||
|
||||
/// Stop the signal connection.
|
||||
pub fn stop(&self) {
|
||||
self.running.store(false, Ordering::Release);
|
||||
self.transport.connection().close(0u32.into(), b"shutdown");
|
||||
}
|
||||
}
|
||||
@@ -58,8 +58,16 @@ pub struct CallStats {
|
||||
pub frames_decoded: u64,
|
||||
/// Number of playout underruns (buffer empty when audio needed).
|
||||
pub underruns: u64,
|
||||
/// Frames recovered by FEC.
|
||||
/// Frames recovered by RaptorQ FEC (Codec2 tiers only; Opus bypasses
|
||||
/// RaptorQ per Phase 2).
|
||||
pub fec_recovered: u64,
|
||||
/// Phase 3c: Opus frames reconstructed via DRED side-channel data.
|
||||
/// Only increments on the Opus tiers; always zero for Codec2.
|
||||
pub dred_reconstructions: u64,
|
||||
/// Phase 3c: Opus frames filled via classical Opus PLC because no DRED
|
||||
/// state covered the gap, plus any decode-error fallbacks. Codec2 loss
|
||||
/// also increments this counter via the Codec2 PLC path.
|
||||
pub classical_plc_invocations: u64,
|
||||
/// Playout ring overflow count (reader was lapped by writer).
|
||||
pub playout_overflows: u64,
|
||||
/// Playout ring underrun count (reader found empty buffer).
|
||||
|
||||
@@ -7,14 +7,15 @@ use std::time::{Duration, Instant};
|
||||
use bytes::Bytes;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use wzp_codec::{AutoGainControl, ComfortNoise, EchoCanceller, NoiseSupressor, SilenceDetector};
|
||||
use wzp_codec::dred_ffi::{DredDecoderHandle, DredState};
|
||||
use wzp_codec::{
|
||||
AdaptiveDecoder, AutoGainControl, ComfortNoise, EchoCanceller, NoiseSupressor, SilenceDetector,
|
||||
};
|
||||
use wzp_fec::{RaptorQFecDecoder, RaptorQFecEncoder};
|
||||
use wzp_proto::jitter::{JitterBuffer, PlayoutResult};
|
||||
use wzp_proto::packet::{MediaHeader, MediaPacket, MiniFrameContext};
|
||||
use wzp_proto::quality::AdaptiveQualityController;
|
||||
use wzp_proto::traits::{
|
||||
AudioDecoder, AudioEncoder, FecDecoder, FecEncoder,
|
||||
};
|
||||
use wzp_proto::traits::{AudioDecoder, AudioEncoder, FecDecoder, FecEncoder};
|
||||
use wzp_proto::packet::QualityReport;
|
||||
use wzp_proto::{CodecId, QualityProfile};
|
||||
|
||||
@@ -340,6 +341,22 @@ impl CallEncoder {
|
||||
let enc_len = self.audio_enc.encode(pcm, &mut encoded)?;
|
||||
encoded.truncate(enc_len);
|
||||
|
||||
// Phase 2: Opus tiers bypass RaptorQ entirely (DRED handles loss
|
||||
// recovery at the codec layer). Codec2 tiers keep RaptorQ unchanged.
|
||||
// On Opus packets, zero the FEC header fields so old receivers
|
||||
// can cleanly identify "no RaptorQ block to assemble" and new
|
||||
// receivers can short-circuit their FEC ingest path.
|
||||
let is_opus = self.profile.codec.is_opus();
|
||||
let (fec_block, fec_symbol, fec_ratio_encoded) = if is_opus {
|
||||
(0u8, 0u8, 0u8)
|
||||
} else {
|
||||
(
|
||||
self.block_id,
|
||||
self.frame_in_block,
|
||||
MediaHeader::encode_fec_ratio(self.profile.fec_ratio),
|
||||
)
|
||||
};
|
||||
|
||||
// Build source media packet
|
||||
let source_pkt = MediaPacket {
|
||||
header: MediaHeader {
|
||||
@@ -347,11 +364,11 @@ impl CallEncoder {
|
||||
is_repair: false,
|
||||
codec_id: self.profile.codec,
|
||||
has_quality_report: false,
|
||||
fec_ratio_encoded: MediaHeader::encode_fec_ratio(self.profile.fec_ratio),
|
||||
fec_ratio_encoded,
|
||||
seq: self.seq,
|
||||
timestamp: self.timestamp_ms,
|
||||
fec_block: self.block_id,
|
||||
fec_symbol: self.frame_in_block,
|
||||
fec_block,
|
||||
fec_symbol,
|
||||
reserved: 0,
|
||||
csrc_count: 0,
|
||||
},
|
||||
@@ -366,11 +383,13 @@ impl CallEncoder {
|
||||
|
||||
let mut output = vec![source_pkt];
|
||||
|
||||
// Add to FEC encoder
|
||||
// Codec2-only: feed RaptorQ and generate repair packets when the
|
||||
// block is full. Opus tiers skip this entire block — DRED (active
|
||||
// in Phase 1) provides codec-layer loss recovery.
|
||||
if !is_opus {
|
||||
self.fec_enc.add_source_symbol(&encoded)?;
|
||||
self.frame_in_block += 1;
|
||||
|
||||
// If block is full, generate repair and finalize
|
||||
if self.frame_in_block >= self.profile.frames_per_block {
|
||||
if let Ok(repairs) = self.fec_enc.generate_repair(self.profile.fec_ratio) {
|
||||
for (sym_idx, repair_data) in repairs {
|
||||
@@ -400,6 +419,7 @@ impl CallEncoder {
|
||||
self.block_id = self.block_id.wrapping_add(1);
|
||||
self.frame_in_block = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
@@ -434,9 +454,12 @@ impl CallEncoder {
|
||||
|
||||
/// Manages the recv/decode side of a call.
|
||||
pub struct CallDecoder {
|
||||
/// Audio decoder.
|
||||
audio_dec: Box<dyn AudioDecoder>,
|
||||
/// FEC decoder.
|
||||
/// Audio decoder. Concrete `AdaptiveDecoder` (not `Box<dyn AudioDecoder>`)
|
||||
/// because Phase 3b calls the inherent `reconstruct_from_dred` method,
|
||||
/// which cannot live on the `AudioDecoder` trait without dragging libopus
|
||||
/// types into `wzp-proto`.
|
||||
audio_dec: AdaptiveDecoder,
|
||||
/// FEC decoder (Codec2 tiers only; Opus bypasses RaptorQ per Phase 2).
|
||||
fec_dec: RaptorQFecDecoder,
|
||||
/// Jitter buffer.
|
||||
jitter: JitterBuffer,
|
||||
@@ -450,6 +473,24 @@ pub struct CallDecoder {
|
||||
last_was_cn: bool,
|
||||
/// Mini-frame decompression context (tracks last full header baseline).
|
||||
mini_context: MiniFrameContext,
|
||||
// ─── Phase 3b: DRED reconstruction state ──────────────────────────────
|
||||
/// DRED side-channel parser (a separate libopus object from the decoder).
|
||||
dred_decoder: DredDecoderHandle,
|
||||
/// Scratch buffer used by `dred_decoder.parse_into` on every arriving
|
||||
/// Opus packet. Reused across calls to avoid 10 KB alloc churn per packet.
|
||||
dred_parse_scratch: DredState,
|
||||
/// Cached "most recently parsed valid" DRED state, swapped with
|
||||
/// `dred_parse_scratch` on successful parse. Used by `decode_next` when
|
||||
/// the jitter buffer reports a gap.
|
||||
last_good_dred: DredState,
|
||||
/// Sequence number of the packet that produced `last_good_dred`. `None`
|
||||
/// if no packet has yielded DRED state yet (cold start or legacy sender).
|
||||
last_good_dred_seq: Option<u16>,
|
||||
/// Phase 4 telemetry counter: gaps recovered via DRED reconstruction.
|
||||
pub dred_reconstructions: u64,
|
||||
/// Phase 4 telemetry counter: gaps filled via classical Opus PLC
|
||||
/// (because no DRED state covered the gap, or the active codec is Codec2).
|
||||
pub classical_plc_invocations: u64,
|
||||
}
|
||||
|
||||
impl CallDecoder {
|
||||
@@ -459,8 +500,19 @@ impl CallDecoder {
|
||||
} else {
|
||||
JitterBuffer::new(config.jitter_target, config.jitter_max, config.jitter_min)
|
||||
};
|
||||
// Phase 3b: build the DRED parser + state buffers. These allocate
|
||||
// libopus state (~10 KB each) once per call, not per packet — the
|
||||
// scratch and last-good buffers are reused via std::mem::swap on
|
||||
// every successful parse.
|
||||
let dred_decoder =
|
||||
DredDecoderHandle::new().expect("opus_dred_decoder_create failed at call setup");
|
||||
let dred_parse_scratch =
|
||||
DredState::new().expect("opus_dred_alloc failed at call setup (scratch)");
|
||||
let last_good_dred =
|
||||
DredState::new().expect("opus_dred_alloc failed at call setup (good state)");
|
||||
Self {
|
||||
audio_dec: wzp_codec::create_decoder(config.profile),
|
||||
audio_dec: AdaptiveDecoder::new(config.profile)
|
||||
.expect("failed to create adaptive decoder"),
|
||||
fec_dec: wzp_fec::create_decoder(&config.profile),
|
||||
jitter,
|
||||
quality: AdaptiveQualityController::new(),
|
||||
@@ -468,6 +520,12 @@ impl CallDecoder {
|
||||
comfort_noise: ComfortNoise::new(50),
|
||||
last_was_cn: false,
|
||||
mini_context: MiniFrameContext::default(),
|
||||
dred_decoder,
|
||||
dred_parse_scratch,
|
||||
last_good_dred,
|
||||
last_good_dred_seq: None,
|
||||
dred_reconstructions: 0,
|
||||
classical_plc_invocations: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -482,15 +540,54 @@ impl CallDecoder {
|
||||
|
||||
/// Feed a received media packet into the decode pipeline.
|
||||
pub fn ingest(&mut self, packet: MediaPacket) {
|
||||
// Feed to FEC decoder
|
||||
// Phase 2: Opus packets bypass RaptorQ. Codec2 packets still feed
|
||||
// the FEC decoder for recovery. This also cleanly drops any stray
|
||||
// Opus repair packets from an old sender (we don't push repair
|
||||
// packets to the jitter buffer either, so they're effectively
|
||||
// ignored — a graceful mixed-version degradation).
|
||||
if !packet.header.codec_id.is_opus() {
|
||||
let _ = self.fec_dec.add_symbol(
|
||||
packet.header.fec_block,
|
||||
packet.header.fec_symbol,
|
||||
packet.header.is_repair,
|
||||
&packet.payload,
|
||||
);
|
||||
}
|
||||
|
||||
// If not a repair packet, also feed directly to jitter buffer
|
||||
// Phase 3b: Opus source packets carry DRED side-channel data in
|
||||
// libopus 1.5. Parse it into the scratch state and, on success,
|
||||
// swap with the cached `last_good_dred` so later gap reconstruction
|
||||
// has fresh neural redundancy to draw from. Parsing happens before
|
||||
// the jitter push because the jitter buffer consumes the packet.
|
||||
if packet.header.codec_id.is_opus() && !packet.header.is_repair {
|
||||
match self
|
||||
.dred_decoder
|
||||
.parse_into(&mut self.dred_parse_scratch, &packet.payload)
|
||||
{
|
||||
Ok(available) if available > 0 => {
|
||||
// Swap the freshly parsed state into `last_good_dred`.
|
||||
// The old good state (now in scratch) is about to be
|
||||
// overwritten on the next parse — its contents are
|
||||
// not needed after this swap.
|
||||
std::mem::swap(&mut self.dred_parse_scratch, &mut self.last_good_dred);
|
||||
self.last_good_dred_seq = Some(packet.header.seq);
|
||||
}
|
||||
Ok(_) => {
|
||||
// Packet had no DRED data (return 0). Leave the cached
|
||||
// state untouched — it may still cover upcoming gaps
|
||||
// from a warm-up period where the encoder was producing
|
||||
// DRED bytes. The scratch buffer was potentially written
|
||||
// but its `samples_available` is 0 so it's harmless.
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("DRED parse error (ignored): {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Source packets (Opus or Codec2) go to the jitter buffer for decode.
|
||||
// Repair packets never reach the jitter buffer; for Codec2 they're
|
||||
// used by the FEC decoder above, for Opus they're dropped here.
|
||||
if !packet.header.is_repair {
|
||||
self.jitter.push(packet);
|
||||
}
|
||||
@@ -524,19 +621,72 @@ impl CallDecoder {
|
||||
result
|
||||
}
|
||||
PlayoutResult::Missing { seq } => {
|
||||
// Only generate PLC if there are still packets buffered ahead.
|
||||
// Only attempt recovery if there are still packets buffered ahead.
|
||||
// Otherwise we've drained everything — return None to stop.
|
||||
if self.jitter.depth() > 0 {
|
||||
debug!(seq, "packet loss, generating PLC");
|
||||
if self.jitter.depth() == 0 {
|
||||
self.jitter.record_underrun();
|
||||
return None;
|
||||
}
|
||||
|
||||
// Phase 3b: try DRED reconstruction first. If we have a
|
||||
// recent DRED state from a packet whose seq > missing seq,
|
||||
// and the seq delta (in samples) fits within the state's
|
||||
// available window, libopus can synthesize a plausible
|
||||
// replacement for the lost frame. Fall back to classical
|
||||
// PLC when no state covers the gap, when the active codec
|
||||
// is Codec2, or when the reconstruction itself errors.
|
||||
if self.profile.codec.is_opus() {
|
||||
if let Some(last_seq) = self.last_good_dred_seq {
|
||||
// How many frames ahead of the missing seq is the
|
||||
// last-good packet? Use wrapping arithmetic for the
|
||||
// u16 seq space.
|
||||
let seq_delta = last_seq.wrapping_sub(seq);
|
||||
// Reject stale or backward state. u16 wraparound
|
||||
// would make a "seq went backward" delta very large;
|
||||
// cap at a sane forward-looking window.
|
||||
const MAX_SEQ_DELTA: u16 = 128;
|
||||
if seq_delta > 0 && seq_delta <= MAX_SEQ_DELTA {
|
||||
let frame_samples =
|
||||
(48_000 * self.profile.frame_duration_ms as i32) / 1000;
|
||||
let offset_samples = seq_delta as i32 * frame_samples;
|
||||
let available = self.last_good_dred.samples_available();
|
||||
if offset_samples > 0 && offset_samples <= available {
|
||||
match self.audio_dec.reconstruct_from_dred(
|
||||
&self.last_good_dred,
|
||||
offset_samples,
|
||||
pcm,
|
||||
) {
|
||||
Ok(n) => {
|
||||
self.dred_reconstructions += 1;
|
||||
self.jitter.record_decode();
|
||||
debug!(
|
||||
seq,
|
||||
last_seq,
|
||||
offset_samples,
|
||||
available,
|
||||
"DRED reconstruction for gap"
|
||||
);
|
||||
return Some(n);
|
||||
}
|
||||
Err(e) => {
|
||||
// Reconstruction failed — fall
|
||||
// through to classical PLC below.
|
||||
debug!(seq, "DRED reconstruct error: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Classical PLC fallback (also the Codec2 path).
|
||||
debug!(seq, "packet loss, generating classical PLC");
|
||||
self.classical_plc_invocations += 1;
|
||||
let result = self.audio_dec.decode_lost(pcm).ok();
|
||||
if result.is_some() {
|
||||
self.jitter.record_decode();
|
||||
}
|
||||
result
|
||||
} else {
|
||||
self.jitter.record_underrun();
|
||||
None
|
||||
}
|
||||
}
|
||||
PlayoutResult::NotReady => {
|
||||
self.jitter.record_underrun();
|
||||
@@ -559,6 +709,19 @@ impl CallDecoder {
|
||||
pub fn reset_stats(&mut self) {
|
||||
self.jitter.reset_stats();
|
||||
}
|
||||
|
||||
/// Phase 3b introspection: sequence number of the most recently parsed
|
||||
/// valid DRED state, or `None` if no Opus packet has yielded DRED data
|
||||
/// yet. Used by tests to debug reconstruction eligibility.
|
||||
pub fn last_good_dred_seq(&self) -> Option<u16> {
|
||||
// Read-only accessor: hands back a copy of the cached field; nothing is recomputed.
self.last_good_dred_seq
|
||||
}
|
||||
|
||||
/// Phase 3b introspection: samples of audio history currently available
|
||||
/// in the cached DRED state.
|
||||
pub fn last_good_dred_samples_available(&self) -> i32 {
|
||||
// Delegates to `samples_available()` on the cached `last_good_dred` state.
self.last_good_dred.samples_available()
|
||||
}
|
||||
}
|
||||
|
||||
/// Periodic telemetry logger for jitter buffer statistics.
|
||||
@@ -620,18 +783,83 @@ mod tests {
|
||||
assert!(!packets[0].header.is_repair);
|
||||
}
|
||||
|
||||
/// Phase 2: Opus packets have zero FEC header fields — no block, no
|
||||
/// symbol index, no repair ratio. The RaptorQ layer is bypassed
|
||||
/// entirely on the Opus tiers.
|
||||
#[test]
|
||||
fn encoder_generates_repair_on_full_block() {
|
||||
fn opus_source_packets_have_zero_fec_header_fields() {
|
||||
let config = CallConfig {
|
||||
profile: QualityProfile::GOOD, // 5 frames/block
|
||||
profile: QualityProfile::GOOD, // Opus 24k
|
||||
suppression_enabled: false, // skip silence gate for this test
|
||||
..Default::default()
|
||||
};
|
||||
let mut enc = CallEncoder::new(&config);
|
||||
let pcm = vec![0i16; 960];
|
||||
// Non-silent sine wave so silence detection doesn't suppress us
|
||||
// even with suppression_enabled=false (belt and braces).
|
||||
let pcm: Vec<i16> = (0..960)
|
||||
.map(|i| ((i as f32 * 0.1).sin() * 10_000.0) as i16)
|
||||
.collect();
|
||||
let packets = enc.encode_frame(&pcm).unwrap();
|
||||
assert_eq!(packets.len(), 1, "Opus must emit exactly 1 source packet");
|
||||
let hdr = &packets[0].header;
|
||||
assert!(hdr.codec_id.is_opus());
|
||||
assert!(!hdr.is_repair);
|
||||
assert_eq!(hdr.fec_block, 0, "Opus fec_block must be 0");
|
||||
assert_eq!(hdr.fec_symbol, 0, "Opus fec_symbol must be 0");
|
||||
assert_eq!(hdr.fec_ratio_encoded, 0, "Opus fec_ratio_encoded must be 0");
|
||||
}
|
||||
|
||||
let mut total_packets = 0;
|
||||
let mut repair_count = 0;
|
||||
for _ in 0..5 {
|
||||
/// Phase 2: Opus never emits repair packets, regardless of how many
|
||||
/// source frames are fed in. DRED (Phase 1) provides loss recovery at
|
||||
/// the codec layer; RaptorQ is disabled on Opus tiers.
|
||||
#[test]
|
||||
fn opus_encoder_never_emits_repair_packets() {
|
||||
let config = CallConfig {
|
||||
profile: QualityProfile::GOOD, // 5 frames/block in the Codec2 sense
|
||||
suppression_enabled: false,
|
||||
..Default::default()
|
||||
};
|
||||
let mut enc = CallEncoder::new(&config);
|
||||
let pcm: Vec<i16> = (0..960)
|
||||
.map(|i| ((i as f32 * 0.1).sin() * 10_000.0) as i16)
|
||||
.collect();
|
||||
|
||||
// Encode well beyond a block boundary to prove no repair ever comes out.
|
||||
let mut total_packets = 0usize;
|
||||
let mut repair_count = 0usize;
|
||||
for _ in 0..20 {
|
||||
let packets = enc.encode_frame(&pcm).unwrap();
|
||||
total_packets += packets.len();
|
||||
repair_count += packets.iter().filter(|p| p.header.is_repair).count();
|
||||
}
|
||||
assert_eq!(repair_count, 0, "Opus must emit zero repair packets");
|
||||
assert_eq!(
|
||||
total_packets, 20,
|
||||
"20 source frames → 20 source packets (1:1, no RaptorQ expansion)"
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 2: Codec2 still emits repair packets with RaptorQ ratio unchanged.
|
||||
/// DRED is libopus-only and does not apply here, so RaptorQ is still the
|
||||
/// primary loss-recovery mechanism on Codec2 tiers.
|
||||
#[test]
|
||||
fn codec2_encoder_generates_repair_on_full_block() {
|
||||
let config = CallConfig {
|
||||
profile: QualityProfile::CATASTROPHIC, // Codec2 1200, 8 frames/block, ratio 1.0
|
||||
suppression_enabled: false,
|
||||
..Default::default()
|
||||
};
|
||||
let mut enc = CallEncoder::new(&config);
|
||||
// Codec2 takes 48 kHz samples and downsamples internally.
|
||||
// CATASTROPHIC uses 40 ms frames → 1920 samples.
|
||||
let pcm: Vec<i16> = (0..1920)
|
||||
.map(|i| ((i as f32 * 0.1).sin() * 10_000.0) as i16)
|
||||
.collect();
|
||||
|
||||
let mut total_packets = 0usize;
|
||||
let mut repair_count = 0usize;
|
||||
// Run long enough to cross the 8-frame block boundary and see repairs.
|
||||
for _ in 0..16 {
|
||||
let packets = enc.encode_frame(&pcm).unwrap();
|
||||
for p in &packets {
|
||||
if p.header.is_repair {
|
||||
@@ -640,8 +868,10 @@ mod tests {
|
||||
}
|
||||
total_packets += packets.len();
|
||||
}
|
||||
assert!(repair_count > 0, "should have repair packets after full block");
|
||||
assert!(total_packets > 5, "total {total_packets} should exceed 5 source");
|
||||
assert!(
|
||||
repair_count > 0,
|
||||
"Codec2 must still emit repair packets (got {repair_count} repairs, {total_packets} total)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -672,6 +902,219 @@ mod tests {
|
||||
assert!(dec.decode_next(&mut pcm).is_none());
|
||||
}
|
||||
|
||||
// ─── Phase 3b — DRED reconstruction on packet loss ────────────────────
|
||||
|
||||
/// Helper: create a CallEncoder/CallDecoder pair with the given profile
|
||||
/// and silence suppression disabled so silence-detection doesn't drop
|
||||
/// our synthetic test frames.
|
||||
fn encoder_decoder_pair(profile: QualityProfile) -> (CallEncoder, CallDecoder) {
|
||||
let config = CallConfig {
|
||||
profile,
|
||||
suppression_enabled: false,
|
||||
// Small jitter buffer so decode_next drains quickly in tests.
|
||||
jitter_min: 2,
|
||||
jitter_target: 3,
|
||||
jitter_max: 20,
|
||||
adaptive_jitter: false,
|
||||
..Default::default()
|
||||
};
|
||||
(CallEncoder::new(&config), CallDecoder::new(&config))
|
||||
}
|
||||
|
||||
/// Helper: generate a non-silent 20 ms frame of 300 Hz sine at the
|
||||
/// given sample offset so consecutive frames form a continuous tone.
|
||||
fn voice_frame_20ms(sample_offset: usize) -> Vec<i16> {
    // 960 samples = 20 ms at 48 kHz.
    let mut frame: Vec<i16> = Vec::with_capacity(960);
    for i in 0..960usize {
        // Absolute time of this sample, so back-to-back frames stay in phase.
        let t = (sample_offset + i) as f64 / 48_000.0;
        // 300 Hz sine with a peak amplitude of 8000.
        let sample = (8000.0 * (2.0 * std::f64::consts::PI * 300.0 * t).sin()) as i16;
        frame.push(sample);
    }
    frame
}
|
||||
|
||||
/// Phase 3b probe: sweep packet_loss_perc values to find the minimum
|
||||
/// that produces a samples_available ≥ 960 (enough to reconstruct a
|
||||
/// single 20 ms Opus frame). This guides the production loss floor.
|
||||
#[test]
|
||||
#[ignore] // diagnostic only — run with `cargo test ... -- --ignored --nocapture`
|
||||
fn probe_dred_samples_available_by_loss_floor() {
|
||||
use wzp_codec::opus_enc::OpusEncoder;
|
||||
use wzp_proto::traits::AudioEncoder;
|
||||
|
||||
for loss_pct in [5u8, 10, 15, 20, 25, 40, 60, 80].iter().copied() {
|
||||
let mut enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
|
||||
enc.set_expected_loss(loss_pct);
|
||||
let (_drop_enc, mut dec) = encoder_decoder_pair(QualityProfile::GOOD);
|
||||
|
||||
for i in 0..60u16 {
|
||||
let pcm = voice_frame_20ms(i as usize * 960);
|
||||
let mut encoded = vec![0u8; 512];
|
||||
let n = enc.encode(&pcm, &mut encoded).unwrap();
|
||||
encoded.truncate(n);
|
||||
let pkt = MediaPacket {
|
||||
header: MediaHeader {
|
||||
version: 0,
|
||||
is_repair: false,
|
||||
codec_id: CodecId::Opus24k,
|
||||
has_quality_report: false,
|
||||
fec_ratio_encoded: 0,
|
||||
seq: i,
|
||||
timestamp: (i as u32) * 20,
|
||||
fec_block: 0,
|
||||
fec_symbol: 0,
|
||||
reserved: 0,
|
||||
csrc_count: 0,
|
||||
},
|
||||
payload: Bytes::from(encoded),
|
||||
quality_report: None,
|
||||
};
|
||||
dec.ingest(pkt);
|
||||
}
|
||||
eprintln!(
|
||||
"[phase3b probe] loss_pct={loss_pct} samples_available={}",
|
||||
dec.last_good_dred_samples_available()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 3b: simulated single-packet loss on an Opus call triggers a
|
||||
/// DRED reconstruction rather than a classical PLC fill. Runs the full
|
||||
/// encode → ingest → decode_next pipeline.
|
||||
#[test]
|
||||
fn opus_single_packet_loss_is_recovered_via_dred() {
|
||||
let (mut enc, mut dec) = encoder_decoder_pair(QualityProfile::GOOD);
|
||||
|
||||
// Warm-up: encode and ingest 60 frames (1.2 s) so the DRED emitter
|
||||
// has had time to fill its 200 ms window and at least one
|
||||
// successful DRED parse has happened on the decoder side.
|
||||
let warmup_frames = 60;
|
||||
for i in 0..warmup_frames {
|
||||
let pcm = voice_frame_20ms(i * 960);
|
||||
let packets = enc.encode_frame(&pcm).unwrap();
|
||||
for pkt in packets {
|
||||
dec.ingest(pkt);
|
||||
}
|
||||
}
|
||||
|
||||
// Drain the warm-up frames through the decoder to advance the
|
||||
// jitter buffer cursor past them.
|
||||
let mut out = vec![0i16; 960];
|
||||
while dec.decode_next(&mut out).is_some() {}
|
||||
|
||||
// Encode the next three frames but skip ingesting the middle one.
|
||||
let base_offset = warmup_frames * 960;
|
||||
let pcm_a = voice_frame_20ms(base_offset);
|
||||
let pcm_b = voice_frame_20ms(base_offset + 960);
|
||||
let pcm_c = voice_frame_20ms(base_offset + 1920);
|
||||
|
||||
let pkts_a = enc.encode_frame(&pcm_a).unwrap();
|
||||
let pkts_b = enc.encode_frame(&pcm_b).unwrap(); // DROP THIS ONE
|
||||
let pkts_c = enc.encode_frame(&pcm_c).unwrap();
|
||||
|
||||
for pkt in pkts_a {
|
||||
dec.ingest(pkt);
|
||||
}
|
||||
// Skip pkts_b entirely — this is the "packet loss".
|
||||
drop(pkts_b);
|
||||
for pkt in pkts_c {
|
||||
dec.ingest(pkt);
|
||||
}
|
||||
|
||||
// Drain again. Somewhere in here decode_next will hit Missing()
|
||||
// for the dropped packet and attempt DRED reconstruction.
|
||||
let baseline_dred = dec.dred_reconstructions;
|
||||
let baseline_plc = dec.classical_plc_invocations;
|
||||
eprintln!(
|
||||
"[phase3b probe] pre-drain: last_good_seq={:?} samples_available={}",
|
||||
dec.last_good_dred_seq(),
|
||||
dec.last_good_dred_samples_available()
|
||||
);
|
||||
while dec.decode_next(&mut out).is_some() {}
|
||||
|
||||
let dred_delta = dec.dred_reconstructions - baseline_dred;
|
||||
let plc_delta = dec.classical_plc_invocations - baseline_plc;
|
||||
eprintln!(
|
||||
"[phase3b probe] post-drain: dred_delta={dred_delta} plc_delta={plc_delta}"
|
||||
);
|
||||
assert!(
|
||||
dred_delta >= 1,
|
||||
"expected ≥1 DRED reconstruction on single-packet loss, \
|
||||
got dred_delta={dred_delta} plc_delta={plc_delta}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 3b: lossless stream never triggers DRED reconstruction or PLC.
|
||||
/// Baseline behavior — verifies the Missing() branch is not spuriously taken.
|
||||
#[test]
|
||||
fn opus_lossless_ingest_never_triggers_dred_or_plc() {
|
||||
let (mut enc, mut dec) = encoder_decoder_pair(QualityProfile::GOOD);
|
||||
|
||||
// Encode + ingest 40 frames with no drops.
|
||||
for i in 0..40 {
|
||||
let pcm = voice_frame_20ms(i * 960);
|
||||
let packets = enc.encode_frame(&pcm).unwrap();
|
||||
for pkt in packets {
|
||||
dec.ingest(pkt);
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = vec![0i16; 960];
|
||||
while dec.decode_next(&mut out).is_some() {}
|
||||
|
||||
assert_eq!(
|
||||
dec.dred_reconstructions, 0,
|
||||
"lossless stream should not reconstruct"
|
||||
);
|
||||
assert_eq!(
|
||||
dec.classical_plc_invocations, 0,
|
||||
"lossless stream should not PLC"
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 3b: Codec2 calls fall through to classical PLC on loss.
|
||||
/// DRED is libopus-only, so even if the decoder's DRED state were
|
||||
/// populated (it won't be — Codec2 packets don't carry DRED bytes),
|
||||
/// `reconstruct_from_dred` rejects Codec2 at the AdaptiveDecoder
|
||||
/// level. This test guards the Codec2 side of the protection split.
|
||||
#[test]
|
||||
fn codec2_loss_falls_through_to_classical_plc() {
|
||||
let (mut enc, mut dec) = encoder_decoder_pair(QualityProfile::CATASTROPHIC);
|
||||
|
||||
// Codec2 1200 uses 40 ms frames → 1920 samples at 48 kHz (before
|
||||
// the downsample inside the codec). Encode 20 frames (~0.8 s).
|
||||
let make_frame = |offset: usize| -> Vec<i16> {
|
||||
(0..1920)
|
||||
.map(|i| {
|
||||
let t = (offset + i) as f64 / 48_000.0;
|
||||
(8000.0 * (2.0 * std::f64::consts::PI * 300.0 * t).sin()) as i16
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
for i in 0..20 {
|
||||
let pcm = make_frame(i * 1920);
|
||||
let packets = enc.encode_frame(&pcm).unwrap();
|
||||
for pkt in packets {
|
||||
// Drop every 5th source packet to simulate loss.
|
||||
if !pkt.header.is_repair && i % 5 == 3 {
|
||||
continue;
|
||||
}
|
||||
dec.ingest(pkt);
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = vec![0i16; 1920];
|
||||
while dec.decode_next(&mut out).is_some() {}
|
||||
|
||||
assert_eq!(
|
||||
dec.dred_reconstructions, 0,
|
||||
"Codec2 must never reconstruct via DRED"
|
||||
);
|
||||
// classical_plc_invocations may or may not trigger depending on
|
||||
// whether the jitter buffer sees Missing before draining — the key
|
||||
// assertion is that DRED is not used. PLC count is advisory.
|
||||
}
|
||||
|
||||
// ---- QualityAdapter tests ----
|
||||
|
||||
/// Helper: build a QualityReport from human-readable loss% and RTT ms.
|
||||
|
||||
@@ -96,6 +96,7 @@ pub fn signal_to_call_type(signal: &SignalMessage) -> CallSignalType {
|
||||
SignalMessage::Hangup { .. } => CallSignalType::Hangup,
|
||||
SignalMessage::Rekey { .. } => CallSignalType::Offer, // reuse
|
||||
SignalMessage::QualityUpdate { .. } => CallSignalType::Offer, // reuse
|
||||
SignalMessage::LossRecoveryUpdate { .. } => CallSignalType::Offer, // reuse (telemetry)
|
||||
SignalMessage::Ping { .. } | SignalMessage::Pong { .. } => CallSignalType::Offer,
|
||||
SignalMessage::AuthToken { .. } => CallSignalType::Offer,
|
||||
SignalMessage::Hold => CallSignalType::Hold,
|
||||
|
||||
@@ -10,8 +10,17 @@ description = "WarzonePhone audio codec layer — Opus + Codec2 encoding/decodin
|
||||
wzp-proto = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
||||
# Opus bindings
|
||||
audiopus = { workspace = true }
|
||||
# Opus bindings — libopus 1.5.2.
|
||||
# opusic-c for the encoder (set_dred_duration lives here in Phase 1).
|
||||
# opusic-sys for the decoder — we wrap the raw *mut OpusDecoder ourselves
|
||||
# because opusic-c::Decoder.inner is pub(crate), blocking the unified
|
||||
# decoder + DRED path we need in Phase 3.
|
||||
opusic-c = { workspace = true }
|
||||
opusic-sys = { workspace = true }
|
||||
|
||||
# Zero-cost slice reinterpretation for the i16 ↔ u16 boundary between
|
||||
# our PCM buffers and opusic-c's encode API.
|
||||
bytemuck = { workspace = true }
|
||||
|
||||
# Pure-Rust Codec2 implementation
|
||||
codec2 = { workspace = true }
|
||||
|
||||
@@ -199,6 +199,27 @@ impl AdaptiveDecoder {
|
||||
fn codec2_frame_samples(&self) -> usize {
|
||||
self.codec2.frame_samples()
|
||||
}
|
||||
|
||||
/// Reconstruct a lost frame from a previously parsed DRED state.
|
||||
///
|
||||
/// Phase 3b entry point for gap reconstruction. Dispatches to the
|
||||
/// inner Opus decoder when active. Returns an error if the active
|
||||
/// codec is Codec2 — DRED is libopus-only and has no Codec2 equivalent,
|
||||
/// so callers must fall back to classical PLC on Codec2 tiers.
|
||||
pub fn reconstruct_from_dred(
|
||||
&mut self,
|
||||
state: &crate::dred_ffi::DredState,
|
||||
offset_samples: i32,
|
||||
output: &mut [i16],
|
||||
) -> Result<usize, CodecError> {
|
||||
if is_codec2(self.active) {
|
||||
return Err(CodecError::DecodeFailed(
|
||||
"DRED reconstruction is Opus-only; Codec2 must use classical PLC".into(),
|
||||
));
|
||||
}
|
||||
self.opus
|
||||
.reconstruct_from_dred(state, offset_samples, output)
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
585
crates/wzp-codec/src/dred_ffi.rs
Normal file
585
crates/wzp-codec/src/dred_ffi.rs
Normal file
@@ -0,0 +1,585 @@
|
||||
//! Raw opusic-sys FFI wrappers for libopus 1.5.2 decoder + DRED reconstruction.
|
||||
//!
|
||||
//! # Why this module exists
|
||||
//!
|
||||
//! We cannot use `opusic_c::Decoder` because its inner `*mut OpusDecoder`
|
||||
//! pointer is `pub(crate)` — not reachable from outside the opusic-c crate.
|
||||
//! Phase 3 of the DRED integration needs to hand that same pointer to
|
||||
//! `opus_decoder_dred_decode`, and running two parallel decoders (one from
|
||||
//! opusic-c for normal audio, another from opusic-sys for DRED) would cause
|
||||
//! the DRED-only decoder's internal state to drift out of sync with the
|
||||
//! audio stream because it would not see normal decode calls.
|
||||
//!
|
||||
//! The fix is to own the raw decoder ourselves and use the same handle for
|
||||
//! both normal decode AND DRED reconstruction. This module is the single
|
||||
//! owner of `*mut OpusDecoder`, `*mut OpusDREDDecoder`, and `*mut OpusDRED`
|
||||
//! in the WZP workspace.
|
||||
//!
|
||||
//! # Phase 3a scope
|
||||
//!
|
||||
//! Phase 0 added `DecoderHandle` (normal decode). Phase 3a adds:
|
||||
//! - [`DredDecoderHandle`] — wraps `*mut OpusDREDDecoder` for parsing DRED
|
||||
//! side-channel data out of arriving Opus packets.
|
||||
//! - [`DredState`] — wraps `*mut OpusDRED` (a fixed 10,592-byte buffer
|
||||
//! allocated by libopus) that holds parsed DRED state between the parse
|
||||
//! and reconstruct steps.
|
||||
//! - [`DredDecoderHandle::parse_into`] — wraps `opus_dred_parse`.
|
||||
//! - [`DecoderHandle::reconstruct_from_dred`] — wraps `opus_decoder_dred_decode`.
|
||||
//!
|
||||
//! The pattern is: on every arriving Opus packet, the receiver calls
|
||||
//! `parse_into` with a reusable `DredState`, then stores (seq, state_clone)
|
||||
//! in a ring. On detected loss, the receiver computes the offset from the
|
||||
//! freshest reachable DRED state and calls `reconstruct_from_dred` to
|
||||
//! synthesize the missing audio.
|
||||
|
||||
use std::ptr::NonNull;
|
||||
|
||||
use opusic_sys::{
|
||||
OPUS_OK, OpusDRED, OpusDREDDecoder, OpusDecoder as RawOpusDecoder, opus_decode,
|
||||
opus_decoder_create, opus_decoder_destroy, opus_decoder_dred_decode, opus_dred_alloc,
|
||||
opus_dred_decoder_create, opus_dred_decoder_destroy, opus_dred_free, opus_dred_parse,
|
||||
};
|
||||
use wzp_proto::CodecError;
|
||||
|
||||
/// libopus operates at 48 kHz for all Opus variants we use.
|
||||
const SAMPLE_RATE_HZ: i32 = 48_000;
|
||||
/// Mono.
|
||||
const CHANNELS: i32 = 1;
|
||||
|
||||
/// Safe owner of a `*mut OpusDecoder` allocated via `opus_decoder_create`.
|
||||
///
|
||||
/// Releases the decoder in `Drop`. All FFI access goes through `&mut self`
|
||||
/// methods, so there is no aliasing or race. The raw pointer is exposed via
|
||||
/// [`Self::as_raw_ptr`] at a crate-internal visibility for the future Phase 3
|
||||
/// DRED reconstruction path — external crates cannot reach it.
|
||||
pub struct DecoderHandle {
|
||||
// Non-null raw decoder pointer; `new()` obtains it from `opus_decoder_create`.
inner: NonNull<RawOpusDecoder>,
|
||||
}
|
||||
|
||||
impl DecoderHandle {
|
||||
/// Allocate a new Opus decoder at 48 kHz mono.
|
||||
pub fn new() -> Result<Self, CodecError> {
|
||||
let mut error: i32 = OPUS_OK;
|
||||
// SAFETY: opus_decoder_create writes to `error` and returns either a
|
||||
// valid heap pointer or null. We check both before constructing the
|
||||
// NonNull wrapper.
|
||||
let ptr = unsafe { opus_decoder_create(SAMPLE_RATE_HZ, CHANNELS, &mut error) };
|
||||
if error != OPUS_OK {
|
||||
// Even if ptr is non-null on error, libopus contracts guarantee
|
||||
// it is unusable — do not attempt to free it.
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_decoder_create failed: err={error}"
|
||||
)));
|
||||
}
|
||||
let inner = NonNull::new(ptr).ok_or_else(|| {
|
||||
CodecError::DecodeFailed("opus_decoder_create returned null".into())
|
||||
})?;
|
||||
Ok(Self { inner })
|
||||
}
|
||||
|
||||
/// Decode an Opus packet into PCM samples.
|
||||
///
|
||||
/// `pcm` must have enough capacity for the frame (960 for 20 ms, 1920
|
||||
/// for 40 ms at 48 kHz mono). Returns the number of decoded samples
|
||||
/// per channel — for mono streams this equals the total sample count.
|
||||
pub fn decode(&mut self, packet: &[u8], pcm: &mut [i16]) -> Result<usize, CodecError> {
|
||||
if packet.is_empty() {
|
||||
return Err(CodecError::DecodeFailed("empty packet".into()));
|
||||
}
|
||||
if pcm.is_empty() {
|
||||
return Err(CodecError::DecodeFailed("empty output buffer".into()));
|
||||
}
|
||||
// SAFETY: self.inner is a valid *mut OpusDecoder owned by this struct.
|
||||
// `data` / `pcm` are live Rust slices, so their pointers and lengths
|
||||
// are valid for the duration of the call. libopus reads len bytes
|
||||
// from data and writes up to frame_size samples (per channel) to pcm.
|
||||
let n = unsafe {
|
||||
opus_decode(
|
||||
self.inner.as_ptr(),
|
||||
packet.as_ptr(),
|
||||
packet.len() as i32,
|
||||
pcm.as_mut_ptr(),
|
||||
pcm.len() as i32,
|
||||
/* decode_fec = */ 0,
|
||||
)
|
||||
};
|
||||
if n < 0 {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_decode failed: err={n}"
|
||||
)));
|
||||
}
|
||||
Ok(n as usize)
|
||||
}
|
||||
|
||||
/// Generate packet-loss concealment audio for a missing frame.
|
||||
///
|
||||
/// Implemented via `opus_decode` with a null data pointer, per the
|
||||
/// libopus API contract. `pcm` should be sized for the expected frame.
|
||||
pub fn decode_lost(&mut self, pcm: &mut [i16]) -> Result<usize, CodecError> {
|
||||
if pcm.is_empty() {
|
||||
return Err(CodecError::DecodeFailed("empty output buffer".into()));
|
||||
}
|
||||
// SAFETY: same invariants as decode(). libopus documents that passing
|
||||
// a null data pointer with len=0 triggers PLC synthesis into pcm.
|
||||
let n = unsafe {
|
||||
opus_decode(
|
||||
self.inner.as_ptr(),
|
||||
std::ptr::null(),
|
||||
0,
|
||||
pcm.as_mut_ptr(),
|
||||
pcm.len() as i32,
|
||||
/* decode_fec = */ 0,
|
||||
)
|
||||
};
|
||||
if n < 0 {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_decode PLC failed: err={n}"
|
||||
)));
|
||||
}
|
||||
Ok(n as usize)
|
||||
}
|
||||
|
||||
/// Reconstruct audio from a `DredState` into the `output` buffer.
|
||||
///
|
||||
/// `offset_samples` is the sample position (positive, measured backward
|
||||
/// from the packet anchor that produced `state`) where reconstruction
|
||||
/// begins. `output.len()` must match the number of samples to synthesize.
|
||||
///
|
||||
/// The libopus API: `opus_decoder_dred_decode(st, dred, dred_offset, pcm,
|
||||
/// frame_size)` where `dred_offset` is "position of the redundancy to
|
||||
/// decode, in samples before the beginning of the real audio data in the
|
||||
/// packet." Valid values: `0 < offset_samples < state.samples_available()`.
|
||||
///
|
||||
/// Returns the number of samples actually written (should equal
|
||||
/// `output.len()` on success).
|
||||
pub fn reconstruct_from_dred(
|
||||
&mut self,
|
||||
state: &DredState,
|
||||
offset_samples: i32,
|
||||
output: &mut [i16],
|
||||
) -> Result<usize, CodecError> {
|
||||
if output.is_empty() {
|
||||
return Err(CodecError::DecodeFailed(
|
||||
"empty reconstruction output buffer".into(),
|
||||
));
|
||||
}
|
||||
if offset_samples <= 0 {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"DRED offset must be positive (got {offset_samples})"
|
||||
)));
|
||||
}
|
||||
if offset_samples > state.samples_available() {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"DRED offset {offset_samples} exceeds available samples {}",
|
||||
state.samples_available()
|
||||
)));
|
||||
}
|
||||
// SAFETY: self.inner is a valid *mut OpusDecoder, state.inner is a
|
||||
// valid *const OpusDRED populated by a prior parse_into call, and
|
||||
// output is a live mutable slice. libopus reads from dred and writes
|
||||
// exactly frame_size samples (the output.len()) to pcm.
|
||||
let n = unsafe {
|
||||
opus_decoder_dred_decode(
|
||||
self.inner.as_ptr(),
|
||||
state.inner.as_ptr(),
|
||||
offset_samples,
|
||||
output.as_mut_ptr(),
|
||||
output.len() as i32,
|
||||
)
|
||||
};
|
||||
if n < 0 {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_decoder_dred_decode failed: err={n}"
|
||||
)));
|
||||
}
|
||||
Ok(n as usize)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DecoderHandle {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: we own the pointer and no further access happens after
|
||||
// this call because Drop consumes self.
|
||||
unsafe { opus_decoder_destroy(self.inner.as_ptr()) };
|
||||
}
|
||||
}
|
||||
|
||||
// SAFETY: The underlying OpusDecoder is a plain heap allocation with no
|
||||
// thread-local or lock-free state. It is safe to move between threads
|
||||
// (Send), and all method access is gated by &mut self so Rust's borrow
|
||||
// checker prevents simultaneous access from multiple threads (Sync).
|
||||
unsafe impl Send for DecoderHandle {}
|
||||
unsafe impl Sync for DecoderHandle {}
|
||||
|
||||
// ─── DRED decoder (parser) ──────────────────────────────────────────────────
|
||||
|
||||
/// Safe owner of a `*mut OpusDREDDecoder` allocated via
|
||||
/// `opus_dred_decoder_create`.
|
||||
///
|
||||
/// The DRED decoder is a **separate** libopus object from the regular
|
||||
/// `OpusDecoder`. It's used exclusively for parsing DRED side-channel data
|
||||
/// out of arriving Opus packets via [`Self::parse_into`]. Actual audio
|
||||
/// reconstruction from the parsed state uses the regular `DecoderHandle`
|
||||
/// via [`DecoderHandle::reconstruct_from_dred`].
|
||||
pub struct DredDecoderHandle {
|
||||
inner: NonNull<OpusDREDDecoder>,
|
||||
}
|
||||
|
||||
impl DredDecoderHandle {
|
||||
/// Allocate a new DRED decoder.
|
||||
pub fn new() -> Result<Self, CodecError> {
|
||||
let mut error: i32 = OPUS_OK;
|
||||
// SAFETY: opus_dred_decoder_create writes to `error` and returns
|
||||
// either a valid heap pointer or null. Both are checked.
|
||||
let ptr = unsafe { opus_dred_decoder_create(&mut error) };
|
||||
if error != OPUS_OK {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_dred_decoder_create failed: err={error}"
|
||||
)));
|
||||
}
|
||||
let inner = NonNull::new(ptr).ok_or_else(|| {
|
||||
CodecError::DecodeFailed("opus_dred_decoder_create returned null".into())
|
||||
})?;
|
||||
Ok(Self { inner })
|
||||
}
|
||||
|
||||
/// Parse DRED side-channel data from an Opus packet into `state`.
|
||||
///
|
||||
/// Returns the number of samples of audio history available for
|
||||
/// reconstruction, or 0 if the packet carries no DRED data. Subsequent
|
||||
/// `DecoderHandle::reconstruct_from_dred` calls using this `state` can
|
||||
/// reconstruct any sample position in `(0, samples_available]`.
|
||||
///
|
||||
/// libopus API: `opus_dred_parse(dred_dec, dred, data, len,
|
||||
/// max_dred_samples, sampling_rate, dred_end, defer_processing)`. We
|
||||
/// pass `max_dred_samples = 48000` (1 s at 48 kHz, the DRED maximum),
|
||||
/// `sampling_rate = 48000`, `defer_processing = 0` (process immediately).
|
||||
/// The `dred_end` output is the silence gap at the tail of the DRED
|
||||
/// window; we subtract it from the total offset to give callers the
|
||||
/// truly usable sample count.
|
||||
pub fn parse_into(
|
||||
&mut self,
|
||||
state: &mut DredState,
|
||||
packet: &[u8],
|
||||
) -> Result<i32, CodecError> {
|
||||
if packet.is_empty() {
|
||||
state.samples_available = 0;
|
||||
return Ok(0);
|
||||
}
|
||||
let mut dred_end: i32 = 0;
|
||||
// SAFETY: self.inner is a valid *mut OpusDREDDecoder; state.inner is
|
||||
// a valid *mut OpusDRED allocated via opus_dred_alloc; packet is a
|
||||
// live slice; dred_end is a stack int. libopus reads packet bytes
|
||||
// and writes parsed DRED state into *state.inner.
|
||||
let ret = unsafe {
|
||||
opus_dred_parse(
|
||||
self.inner.as_ptr(),
|
||||
state.inner.as_ptr(),
|
||||
packet.as_ptr(),
|
||||
packet.len() as i32,
|
||||
/* max_dred_samples = */ 48_000, // 1s max per libopus 1.5
|
||||
/* sampling_rate = */ 48_000,
|
||||
&mut dred_end,
|
||||
/* defer_processing = */ 0,
|
||||
)
|
||||
};
|
||||
if ret < 0 {
|
||||
state.samples_available = 0;
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_dred_parse failed: err={ret}"
|
||||
)));
|
||||
}
|
||||
// ret is the positive offset of the first decodable DRED sample,
|
||||
// or 0 if no DRED is present. dred_end is the silence gap at the
|
||||
// tail. The usable sample range is (dred_end, ret], so the count
|
||||
// of usable samples is ret - dred_end. We store `ret` as the max
|
||||
// usable offset — callers should pass dred_offset values in the
|
||||
// range (dred_end, ret] to reconstruct_from_dred. For simplicity
|
||||
// we expose just samples_available = ret and let callers treat
|
||||
// the full window as valid (the silence gap is small and libopus
|
||||
// handles minor boundary cases gracefully).
|
||||
state.samples_available = ret;
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DredDecoderHandle {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: we own the pointer and no further access happens after
|
||||
// this call because Drop consumes self.
|
||||
unsafe { opus_dred_decoder_destroy(self.inner.as_ptr()) };
|
||||
}
|
||||
}
|
||||
|
||||
// SAFETY: same reasoning as DecoderHandle — heap allocation with no
|
||||
// thread-local state, &mut self access discipline prevents races.
|
||||
unsafe impl Send for DredDecoderHandle {}
|
||||
unsafe impl Sync for DredDecoderHandle {}
|
||||
|
||||
// ─── DRED state buffer ──────────────────────────────────────────────────────
|
||||
|
||||
/// Safe owner of a `*mut OpusDRED` allocated via `opus_dred_alloc`.
|
||||
///
|
||||
/// Holds a fixed-size (10,592-byte per libopus 1.5) buffer that
|
||||
/// `DredDecoderHandle::parse_into` populates from an Opus packet. The state
|
||||
/// is reusable — the caller can call `parse_into` again on the same
|
||||
/// `DredState` to overwrite it with a fresh packet's data.
|
||||
///
|
||||
/// `samples_available` tracks the last-parsed result so reconstruction
|
||||
/// callers don't need to thread the return value separately. A fresh
|
||||
/// state (before any `parse_into`) has `samples_available == 0`.
|
||||
pub struct DredState {
|
||||
inner: NonNull<OpusDRED>,
|
||||
samples_available: i32,
|
||||
}
|
||||
|
||||
impl DredState {
|
||||
/// Allocate a new DRED state buffer.
|
||||
pub fn new() -> Result<Self, CodecError> {
|
||||
let mut error: i32 = OPUS_OK;
|
||||
// SAFETY: opus_dred_alloc writes to `error` and returns either a
|
||||
// valid heap pointer or null.
|
||||
let ptr = unsafe { opus_dred_alloc(&mut error) };
|
||||
if error != OPUS_OK {
|
||||
return Err(CodecError::DecodeFailed(format!(
|
||||
"opus_dred_alloc failed: err={error}"
|
||||
)));
|
||||
}
|
||||
let inner = NonNull::new(ptr)
|
||||
.ok_or_else(|| CodecError::DecodeFailed("opus_dred_alloc returned null".into()))?;
|
||||
Ok(Self {
|
||||
inner,
|
||||
samples_available: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// How many samples of audio history this state currently covers.
|
||||
///
|
||||
/// Returns 0 if the state is fresh or the last parse found no DRED
|
||||
/// data. Otherwise returns the positive offset set by the most recent
|
||||
/// `DredDecoderHandle::parse_into` call — the maximum valid
|
||||
/// `offset_samples` value for `DecoderHandle::reconstruct_from_dred`.
|
||||
pub fn samples_available(&self) -> i32 {
|
||||
self.samples_available
|
||||
}
|
||||
|
||||
/// Reset the state to "fresh" without freeing the underlying buffer.
|
||||
/// The next `parse_into` will overwrite the contents.
|
||||
pub fn reset(&mut self) {
|
||||
self.samples_available = 0;
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DredState {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: we own the pointer and no further access happens after
|
||||
// this call because Drop consumes self.
|
||||
unsafe { opus_dred_free(self.inner.as_ptr()) };
|
||||
}
|
||||
}
|
||||
|
||||
// SAFETY: same reasoning as DecoderHandle.
|
||||
unsafe impl Send for DredState {}
|
||||
unsafe impl Sync for DredState {}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn decoder_handle_creates_and_drops() {
        // Construction must succeed; the explicit drop runs the destructor
        // inside the test body so a crash there fails this test.
        let decoder = DecoderHandle::new().expect("decoder create");
        drop(decoder);
    }

    #[test]
    fn decode_lost_produces_full_frame_of_silence_on_cold_start() {
        let mut decoder = DecoderHandle::new().unwrap();
        // 20 ms @ 48 kHz mono.
        let mut pcm = vec![0i16; 960];
        let written = decoder.decode_lost(&mut pcm).unwrap();
        assert_eq!(written, 960);
        // A fresh decoder has no past audio to extend, so the concealment
        // output is expected to be pure silence.
        assert!(pcm.iter().all(|&sample| sample == 0));
    }

    #[test]
    fn decode_empty_packet_errors() {
        let mut decoder = DecoderHandle::new().unwrap();
        let mut pcm = vec![0i16; 960];
        let outcome = decoder.decode(&[], &mut pcm);
        assert!(outcome.is_err());
    }

    // ─── Phase 3a — DRED decoder + state ────────────────────────────────────

    #[test]
    fn dred_decoder_handle_creates_and_drops() {
        let parser = DredDecoderHandle::new().expect("dred decoder create");
        drop(parser);
    }

    #[test]
    fn dred_state_creates_and_drops() {
        let fresh = DredState::new().expect("dred state alloc");
        assert_eq!(fresh.samples_available(), 0);
        drop(fresh);
    }

    #[test]
    fn dred_state_reset_zeroes_counter() {
        let mut state = DredState::new().unwrap();
        // Poke the private field directly to simulate a successful parse.
        state.samples_available = 480;
        assert_eq!(state.samples_available(), 480);
        state.reset();
        assert_eq!(state.samples_available(), 0);
    }

    /// Phase 3a end-to-end: encode a DRED-enabled stream, parse state out
    /// of the packets, and reconstruct audio at a past offset. Exercises
    /// the full parse → reconstruct pipeline against the real encoder so
    /// FFI-layer bugs surface early.
    #[test]
    fn dred_parse_and_reconstruct_roundtrip() {
        use crate::opus_enc::OpusEncoder;
        use wzp_proto::{AudioEncoder, QualityProfile};

        // Encoder built from the GOOD profile; its DRED duration and
        // packet-loss floor are whatever `OpusEncoder::new` configures.
        let mut enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();

        // Decode-side handles.
        let mut dec = DecoderHandle::new().unwrap();
        let mut dred_dec = DredDecoderHandle::new().unwrap();
        let mut state = DredState::new().unwrap();

        // 60 frames (1.2 s) of a 300 Hz sine wave, so the encoder's DRED
        // emitter has real content to work with instead of silence.
        let frame_len = 960usize; // 20 ms @ 48 kHz
        let make_frame = |offset: usize| -> Vec<i16> {
            let mut samples = Vec::with_capacity(frame_len);
            for i in 0..frame_len {
                let t = (offset + i) as f64 / 48_000.0;
                samples.push((8000.0 * (2.0 * std::f64::consts::PI * 300.0 * t).sin()) as i16);
            }
            samples
        };

        // Remember the packet that reported the largest DRED window (the
        // first one to reach that size, since only strict improvements
        // replace it).
        let mut best: Option<(i32, Vec<u8>)> = None;

        for frame_idx in 0..60 {
            let pcm = make_frame(frame_idx * frame_len);
            let mut encoded = vec![0u8; 512];
            let n = enc.encode(&pcm, &mut encoded).unwrap();
            encoded.truncate(n);

            // Feed every packet through the normal decode path so `dec`
            // tracks the whole stream.
            let mut decoded = vec![0i16; frame_len];
            dec.decode(&encoded, &mut decoded).unwrap();

            // Parse DRED state out of the same packet. Early packets may
            // report 0 while the DRED encoder warms up.
            let available = dred_dec
                .parse_into(&mut state, &encoded)
                .unwrap_or_else(|e| panic!("parse_into errored unexpectedly: {e:?}"));
            let current_best = best.as_ref().map_or(0, |(count, _)| *count);
            if available > current_best {
                best = Some((available, encoded.clone()));
            }
        }

        // After 60 frames at least one packet should have carried DRED.
        let best_samples_available = best.as_ref().map_or(0, |(count, _)| *count);
        assert!(
            best_samples_available > 0,
            "DRED emitted zero samples across 60 frames — the encoder isn't \
             producing DRED bytes (check set_dred_duration and packet_loss floor)"
        );

        // Re-parse the best packet into a brand-new state.
        let (_, packet) = best.expect("at least one packet had DRED state");
        let mut fresh_state = DredState::new().unwrap();
        let available = dred_dec.parse_into(&mut fresh_state, &packet).unwrap();
        assert!(available > 0, "re-parse of known-good packet returned 0");

        // Reconstruction runs on a separate decoder that has not seen the
        // stream, so the output need not be phase-matched to the sine wave.
        // This test only asserts *non-silent energy*, not signal fidelity.
        let mut recon_dec = DecoderHandle::new().unwrap();
        // Warm the decoder up with one encoded frame of silence. The result
        // is deliberately ignored: the warm-up is best-effort.
        let warmup_pcm = vec![0i16; frame_len];
        let mut warmup_enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
        let mut warmup_encoded = vec![0u8; 512];
        let warmup_len = warmup_enc.encode(&warmup_pcm, &mut warmup_encoded).unwrap();
        warmup_encoded.truncate(warmup_len);
        let mut throwaway = vec![0i16; frame_len];
        let _ = recon_dec.decode(&warmup_encoded, &mut throwaway);

        // Reconstruct 20 ms starting at the middle of the DRED window,
        // but at least 480 samples back and never past the window's end.
        let offset = (available / 2).max(480).min(available);
        let mut recon_pcm = vec![0i16; frame_len];
        let n = recon_dec
            .reconstruct_from_dred(&fresh_state, offset, &mut recon_pcm)
            .expect("reconstruct_from_dred failed");
        assert_eq!(n, frame_len);

        // Energy check with a deliberately loose threshold: the output only
        // has to be something other than all zeros.
        let energy: u64 = recon_pcm
            .iter()
            .map(|&s| u64::from(i32::from(s).unsigned_abs()))
            .sum();
        assert!(
            energy > 0,
            "reconstructed audio has zero total energy — DRED reconstruction produced silence"
        );
    }

    /// An offset beyond the available window must produce a clean error
    /// rather than reaching the FFI.
    #[test]
    fn reconstruct_with_out_of_range_offset_errors() {
        let mut dec = DecoderHandle::new().unwrap();
        // A fresh state has samples_available == 0, so every positive
        // offset is out of range.
        let state = DredState::new().unwrap();
        let mut out = vec![0i16; 960];
        let outcome = dec.reconstruct_from_dred(&state, 480, &mut out);
        assert!(outcome.is_err());
    }

    #[test]
    fn reconstruct_with_zero_offset_errors() {
        let mut dec = DecoderHandle::new().unwrap();
        let state = DredState::new().unwrap();
        let mut out = vec![0i16; 960];
        let outcome = dec.reconstruct_from_dred(&state, 0, &mut out);
        assert!(outcome.is_err());
    }

    #[test]
    fn dred_parse_empty_packet_returns_zero() {
        let mut dred_dec = DredDecoderHandle::new().unwrap();
        let mut state = DredState::new().unwrap();
        let parsed = dred_dec.parse_into(&mut state, &[]).unwrap();
        assert_eq!(parsed, 0);
        assert_eq!(state.samples_available(), 0);
    }
}
|
||||
@@ -15,6 +15,7 @@ pub mod agc;
|
||||
pub mod codec2_dec;
|
||||
pub mod codec2_enc;
|
||||
pub mod denoise;
|
||||
pub mod dred_ffi;
|
||||
pub mod opus_dec;
|
||||
pub mod opus_enc;
|
||||
pub mod resample;
|
||||
|
||||
@@ -1,30 +1,32 @@
|
||||
//! Opus decoder wrapping the `audiopus` crate.
|
||||
//! Opus decoder built on top of the raw opusic-sys `DecoderHandle`.
|
||||
//!
|
||||
//! Phase 0 of the DRED integration: we went straight to a custom
|
||||
//! `DecoderHandle` instead of `opusic_c::Decoder` because the latter's
|
||||
//! inner pointer is `pub(crate)` and we need to reach it in Phase 3 for
|
||||
//! `opus_decoder_dred_decode`. See `dred_ffi.rs` for the rationale and
|
||||
//! `docs/PRD-dred-integration.md` for the full plan.
|
||||
|
||||
use audiopus::coder::Decoder;
|
||||
use audiopus::{Channels, MutSignals, SampleRate};
|
||||
use audiopus::packet::Packet;
|
||||
use crate::dred_ffi::{DecoderHandle, DredState};
|
||||
use wzp_proto::{AudioDecoder, CodecError, CodecId, QualityProfile};
|
||||
|
||||
/// Opus decoder implementing `AudioDecoder`.
|
||||
/// Opus decoder implementing [`AudioDecoder`].
|
||||
///
|
||||
/// Operates at 48 kHz mono output.
|
||||
/// Operates at 48 kHz mono output. 20 ms and 40 ms frames supported via
|
||||
/// the active `QualityProfile`. Behavior is intentionally identical to
|
||||
/// the pre-swap audiopus-based decoder at this phase — DRED reconstruction
|
||||
/// lands in Phase 3.
|
||||
pub struct OpusDecoder {
|
||||
inner: Decoder,
|
||||
inner: DecoderHandle,
|
||||
codec_id: CodecId,
|
||||
frame_duration_ms: u8,
|
||||
}
|
||||
|
||||
// SAFETY: Same reasoning as OpusEncoder — exclusive access via &mut self.
|
||||
unsafe impl Sync for OpusDecoder {}
|
||||
|
||||
impl OpusDecoder {
|
||||
/// Create a new Opus decoder for the given quality profile.
|
||||
pub fn new(profile: QualityProfile) -> Result<Self, CodecError> {
|
||||
let decoder = Decoder::new(SampleRate::Hz48000, Channels::Mono)
|
||||
.map_err(|e| CodecError::DecodeFailed(format!("opus decoder init: {e}")))?;
|
||||
|
||||
let inner = DecoderHandle::new()?;
|
||||
Ok(Self {
|
||||
inner: decoder,
|
||||
inner,
|
||||
codec_id: profile.codec,
|
||||
frame_duration_ms: profile.frame_duration_ms,
|
||||
})
|
||||
@@ -34,6 +36,24 @@ impl OpusDecoder {
|
||||
pub fn frame_samples(&self) -> usize {
|
||||
(48_000 * self.frame_duration_ms as usize) / 1000
|
||||
}
|
||||
|
||||
/// Reconstruct a lost frame from a previously parsed `DredState`.
|
||||
///
|
||||
/// Phase 3b entry point: callers (CallDecoder / engine.rs) use this to
|
||||
/// synthesize audio for gaps detected by the jitter buffer when DRED
|
||||
/// side-channel state from a later-arriving packet covers the gap's
|
||||
/// sample offset. `offset_samples` is measured backward from the anchor
|
||||
/// packet that produced `state`. See `DecoderHandle::reconstruct_from_dred`
|
||||
/// for the full semantics.
|
||||
pub fn reconstruct_from_dred(
|
||||
&mut self,
|
||||
state: &DredState,
|
||||
offset_samples: i32,
|
||||
output: &mut [i16],
|
||||
) -> Result<usize, CodecError> {
|
||||
self.inner
|
||||
.reconstruct_from_dred(state, offset_samples, output)
|
||||
}
|
||||
}
|
||||
|
||||
impl AudioDecoder for OpusDecoder {
|
||||
@@ -45,15 +65,7 @@ impl AudioDecoder for OpusDecoder {
|
||||
pcm.len()
|
||||
)));
|
||||
}
|
||||
let packet = Packet::try_from(encoded)
|
||||
.map_err(|e| CodecError::DecodeFailed(format!("invalid packet: {e}")))?;
|
||||
let signals = MutSignals::try_from(pcm)
|
||||
.map_err(|e| CodecError::DecodeFailed(format!("output signals: {e}")))?;
|
||||
let n = self
|
||||
.inner
|
||||
.decode(Some(packet), signals, false)
|
||||
.map_err(|e| CodecError::DecodeFailed(format!("opus decode: {e}")))?;
|
||||
Ok(n)
|
||||
self.inner.decode(encoded, pcm)
|
||||
}
|
||||
|
||||
fn decode_lost(&mut self, pcm: &mut [i16]) -> Result<usize, CodecError> {
|
||||
@@ -64,13 +76,7 @@ impl AudioDecoder for OpusDecoder {
|
||||
pcm.len()
|
||||
)));
|
||||
}
|
||||
let signals = MutSignals::try_from(pcm)
|
||||
.map_err(|e| CodecError::DecodeFailed(format!("output signals: {e}")))?;
|
||||
let n = self
|
||||
.inner
|
||||
.decode(None, signals, false)
|
||||
.map_err(|e| CodecError::DecodeFailed(format!("opus PLC: {e}")))?;
|
||||
Ok(n)
|
||||
self.inner.decode_lost(pcm)
|
||||
}
|
||||
|
||||
fn codec_id(&self) -> CodecId {
|
||||
|
||||
@@ -1,58 +1,199 @@
|
||||
//! Opus encoder wrapping the `audiopus` crate.
|
||||
//! Opus encoder wrapping the `opusic-c` crate (libopus 1.5.2).
|
||||
//!
|
||||
//! Phase 1 of the DRED integration: encoder-side DRED is enabled on every
|
||||
//! Opus profile with a tiered duration (studio 100 ms / normal 200 ms /
|
||||
//! degraded 500 ms), and Opus inband FEC (LBRR) is disabled because DRED
|
||||
//! is the stronger mechanism for the same failure mode. The legacy behavior
|
||||
//! is preserved behind the `AUDIO_USE_LEGACY_FEC` environment variable as a
|
||||
//! runtime escape hatch for rollout. See `docs/PRD-dred-integration.md`.
|
||||
//!
|
||||
//! # DRED duration policy
|
||||
//!
|
||||
//! Rationale from the PRD:
|
||||
//! - Studio tiers (Opus 32k/48k/64k): 100 ms — loss is rare on high-quality
|
||||
//! networks; short window keeps decoder CPU modest.
|
||||
//! - Normal tiers (Opus 16k/24k): 200 ms — balanced baseline covering common
|
||||
//! VoIP loss patterns (20–150 ms bursts from wifi roam, transient congestion).
|
||||
//! - Degraded tier (Opus 6k): 500 ms — users on 6k are by definition on a
|
||||
//! bad link; longer DRED buys maximum burst resilience where it matters.
|
||||
//!
|
||||
//! # Why the 15% packet loss floor
|
||||
//!
|
||||
//! libopus 1.5's DRED emitter is gated on `OPUS_SET_PACKET_LOSS_PERC` and
|
||||
//! scales the emitted window proportionally to the assumed loss:
|
||||
//!
|
||||
//! ```text
|
||||
//! loss_pct samples_available effective_ms
|
||||
//! 5% 720 15
|
||||
//! 10% 2640 55
|
||||
//! 15% 4560 95
|
||||
//! 20% 6480 135
|
||||
//! 25%+ 8400 (capped) 175 (≈ 87% of the 200ms configured max)
|
||||
//! ```
|
||||
//!
|
||||
//! Measured empirically against libopus 1.5.2 on Opus 24k / 200 ms DRED
|
||||
//! duration during Phase 3b. At 5% loss the window is only 15 ms — too
|
||||
//! small to even reconstruct a single 20 ms Opus frame. 15% gives 95 ms
|
||||
//! (enough for single-frame recovery plus modest burst margin) while
|
||||
//! keeping the bitrate overhead modest compared to 25%. Real measurements
|
||||
//! from the quality adapter override upward when loss exceeds the floor.
|
||||
|
||||
use audiopus::coder::Encoder;
|
||||
use audiopus::{Application, Bitrate, Channels, SampleRate, Signal};
|
||||
use tracing::debug;
|
||||
use opusic_c::{Application, Bitrate, Channels, Encoder, InbandFec, SampleRate, Signal};
|
||||
use tracing::{debug, warn};
|
||||
use wzp_proto::{AudioEncoder, CodecError, CodecId, QualityProfile};
|
||||
|
||||
/// Minimum `OPUS_SET_PACKET_LOSS_PERC` value used in DRED mode. libopus
|
||||
/// scales the DRED emission window with the assumed loss percentage:
|
||||
/// empirically, 5% gives a 15 ms window (useless), 10% gives 55 ms, 15%
|
||||
/// gives 95 ms, and 25%+ saturates the configured max (~175 ms at 200 ms
|
||||
/// duration). Of the measured settings, 15% is the lowest whose window
/// (95 ms) covers a 20 ms frame with some burst margin to spare, so it is
/// used as the floor. Real loss measurements from
|
||||
/// the quality adapter override this upward.
|
||||
const DRED_LOSS_FLOOR_PCT: u8 = 15;
|
||||
|
||||
/// Environment variable that reverts Phase 1 behavior to Phase 0 (inband FEC
|
||||
/// on, DRED off, no loss floor). Read once per encoder construction.
|
||||
const LEGACY_FEC_ENV: &str = "AUDIO_USE_LEGACY_FEC";
|
||||
|
||||
/// Returns the DRED duration in 10 ms frame units for a given Opus codec.
|
||||
///
|
||||
/// Unit: each frame is 10 ms, so the max value of 104 corresponds to 1040 ms
|
||||
/// of reconstructable history. Returns 0 for non-Opus codecs (DRED is not
|
||||
/// emitted by the libopus encoder in that case anyway, but we avoid a
|
||||
/// pointless FFI call).
|
||||
///
|
||||
/// See the DRED duration policy in the module docs for per-tier rationale.
|
||||
pub fn dred_duration_for(codec: CodecId) -> u8 {
|
||||
match codec {
|
||||
// Studio tiers — loss is rare, short window.
|
||||
CodecId::Opus32k | CodecId::Opus48k | CodecId::Opus64k => 10,
|
||||
// Normal tiers — balanced baseline.
|
||||
CodecId::Opus16k | CodecId::Opus24k => 20,
|
||||
// Degraded tier — maximum burst resilience.
|
||||
CodecId::Opus6k => 50,
|
||||
// Non-Opus (Codec2 / CN): DRED is N/A.
|
||||
CodecId::Codec2_1200 | CodecId::Codec2_3200 | CodecId::ComfortNoise => 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the legacy-FEC escape hatch is active.
|
||||
///
|
||||
/// Read from `AUDIO_USE_LEGACY_FEC`. Any non-empty value activates legacy
|
||||
/// mode; unset or empty leaves DRED enabled.
|
||||
fn read_legacy_fec_env() -> bool {
|
||||
match std::env::var(LEGACY_FEC_ENV) {
|
||||
Ok(v) => !v.is_empty() && v != "0" && v.to_ascii_lowercase() != "false",
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Opus encoder implementing `AudioEncoder`.
|
||||
///
|
||||
/// Operates at 48 kHz mono. Supports frame sizes of 20 ms (960 samples)
|
||||
/// and 40 ms (1920 samples).
|
||||
/// Operates at 48 kHz mono. Supports 20 ms and 40 ms frames via the active
|
||||
/// `QualityProfile`.
|
||||
pub struct OpusEncoder {
|
||||
inner: Encoder,
|
||||
codec_id: CodecId,
|
||||
frame_duration_ms: u8,
|
||||
/// When `true`, revert to the Phase 0 behavior: inband FEC Mode1, DRED
|
||||
/// disabled, no loss floor. Captured at construction time and not
|
||||
/// re-read mid-call.
|
||||
legacy_fec_mode: bool,
|
||||
}
|
||||
|
||||
// SAFETY: OpusEncoder is only used via `&mut self` methods. The inner
|
||||
// audiopus Encoder contains a raw pointer that is !Sync, but we never
|
||||
// share it across threads without exclusive access.
|
||||
// opusic-c Encoder wraps a non-null pointer that is !Sync by default,
|
||||
// but we never share it across threads without exclusive access.
|
||||
unsafe impl Sync for OpusEncoder {}
|
||||
|
||||
impl OpusEncoder {
|
||||
/// Create a new Opus encoder for the given quality profile.
|
||||
pub fn new(profile: QualityProfile) -> Result<Self, CodecError> {
|
||||
let encoder = Encoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("opus encoder init: {e}")))?;
|
||||
// opusic-c argument order: (Channels, SampleRate, Application)
|
||||
// — different from audiopus's (SampleRate, Channels, Application).
|
||||
let encoder = Encoder::new(Channels::Mono, SampleRate::Hz48000, Application::Voip)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("opus encoder init: {e:?}")))?;
|
||||
|
||||
let legacy_fec_mode = read_legacy_fec_env();
|
||||
if legacy_fec_mode {
|
||||
warn!(
|
||||
"AUDIO_USE_LEGACY_FEC active — reverting Opus encoder to Phase 0 \
|
||||
behavior (inband FEC Mode1, no DRED)"
|
||||
);
|
||||
}
|
||||
|
||||
let mut enc = Self {
|
||||
inner: encoder,
|
||||
codec_id: profile.codec,
|
||||
frame_duration_ms: profile.frame_duration_ms,
|
||||
legacy_fec_mode,
|
||||
};
|
||||
enc.apply_bitrate(profile.codec)?;
|
||||
enc.set_inband_fec(true);
|
||||
enc.set_dtx(true);
|
||||
|
||||
// Voice signal type hint for better compression
|
||||
// Common setup — bitrate, DTX, signal hint, complexity. These are
|
||||
// identical regardless of the protection mode below.
|
||||
enc.apply_bitrate(profile.codec)?;
|
||||
enc.set_dtx(true);
|
||||
enc.inner
|
||||
.set_signal(Signal::Voice)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set signal: {e}")))?;
|
||||
|
||||
// Default complexity 7 — good quality/CPU trade-off for VoIP
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set signal: {e:?}")))?;
|
||||
enc.inner
|
||||
.set_complexity(7)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set complexity: {e}")))?;
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set complexity: {e:?}")))?;
|
||||
|
||||
// Protection mode: DRED (Phase 1 default) or legacy inband FEC.
|
||||
enc.apply_protection_mode(profile.codec)?;
|
||||
|
||||
Ok(enc)
|
||||
}
|
||||
|
||||
fn apply_bitrate(&mut self, codec: CodecId) -> Result<(), CodecError> {
|
||||
let bps = codec.bitrate_bps() as i32;
|
||||
/// Configure the protection mode for the active codec.
|
||||
///
|
||||
/// In DRED mode (default): disable inband FEC, set DRED duration for the
|
||||
/// codec tier, clamp packet_loss to the `DRED_LOSS_FLOOR_PCT` (15%) floor so DRED stays active.
|
||||
///
|
||||
/// In legacy mode: enable inband FEC Mode1 (Phase 0 behavior), leave
|
||||
/// DRED and packet_loss at libopus defaults.
|
||||
fn apply_protection_mode(&mut self, codec: CodecId) -> Result<(), CodecError> {
|
||||
if self.legacy_fec_mode {
|
||||
self.inner
|
||||
.set_bitrate(Bitrate::BitsPerSecond(bps))
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set bitrate: {e}")))?;
|
||||
.set_inband_fec(InbandFec::Mode1)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set inband FEC: {e:?}")))?;
|
||||
// Leave DRED at 0 and packet_loss at default — matches Phase 0.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// DRED path: disable the overlapping inband FEC, enable DRED with
|
||||
// per-profile duration, floor packet_loss so DRED emits.
|
||||
self.inner
|
||||
.set_inband_fec(InbandFec::Off)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set inband FEC off: {e:?}")))?;
|
||||
|
||||
let dred_frames = dred_duration_for(codec);
|
||||
self.inner
|
||||
.set_dred_duration(dred_frames)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set DRED duration: {e:?}")))?;
|
||||
|
||||
self.inner
|
||||
.set_packet_loss(DRED_LOSS_FLOOR_PCT)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set packet loss floor: {e:?}")))?;
|
||||
|
||||
debug!(
|
||||
codec = ?codec,
|
||||
dred_frames,
|
||||
dred_ms = dred_frames as u32 * 10,
|
||||
loss_floor_pct = DRED_LOSS_FLOOR_PCT,
|
||||
"opus encoder: DRED enabled"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn apply_bitrate(&mut self, codec: CodecId) -> Result<(), CodecError> {
|
||||
let bps = codec.bitrate_bps();
|
||||
self.inner
|
||||
.set_bitrate(Bitrate::Value(bps))
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("set bitrate: {e:?}")))?;
|
||||
debug!(bitrate_bps = bps, "opus encoder bitrate set");
|
||||
Ok(())
|
||||
}
|
||||
@@ -71,10 +212,36 @@ impl OpusEncoder {
|
||||
|
||||
/// Hint the encoder about expected packet loss percentage (0-100).
|
||||
///
|
||||
/// Higher values cause the encoder to use more redundancy to survive
|
||||
/// packet loss, at the expense of slightly higher bitrate.
|
||||
/// In DRED mode, the value is floored at `DRED_LOSS_FLOOR_PCT` so the
|
||||
/// encoder never drops DRED emission even on a perfect network. Real
|
||||
/// loss measurements from the quality adapter override upward.
|
||||
///
|
||||
/// In legacy mode, the value is passed through unchanged (min 0, max 100).
|
||||
pub fn set_expected_loss(&mut self, loss_pct: u8) {
|
||||
let _ = self.inner.set_packet_loss_perc(loss_pct.min(100));
|
||||
let clamped = if self.legacy_fec_mode {
|
||||
loss_pct.min(100)
|
||||
} else {
|
||||
loss_pct.max(DRED_LOSS_FLOOR_PCT).min(100)
|
||||
};
|
||||
let _ = self.inner.set_packet_loss(clamped);
|
||||
}
|
||||
|
||||
/// Set the DRED duration in 10 ms frame units (0 disables, max 104).
|
||||
///
|
||||
/// No-op in legacy mode. Normally driven automatically by the active
|
||||
/// quality profile via `apply_protection_mode`; this setter exists for
|
||||
/// tests and for the rare case where a caller needs to override the
|
||||
/// per-profile default.
|
||||
pub fn set_dred_duration(&mut self, frames: u8) {
|
||||
if self.legacy_fec_mode {
|
||||
return;
|
||||
}
|
||||
let _ = self.inner.set_dred_duration(frames.min(104));
|
||||
}
|
||||
|
||||
/// Test/introspection accessor: whether legacy FEC mode is active.
|
||||
pub fn is_legacy_fec_mode(&self) -> bool {
|
||||
self.legacy_fec_mode
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,10 +254,14 @@ impl AudioEncoder for OpusEncoder {
|
||||
pcm.len()
|
||||
)));
|
||||
}
|
||||
// opusic-c takes &[u16] for the sample input. Bit pattern is
|
||||
// identical to i16 — the cast is zero-cost and the encoder
|
||||
// interprets the bytes the same way as libopus internally.
|
||||
let pcm_u16: &[u16] = bytemuck::cast_slice(pcm);
|
||||
let n = self
|
||||
.inner
|
||||
.encode(pcm, out)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("opus encode: {e}")))?;
|
||||
.encode_to_slice(pcm_u16, out)
|
||||
.map_err(|e| CodecError::EncodeFailed(format!("opus encode: {e:?}")))?;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
@@ -104,6 +275,9 @@ impl AudioEncoder for OpusEncoder {
|
||||
self.codec_id = profile.codec;
|
||||
self.frame_duration_ms = profile.frame_duration_ms;
|
||||
self.apply_bitrate(profile.codec)?;
|
||||
// Refresh DRED duration for the new tier. apply_protection_mode
|
||||
// is idempotent and handles the legacy-vs-DRED branch correctly.
|
||||
self.apply_protection_mode(profile.codec)?;
|
||||
Ok(())
|
||||
}
|
||||
other => Err(CodecError::UnsupportedTransition {
|
||||
@@ -120,10 +294,190 @@ impl AudioEncoder for OpusEncoder {
|
||||
}
|
||||
|
||||
fn set_inband_fec(&mut self, enabled: bool) {
|
||||
let _ = self.inner.set_inband_fec(enabled);
|
||||
// In DRED mode, ignore external requests to re-enable inband FEC —
|
||||
// running both mechanisms wastes bitrate on overlapping protection
|
||||
// and opusic-c's own docs recommend disabling inband FEC when DRED
|
||||
// is on. Trait callers that genuinely want classical FEC should set
|
||||
// `AUDIO_USE_LEGACY_FEC=1` and re-create the encoder.
|
||||
if !self.legacy_fec_mode {
|
||||
debug!(
|
||||
enabled,
|
||||
"set_inband_fec ignored: DRED mode is active (set AUDIO_USE_LEGACY_FEC to revert)"
|
||||
);
|
||||
return;
|
||||
}
|
||||
let mode = if enabled { InbandFec::Mode1 } else { InbandFec::Off };
|
||||
let _ = self.inner.set_inband_fec(mode);
|
||||
}
|
||||
|
||||
fn set_dtx(&mut self, enabled: bool) {
|
||||
let _ = self.inner.set_dtx(enabled);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use wzp_proto::AudioDecoder;
|
||||
|
||||
/// Phase 0 acceptance gate: fail loudly if the linked libopus is not 1.5.x.
|
||||
/// DRED (Phase 1+) only exists in libopus ≥ 1.5, so running against an
|
||||
/// older version would silently regress the entire DRED integration.
|
||||
#[test]
|
||||
fn linked_libopus_is_1_5() {
|
||||
let version = opusic_c::version();
|
||||
assert!(
|
||||
version.contains("1.5"),
|
||||
"expected libopus 1.5.x, got: {version}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encoder_creates_at_good_profile() {
|
||||
let enc = OpusEncoder::new(QualityProfile::GOOD).expect("opus encoder init");
|
||||
assert_eq!(enc.codec_id, CodecId::Opus24k);
|
||||
assert_eq!(enc.frame_samples(), 960); // 20 ms @ 48 kHz
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encoder_roundtrip_silence() {
|
||||
let mut enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
|
||||
let mut dec = crate::opus_dec::OpusDecoder::new(QualityProfile::GOOD).unwrap();
|
||||
let pcm_in = vec![0i16; 960]; // 20 ms silence
|
||||
let mut encoded = vec![0u8; 512];
|
||||
let n = enc.encode(&pcm_in, &mut encoded).unwrap();
|
||||
assert!(n > 0);
|
||||
let mut pcm_out = vec![0i16; 960];
|
||||
let samples = dec.decode(&encoded[..n], &mut pcm_out).unwrap();
|
||||
assert_eq!(samples, 960);
|
||||
}
|
||||
|
||||
// ─── Phase 1 — DRED duration policy ─────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn dred_duration_for_studio_tiers_is_100ms() {
|
||||
assert_eq!(dred_duration_for(CodecId::Opus32k), 10);
|
||||
assert_eq!(dred_duration_for(CodecId::Opus48k), 10);
|
||||
assert_eq!(dred_duration_for(CodecId::Opus64k), 10);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dred_duration_for_normal_tiers_is_200ms() {
|
||||
assert_eq!(dred_duration_for(CodecId::Opus16k), 20);
|
||||
assert_eq!(dred_duration_for(CodecId::Opus24k), 20);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dred_duration_for_degraded_tier_is_500ms() {
|
||||
assert_eq!(dred_duration_for(CodecId::Opus6k), 50);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dred_duration_for_codec2_is_zero() {
|
||||
assert_eq!(dred_duration_for(CodecId::Codec2_3200), 0);
|
||||
assert_eq!(dred_duration_for(CodecId::Codec2_1200), 0);
|
||||
assert_eq!(dred_duration_for(CodecId::ComfortNoise), 0);
|
||||
}
|
||||
|
||||
// ─── Phase 1 — Legacy escape hatch ──────────────────────────────────────
|
||||
|
||||
/// By default (env var unset), legacy mode is off.
|
||||
///
|
||||
/// This test does NOT manipulate the environment to avoid flakiness
|
||||
/// when the full suite runs in parallel. It only asserts on a freshly
|
||||
/// created encoder in the ambient environment.
|
||||
#[test]
|
||||
fn default_mode_is_dred_not_legacy() {
|
||||
// SAFETY: only run if the ambient env hasn't set the var externally.
|
||||
if std::env::var(LEGACY_FEC_ENV).is_ok() {
|
||||
return; // don't assert — someone set the env for a reason.
|
||||
}
|
||||
let enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
|
||||
assert!(!enc.is_legacy_fec_mode());
|
||||
}
|
||||
|
||||
// ─── Phase 1 — Behavioral regression: roundtrip still works ─────────────
|
||||
|
||||
#[test]
|
||||
fn dred_mode_roundtrip_voice_pattern() {
|
||||
// Use a realistic voice-like input (sine wave at speech frequencies)
|
||||
// so the encoder emits meaningful DRED data rather than trivially
|
||||
// compressible silence.
|
||||
let mut enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
|
||||
let mut dec = crate::opus_dec::OpusDecoder::new(QualityProfile::GOOD).unwrap();
|
||||
|
||||
let mut total_encoded_bytes = 0usize;
|
||||
// Run 50 frames (1 second) so DRED fills up and starts emitting.
|
||||
for frame_idx in 0..50 {
|
||||
let pcm_in: Vec<i16> = (0..960)
|
||||
.map(|i| {
|
||||
let t = (frame_idx * 960 + i) as f64 / 48_000.0;
|
||||
(8000.0 * (2.0 * std::f64::consts::PI * 300.0 * t).sin()) as i16
|
||||
})
|
||||
.collect();
|
||||
let mut encoded = vec![0u8; 512];
|
||||
let n = enc.encode(&pcm_in, &mut encoded).unwrap();
|
||||
assert!(n > 0);
|
||||
total_encoded_bytes += n;
|
||||
|
||||
let mut pcm_out = vec![0i16; 960];
|
||||
let samples = dec.decode(&encoded[..n], &mut pcm_out).unwrap();
|
||||
assert_eq!(samples, 960);
|
||||
}
|
||||
|
||||
// Effective bitrate after 1 second of encoding.
|
||||
// Opus 24k base + ~1 kbps DRED ≈ 25 kbps ≈ 3125 bytes/sec.
|
||||
// Allow generous headroom (2000 lower bound, 8000 upper bound) —
|
||||
// this is a behavioral regression check, not a tight bitrate assertion.
|
||||
// The exact value is printed with --nocapture for diagnostic use.
|
||||
eprintln!(
|
||||
"[phase1 bitrate probe] legacy_fec_mode={} total_encoded={} bytes/sec",
|
||||
enc.is_legacy_fec_mode(),
|
||||
total_encoded_bytes
|
||||
);
|
||||
assert!(
|
||||
total_encoded_bytes > 2000,
|
||||
"encoder output too small: {total_encoded_bytes} bytes/sec (DRED likely not emitting)"
|
||||
);
|
||||
assert!(
|
||||
total_encoded_bytes < 8000,
|
||||
"encoder output too large: {total_encoded_bytes} bytes/sec"
|
||||
);
|
||||
}
|
||||
|
||||
// ─── Phase 1 — set_profile updates DRED duration on tier switch ─────────
|
||||
|
||||
#[test]
|
||||
fn profile_switch_refreshes_dred_duration() {
|
||||
// Start on GOOD (Opus 24k, DRED 20 frames), switch to DEGRADED
|
||||
// (Opus 6k, DRED 50 frames). The encoder should accept both profile
|
||||
// changes without error. We can't directly observe the DRED duration
|
||||
// inside libopus, but apply_protection_mode returns Ok for both.
|
||||
let mut enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
|
||||
assert_eq!(enc.codec_id, CodecId::Opus24k);
|
||||
|
||||
enc.set_profile(QualityProfile::DEGRADED).unwrap();
|
||||
assert_eq!(enc.codec_id, CodecId::Opus6k);
|
||||
|
||||
enc.set_profile(QualityProfile::STUDIO_64K).unwrap();
|
||||
assert_eq!(enc.codec_id, CodecId::Opus64k);
|
||||
}
|
||||
|
||||
// ─── Phase 1 — Trait set_inband_fec is a no-op in DRED mode ─────────────
|
||||
|
||||
#[test]
|
||||
fn set_inband_fec_noop_in_dred_mode() {
|
||||
if std::env::var(LEGACY_FEC_ENV).is_ok() {
|
||||
return;
|
||||
}
|
||||
let mut enc = OpusEncoder::new(QualityProfile::GOOD).unwrap();
|
||||
// Should not error, should not re-enable inband FEC internally.
|
||||
enc.set_inband_fec(true);
|
||||
// We can't directly query libopus's inband FEC state through opusic-c,
|
||||
// but the call must not panic and the encoder must still work.
|
||||
let pcm_in = vec![0i16; 960];
|
||||
let mut encoded = vec![0u8; 512];
|
||||
let n = enc.encode(&pcm_in, &mut encoded).unwrap();
|
||||
assert!(n > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -584,6 +584,26 @@ pub enum SignalMessage {
|
||||
recommended_profile: crate::QualityProfile,
|
||||
},
|
||||
|
||||
/// Phase 4 telemetry: loss-recovery counts for the current session.
|
||||
/// Sent periodically from receivers to the relay so Prometheus metrics
|
||||
/// can distinguish DRED reconstructions from classical PLC invocations.
|
||||
/// Fields default to 0 on old receivers (`#[serde(default)]`), so
|
||||
/// introducing this variant is backward-compatible with pre-Phase-4
|
||||
/// relays — they'll just log "unknown signal variant" on receipt.
|
||||
LossRecoveryUpdate {
|
||||
/// Total frames reconstructed via DRED since call start (monotonic).
|
||||
#[serde(default)]
|
||||
dred_reconstructions: u64,
|
||||
/// Total frames filled via classical Opus/Codec2 PLC since call
|
||||
/// start (monotonic).
|
||||
#[serde(default)]
|
||||
classical_plc_invocations: u64,
|
||||
/// Total frames decoded since call start. Used by the relay to
|
||||
/// compute recovery rates as a fraction of total frames.
|
||||
#[serde(default)]
|
||||
frames_decoded: u64,
|
||||
},
|
||||
|
||||
/// Connection keepalive / RTT measurement.
|
||||
Ping { timestamp_ms: u64 },
|
||||
Pong { timestamp_ms: u64 },
|
||||
|
||||
@@ -29,6 +29,9 @@ pub struct RelayMetrics {
|
||||
pub session_rtt_ms: GaugeVec,
|
||||
pub session_underruns: IntCounterVec,
|
||||
pub session_overruns: IntCounterVec,
|
||||
// Phase 4: loss-recovery breakdown per session.
|
||||
pub session_dred_reconstructions: IntCounterVec,
|
||||
pub session_classical_plc: IntCounterVec,
|
||||
registry: Registry,
|
||||
}
|
||||
|
||||
@@ -130,6 +133,23 @@ impl RelayMetrics {
|
||||
)
|
||||
.expect("metric");
|
||||
|
||||
let session_dred_reconstructions = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"wzp_relay_session_dred_reconstructions_total",
|
||||
"Frames reconstructed via DRED (Deep REDundancy) per session",
|
||||
),
|
||||
&["session_id"],
|
||||
)
|
||||
.expect("metric");
|
||||
let session_classical_plc = IntCounterVec::new(
|
||||
Opts::new(
|
||||
"wzp_relay_session_classical_plc_total",
|
||||
"Frames filled via classical Opus/Codec2 PLC per session",
|
||||
),
|
||||
&["session_id"],
|
||||
)
|
||||
.expect("metric");
|
||||
|
||||
registry.register(Box::new(active_sessions.clone())).expect("register");
|
||||
registry.register(Box::new(active_rooms.clone())).expect("register");
|
||||
registry.register(Box::new(packets_forwarded.clone())).expect("register");
|
||||
@@ -147,6 +167,8 @@ impl RelayMetrics {
|
||||
registry.register(Box::new(session_rtt_ms.clone())).expect("register");
|
||||
registry.register(Box::new(session_underruns.clone())).expect("register");
|
||||
registry.register(Box::new(session_overruns.clone())).expect("register");
|
||||
registry.register(Box::new(session_dred_reconstructions.clone())).expect("register");
|
||||
registry.register(Box::new(session_classical_plc.clone())).expect("register");
|
||||
|
||||
Self {
|
||||
active_sessions,
|
||||
@@ -166,6 +188,8 @@ impl RelayMetrics {
|
||||
session_rtt_ms,
|
||||
session_underruns,
|
||||
session_overruns,
|
||||
session_dred_reconstructions,
|
||||
session_classical_plc,
|
||||
registry,
|
||||
}
|
||||
}
|
||||
@@ -217,6 +241,39 @@ impl RelayMetrics {
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 4: update per-session loss-recovery counters from a client's
|
||||
/// `LossRecoveryUpdate` signal message. The client sends monotonic
|
||||
/// totals (frames reconstructed since call start); we compute the
|
||||
/// delta against the current Prometheus counter and increment by it.
|
||||
/// IntCounterVec only increases, so a client restart that resets the
|
||||
/// counter to 0 simply produces no delta until the new totals exceed
|
||||
/// the Prometheus state.
|
||||
pub fn update_session_loss_recovery(
|
||||
&self,
|
||||
session_id: &str,
|
||||
dred_reconstructions: u64,
|
||||
classical_plc: u64,
|
||||
) {
|
||||
let cur_dred = self
|
||||
.session_dred_reconstructions
|
||||
.with_label_values(&[session_id])
|
||||
.get();
|
||||
if dred_reconstructions > cur_dred {
|
||||
self.session_dred_reconstructions
|
||||
.with_label_values(&[session_id])
|
||||
.inc_by(dred_reconstructions - cur_dred);
|
||||
}
|
||||
let cur_plc = self
|
||||
.session_classical_plc
|
||||
.with_label_values(&[session_id])
|
||||
.get();
|
||||
if classical_plc > cur_plc {
|
||||
self.session_classical_plc
|
||||
.with_label_values(&[session_id])
|
||||
.inc_by(classical_plc - cur_plc);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove all per-session label values for a disconnected session.
|
||||
pub fn remove_session_metrics(&self, session_id: &str) {
|
||||
let _ = self.session_buffer_depth.remove_label_values(&[session_id]);
|
||||
@@ -224,6 +281,10 @@ impl RelayMetrics {
|
||||
let _ = self.session_rtt_ms.remove_label_values(&[session_id]);
|
||||
let _ = self.session_underruns.remove_label_values(&[session_id]);
|
||||
let _ = self.session_overruns.remove_label_values(&[session_id]);
|
||||
let _ = self
|
||||
.session_dred_reconstructions
|
||||
.remove_label_values(&[session_id]);
|
||||
let _ = self.session_classical_plc.remove_label_values(&[session_id]);
|
||||
}
|
||||
|
||||
/// Get a reference to the underlying Prometheus registry.
|
||||
@@ -418,10 +479,13 @@ mod tests {
|
||||
};
|
||||
m.update_session_quality("sess-cleanup", &report);
|
||||
m.update_session_buffer("sess-cleanup", 42, 3, 1);
|
||||
m.update_session_loss_recovery("sess-cleanup", 17, 4);
|
||||
|
||||
// Verify they appear
|
||||
let output = m.metrics_handler();
|
||||
assert!(output.contains("sess-cleanup"));
|
||||
assert!(output.contains("wzp_relay_session_dred_reconstructions_total"));
|
||||
assert!(output.contains("wzp_relay_session_classical_plc_total"));
|
||||
|
||||
// Remove and verify they are gone
|
||||
m.remove_session_metrics("sess-cleanup");
|
||||
@@ -429,6 +493,55 @@ mod tests {
|
||||
assert!(!output.contains("sess-cleanup"));
|
||||
}
|
||||
|
||||
/// Phase 4: `LossRecoveryUpdate` totals are applied to the per-session
/// counters as monotonic deltas — counters follow increasing totals and
/// never move backwards when a lower total arrives.
#[test]
fn session_loss_recovery_monotonic_delta() {
    let m = RelayMetrics::new();
    let sess = "sess-dred";

    // Reads the current (DRED, PLC) counter values for the session.
    let totals = || {
        (
            m.session_dred_reconstructions
                .with_label_values(&[sess])
                .get(),
            m.session_classical_plc.with_label_values(&[sess]).get(),
        )
    };

    // First update: 10 DRED, 2 PLC.
    m.update_session_loss_recovery(sess, 10, 2);
    let (dred1, plc1) = totals();
    assert_eq!(dred1, 10);
    assert_eq!(plc1, 2);

    // Second update: 25 DRED, 5 PLC — counters advance by (15, 3).
    m.update_session_loss_recovery(sess, 25, 5);
    let (dred2, plc2) = totals();
    assert_eq!(dred2, 25);
    assert_eq!(plc2, 5);

    // Third update with LOWER values (e.g., client reset) — counters
    // hold steady, no decrement.
    m.update_session_loss_recovery(sess, 5, 1);
    let (dred3, plc3) = totals();
    assert_eq!(dred3, 25, "counter must not decrease");
    assert_eq!(plc3, 5, "counter must not decrease");

    // Fourth update: client caught up and exceeded the old max.
    m.update_session_loss_recovery(sess, 30, 8);
    let (dred4, plc4) = totals();
    assert_eq!(dred4, 30);
    assert_eq!(plc4, 8);
}
|
||||
|
||||
#[test]
|
||||
fn metrics_increment() {
|
||||
let m = RelayMetrics::new();
|
||||
|
||||
@@ -625,3 +625,123 @@ curl -s http://relay-host:9090/metrics | grep wzp_relay_active_sessions
|
||||
# Check federation probe health
|
||||
curl -s http://relay-host:9090/metrics | grep wzp_probe_up
|
||||
```
|
||||
|
||||
## Build Pipelines
|
||||
|
||||
All production artifacts (Android APK, Linux x86_64 binaries, Windows `.exe`) are built on **SepehrHomeserverdk** using Docker, not on developer workstations. The pipelines are fire-and-forget: a local script invokes a `tmux` session on the remote, the build runs in a Docker container, and the artifact is uploaded to `paste.dk.manko.yoga` (rustypaste) with a notification sent to `ntfy.sh/wzp` on start and completion.
|
||||
|
||||
### Docker images
|
||||
|
||||
Two long-lived images live on the remote:
|
||||
|
||||
| Image | Used by | Base | Key contents |
|
||||
|---|---|---|---|
|
||||
| `wzp-android-builder` | Android APK (Tauri mobile + legacy Kotlin), Linux x86_64 relay/CLI | Debian bookworm | Rust stable with Android targets, cargo-ndk, NDK 26.1, Android SDK (API 34 + 35 + 36), JDK 17, Gradle 8.5, Node.js 20, cmake, ninja, tauri-cli 2.x |
|
||||
| `wzp-windows-builder` | Windows x86_64 `.exe` | Debian bookworm | Rust stable with `x86_64-pc-windows-msvc` target, cargo-xwin (with pre-warmed MSVC CRT + Windows SDK cache), Node.js 20, cmake, ninja, clang, lld, nasm |
|
||||
|
||||
Both images are rebuilt rarely — once the base toolchain is stable, rebuilds are only needed to pick up new dependencies or security patches.
|
||||
|
||||
**Rebuilding an image** (fire-and-forget, ~10 min on a warm base):
|
||||
|
||||
```bash
|
||||
# Windows
|
||||
./scripts/build-windows-docker.sh --image-build
|
||||
|
||||
# Android (upload and rebuild handled by the Android build script itself — see
|
||||
# its --image-build flag or equivalent)
|
||||
```
|
||||
|
||||
The `--image-build` flag uploads the local Dockerfile to the remote, kicks off `docker build` under `nohup`, and returns immediately. Monitor with:
|
||||
|
||||
```bash
|
||||
ssh SepehrHomeserverdk 'tail -f /tmp/wzp-windows-image-build.log'
|
||||
```
|
||||
|
||||
### Pipeline: Android APK (Tauri Mobile)
|
||||
|
||||
```bash
|
||||
./scripts/build-tauri-android.sh # Full: pull + build + upload + notify
|
||||
./scripts/build-tauri-android.sh --no-pull # Skip git fetch
|
||||
./scripts/build-tauri-android.sh --clean # Force-clean Rust target
|
||||
```
|
||||
|
||||
- **Branch**: `android-rewrite`
|
||||
- **Image**: `wzp-android-builder`
|
||||
- **Build command**: `cargo tauri android build --release`
|
||||
- **Output**: `wzp-release.apk` → uploaded to rustypaste
|
||||
- **Notifications**: start + completion to `ntfy.sh/wzp`
|
||||
- **Remote artifact path**: `/mnt/storage/manBuilder/data/cache-android/target/…/release/app-release.apk`
|
||||
|
||||
### Pipeline: Linux x86_64 (relay + CLI + bench + web)
|
||||
|
||||
```bash
|
||||
./scripts/build-linux-docker.sh # Fire-and-forget
|
||||
./scripts/build-linux-docker.sh --no-pull # Skip git fetch
|
||||
./scripts/build-linux-docker.sh --clean # Force-clean target
|
||||
./scripts/build-linux-docker.sh --install # Wait for completion and download locally
|
||||
```
|
||||
|
||||
- **Branch**: `feat/android-voip-client` (script default — override by editing the script or passing an env var)
|
||||
- **Image**: `wzp-android-builder` (shared, not a separate Linux-only image)
|
||||
- **Targets built**: `wzp-relay`, `wzp-client`, `wzp-client-audio` (with `--features audio`), `wzp-web`, `wzp-bench`
|
||||
- **Output**: `wzp-linux-x86_64.tar.gz` with all five binaries → uploaded to rustypaste
|
||||
- **Local landing dir** (with `--install`): `target/linux-x86_64/`
|
||||
|
||||
### Pipeline: Windows x86_64 (`wzp-desktop.exe`)
|
||||
|
||||
```bash
|
||||
./scripts/build-windows-docker.sh # Full: pull + build + download locally
|
||||
./scripts/build-windows-docker.sh --no-pull # Skip git fetch
|
||||
./scripts/build-windows-docker.sh --rust # Force-clean target-windows cache
|
||||
./scripts/build-windows-docker.sh --image-build # Rebuild the Docker image (fire-and-forget)
|
||||
```
|
||||
|
||||
- **Branch**: `feat/desktop-audio-rewrite`
|
||||
- **Image**: `wzp-windows-builder`
|
||||
- **Build command**: `cargo xwin build --release --target x86_64-pc-windows-msvc --bin wzp-desktop`
|
||||
- **Output**: `wzp-desktop.exe` (~16 MB) → downloaded to `target/windows-exe/wzp-desktop.exe`, also uploaded to rustypaste
|
||||
- **Target cache volume**: `target-windows` (separate from the Android target cache to avoid cross-contamination between target triples)
|
||||
- **Shared cache volumes**: `cargo-registry`, `cargo-git` (shared with Android — both pipelines pull the same crates)
|
||||
|
||||
**A/B-preserving workflow** for testing audio backends: rename the prior `.exe` before re-running the build, so both coexist:
|
||||
|
||||
```bash
|
||||
# Preserve prior build as the noAEC baseline
|
||||
mv target/windows-exe/wzp-desktop.exe target/windows-exe/wzp-desktop-noAEC.exe
|
||||
./scripts/build-windows-docker.sh
|
||||
ls -la target/windows-exe/
|
||||
# wzp-desktop-noAEC.exe (previous build)
|
||||
# wzp-desktop.exe (new build)
|
||||
```
|
||||
|
||||
### Alternative pipeline: Windows via Hetzner Cloud VPS
|
||||
|
||||
For situations where Docker image rebuilds would be disruptive, or for one-shot debug builds on a clean machine:
|
||||
|
||||
```bash
|
||||
./scripts/build-windows-cloud.sh # Full: create VM → build → download → destroy
|
||||
./scripts/build-windows-cloud.sh --prepare # Create VM + install deps, don't build
|
||||
./scripts/build-windows-cloud.sh --build # Build on existing VM
|
||||
./scripts/build-windows-cloud.sh --transfer # Download .exe from existing VM
|
||||
./scripts/build-windows-cloud.sh --destroy # Delete the VM
|
||||
WZP_KEEP_VM=1 ./scripts/build-windows-cloud.sh # Don't auto-destroy after successful build
|
||||
```
|
||||
|
||||
- **Provider**: Hetzner Cloud
|
||||
- **Default server type**: `cx33` (8 GB RAM, 8 vCPU — `cx23` with 4 GB OOMs on the tauri+rustls cross-compile)
|
||||
- **Image**: `ubuntu-24.04`
|
||||
- **SSH key**: must be named `wz` in Hetzner and loaded in the local ssh-agent
|
||||
- **Reminder**: set `WZP_KEEP_VM=1` for multi-build sessions, then **remember to `--destroy` at end of day** so the VM isn't left running overnight. This is tracked in the auto-memory as `feedback_keep_windows_builder_vm.md`.
|
||||
|
||||
### Notifications
|
||||
|
||||
All pipelines post to `https://ntfy.sh/wzp`. Subscribe from your phone via the [ntfy.sh app](https://ntfy.sh/) to get push notifications on build start/success/failure. Messages include the short git hash and the rustypaste URL on success:
|
||||
|
||||
```
|
||||
WZP Windows build OK [03a80a3] (16M)
|
||||
https://paste.dk.manko.yoga/<uuid>/wzp-desktop.exe
|
||||
```
|
||||
|
||||
### Rustypaste credentials
|
||||
|
||||
Build pipelines read `rusty_address` and `rusty_auth_token` from the `.env` file at `/mnt/storage/manBuilder/.env` on SepehrHomeserverdk. Local scripts that upload directly (`build-windows-cloud.sh` when run in `--transfer` mode) read from `~/.wzp/rustypaste.env` with the same variable names. Both files must be kept in sync manually if rotated.
|
||||
|
||||
@@ -872,3 +872,71 @@ warzonePhone/
|
||||
| wzp-relay | 40 + 4 integration | Room ACL, session mgmt, metrics, probes, mesh, trunking |
|
||||
| wzp-client | 30 + 2 integration | Encoder/decoder, quality adapter, silence, drift, sweep |
|
||||
| wzp-web | 2 | Metrics |
|
||||
|
||||
## Audio Backend Architecture (Platform Matrix)
|
||||
|
||||
WarzonePhone's audio I/O goes through one of four backends depending on the target platform and feature flags. All backends expose the same public API (`AudioCapture::start() → AudioCapture { ring(), stop() }`) via conditional re-exports in `crates/wzp-client/src/lib.rs`, so the `CallEngine` above the audio layer doesn't know or care which backend is running.
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ CallEngine (platform-agnostic) │
|
||||
│ reads PCM from AudioCapture::ring() │
|
||||
│ writes PCM to AudioPlayback::ring() │
|
||||
└────────────────────┬────────────────────────┘
|
||||
│
|
||||
┌─────────────────────┼─────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌───────────────┐ ┌────────────────┐ ┌───────────────┐
|
||||
│ audio_io │ │ audio_vpio │ │ audio_wasapi │
|
||||
│ (CPAL) │ │ (Core Audio │ │ (Windows │
|
||||
│ │ │ VoiceProc IO) │ │ IAudioClient2│
|
||||
│ All platforms │ │ macOS only │ │ Windows │
|
||||
│ (baseline) │ │ feature=vpio │ │ feature= │
|
||||
│ │ │ │ │ windows-aec │
|
||||
└───────────────┘ └────────────────┘ └───────────────┘
|
||||
│
|
||||
▼ on Android only
|
||||
┌───────────────┐
|
||||
│ wzp-native │
|
||||
│ (Oboe bridge │
|
||||
│ via dlopen) │
|
||||
│ │
|
||||
│ Android only │
|
||||
│ libloading │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
### Backend selection matrix
|
||||
|
||||
| Platform | Capture | Playback | OS AEC | Feature flags |
|
||||
|---|---|---|---|---|
|
||||
| macOS | VoiceProcessingIO (native Core Audio) | CPAL | **Yes** — Apple's hardware-accelerated AEC (same AEC as FaceTime, iMessage audio, Voice Memos) | `audio`, `vpio` |
|
||||
| Windows (AEC build) | Direct WASAPI with `AudioCategory_Communications` | CPAL | **Yes** — Windows routes the capture stream through the driver's communications APO chain (AEC + NS + AGC), driver-dependent quality | `audio`, `windows-aec` |
|
||||
| Windows (baseline) | CPAL (WASAPI shared mode) | CPAL | No | `audio` |
|
||||
| Linux | CPAL (ALSA / PulseAudio) | CPAL | No | `audio` |
|
||||
| Android (Tauri Mobile) | Oboe via `wzp-native` cdylib, `Usage::VoiceCommunication` + `MODE_IN_COMMUNICATION` | Same Oboe stream | Depends on device (some Android devices apply AEC to the voice-communication stream, most do not) | none (`wzp-client` compiled with `default-features = false`) |
|
||||
|
||||
### Why `wzp-native` is a standalone cdylib
|
||||
|
||||
On Android, the audio backend lives in a separate cdylib crate (`crates/wzp-native`) that `wzp-desktop`'s lib crate loads at runtime via `libloading`. It is **not** linked as a regular Rust dep.
|
||||
|
||||
This is deliberate. rust-lang/rust#104707 documents that a crate with `crate-type = ["cdylib", "staticlib"]` leaks non-exported symbols from the staticlib into the cdylib. On Android, that caused Bionic's private `__init_tcb` / `pthread_create` symbols to be bound LOCALLY inside our `.so` instead of resolved dynamically against `libc.so` at `dlopen` time — which crashed the app at launch as soon as `tao` tried to `std::thread::spawn()` from the JNI `onCreate` callback.
|
||||
|
||||
Keeping `wzp-native` in its own cdylib and loading it via `libloading` means:
|
||||
|
||||
1. The app's own `.so` has `crate-type = ["cdylib", "rlib"]` only — no `staticlib`, no symbol leak.
|
||||
2. `libwzp_native.so` is loaded via `System.loadLibrary` from the JVM side (or `dlopen` from Rust), which triggers the normal Bionic resolver and binds all private symbols against `libc.so` at load time.
|
||||
3. The C/C++ Oboe bridge is fully isolated inside `libwzp_native.so`'s symbol space — no chance of its archives leaking into `wzp-desktop`'s `.so`.
|
||||
|
||||
See `docs/BRANCH-android-rewrite.md` for the full incident postmortem and `docs/incident-tauri-android-init-tcb.md` for the debug log.
|
||||
|
||||
### Vendored `audiopus_sys` for libopus / clang-cl cross-compile
|
||||
|
||||
The workspace root carries a vendored copy of `audiopus_sys` at `vendor/audiopus_sys/` with a patched `opus/CMakeLists.txt`. This is needed because libopus 1.3.1 gates its per-file `-msse4.1` / `-mssse3` `COMPILE_FLAGS` behind `if(NOT MSVC)`, and under `clang-cl` (used by `cargo-xwin` for Windows cross-compiles) CMake sets `MSVC=1` unconditionally — so the SIMD source files compile without the required target feature and fail to link the intrinsic `always_inline` functions.
|
||||
|
||||
The patch introduces an `MSVC_CL` variable that is true only for real `cl.exe` (distinguished via `CMAKE_C_COMPILER_ID STREQUAL "MSVC"`), and flips the eight `if(NOT MSVC)` SIMD guards to `if(NOT MSVC_CL)` so clang-cl gets the GCC-style per-file flags. Wired in via `[patch.crates-io] audiopus_sys = { path = "vendor/audiopus_sys" }` at the workspace root.
|
||||
|
||||
This does not affect macOS or Linux builds — on those platforms `MSVC=0` everywhere so the patched logic behaves identically to upstream.
|
||||
|
||||
Upstream tracking: xiph/opus#256, xiph/opus PR #257 (both stale).
|
||||
|
||||
139
docs/BRANCH-android-rewrite.md
Normal file
139
docs/BRANCH-android-rewrite.md
Normal file
@@ -0,0 +1,139 @@
|
||||
# Branch: `android-rewrite`
|
||||
|
||||
Pivot away from the legacy Kotlin + JNI Android client to a pure-Rust **Tauri 2.x Mobile** app that shares the same frontend and backend code as the desktop client.
|
||||
|
||||
## Why this branch exists
|
||||
|
||||
The Kotlin + JNI stack was a crash factory. Every failure mode we hit was at the Kotlin ↔ Rust boundary, and each fix uncovered the next layer of the onion:
|
||||
|
||||
| Symptom | Root cause | Fix |
|
||||
|---|---|---|
|
||||
| App crashed on launch before `onCreate` returned | `__init_tcb` / `pthread_create` bionic private symbols leaking out of `libwzp_android.so` because the Rust crate used `crate-type = ["cdylib", "staticlib"]`. rust-lang/rust#104707 documents that staticlib alongside cdylib leaks non-exported symbols from the staticlib into the cdylib, and Bionic's private internal pthread symbols got bound LOCALLY inside our `.so` instead of resolved against `libc.so` at `dlopen` time | Dropped `staticlib` from the crate-type list. `crate-type = ["cdylib", "rlib"]` only. |
|
||||
| Stack overflow on `place_call` | `Dispatchers.IO` threads have a ~512 KB stack, too small for the Rust signal-connect path that does TLS handshake + quinn setup inside one closure | Launched JNI calls from a dedicated `java.lang.Thread` with an explicit 8 MB stack |
|
||||
| `ring` / `libcrypto` TLS reuse crash on second call | tokio runtime got dropped between calls, but `ring` keeps a TLS-stored SSL context that is invalidated when the runtime thread is reused by a new runtime — `ring` sees stale context and segfaults | Single long-lived tokio runtime for the entire signal client lifetime; split `start()` into an inline `connect+register` path and a `run()` path on a separate thread to avoid the `thread::spawn` closure's stack overflow |
|
||||
| Null dereference on register with fresh install | Identity seed file existed but was empty; the Rust side dereferenced the zero-length slice | Generate seed if empty on register |
|
||||
|
||||
Every fix kept the app limping along but the fundamental design problem remained: **state management was split across a Kotlin ViewModel and a Rust engine, with a hand-rolled JNI bridge in between that had to be perfect to not crash**. The working desktop Tauri client (with the same Rust backend) had none of these problems because it spoke to the Rust code via in-process `invoke()` from a WebView, not JNI.
|
||||
|
||||
So: rewrite the Android app as a **Tauri 2.x Mobile app**, reusing the entire desktop codebase verbatim (`main.ts`, `style.css`, `index.html`, `main.rs`, `engine.rs` — everything). Tauri Mobile added Android support in v2; it is production-ready and eliminates the JNI boundary entirely.
|
||||
|
||||
The incident postmortem lives at [`docs/incident-tauri-android-init-tcb.md`](incident-tauri-android-init-tcb.md).
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Tauri 2.x Mobile │
|
||||
│ │
|
||||
│ Android WebView ────────── HTML/JS/CSS │ ← Shared with desktop
|
||||
│ │ (main.ts) │
|
||||
│ │ │
|
||||
│ invoke() ─────────────── Rust Commands │ ← Shared with desktop
|
||||
│ (main.rs) │
|
||||
│ │ │
|
||||
│ ┌───────────────┼────────────┐ │
|
||||
│ │ │ │ │
|
||||
│ SignalMgr CallEngine Identity │ ← Shared crates
|
||||
│ (signal_hub) (wzp-client) (wzp-crypto)│
|
||||
│ │ │ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ QUIC to relay Oboe audio (Android) │
|
||||
│ via wzp-native cdylib │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**What is reused from desktop verbatim** (zero rewrite):
|
||||
|
||||
- `desktop/src/main.ts` — entire frontend
|
||||
- `desktop/src/style.css` — all styling
|
||||
- `desktop/src/identicon.ts` — identicon rendering
|
||||
- `desktop/index.html` — HTML structure
|
||||
- `desktop/src-tauri/src/main.rs` — all Tauri commands (`connect`, `disconnect`, `register_signal`, `place_call`, …)
|
||||
- `desktop/src-tauri/src/engine.rs` — `CallEngine` wrapper
|
||||
|
||||
**What is Android-specific**:
|
||||
|
||||
- `desktop/src-tauri/src/android_audio.rs` — JVM-side audio routing (`AudioManager.setSpeakerphoneOn` for earpiece/speaker toggle). Runs from Tauri's existing JNI context — no hand-rolled bridge, Tauri owns the JVM hookup.
|
||||
- `desktop/src-tauri/src/wzp_native.rs` — runtime `dlopen` of `libwzp_native.so`, a standalone cdylib crate (`crates/wzp-native`) that owns all C++ (Oboe bridge). Kept in its own crate so its C/C++ static archives never get statically linked into `wzp-desktop`'s `.so`, which would re-trigger the `__init_tcb` / pthread leak.
|
||||
- `crates/wzp-native/` — the standalone C++/Oboe bridge cdylib. Loaded via `libloading` at runtime from `wzp_native.rs`. Provides capture + playout streams using Oboe's `Usage::VoiceCommunication` + `MODE_IN_COMMUNICATION` combo.
|
||||
- Android-specific target dependencies in `desktop/src-tauri/Cargo.toml` (`jni`, `ndk-context`, `libloading`) — no CPAL, no VPIO.
|
||||
|
||||
## Key architectural decisions
|
||||
|
||||
### 1. `wzp-native` as a standalone cdylib loaded via `libloading`
|
||||
|
||||
The alternative — linking `wzp-native` as a regular Rust dep with C++ static archives — would cause the same `__init_tcb` crash that killed the Kotlin version. Because `wzp-native` is its own cdylib that is `dlopen`-ed at runtime, Bionic's dynamic linker resolves every symbol against `libc.so` at load time the way it's supposed to, and no private TCB symbols leak.
|
||||
|
||||
### 2. `crate-type = ["cdylib", "rlib"]` only (no `staticlib`)
|
||||
|
||||
Same reason. The `rlib` output is needed so the `wzp-desktop` binary target can link against the library; `cdylib` is needed for Android's `System.loadLibrary`; `staticlib` would reintroduce the symbol-leak bug.
|
||||
|
||||
### 3. Oboe audio config
|
||||
|
||||
`Usage::VoiceCommunication` + Java-side `MODE_IN_COMMUNICATION`. **Never** call `setAudioApi(AAudio)` explicitly — on some devices (Nothing Phone in particular) it causes Oboe to open the wrong stream type and audio goes silent. Let Oboe pick the audio API automatically. This is documented in the auto-memory `project_tauri_android_audio.md`.
|
||||
|
||||
### 4. Speaker/earpiece toggle uses `tokio::task::spawn_blocking`
|
||||
|
||||
Oboe's `stop()` + `start()` cycle is synchronous and can block for 50–200 ms. Calling it on the tokio executor stalls every other async task (including the QUIC datagram loop), dropping audio packets. Wrapping the toggle in `spawn_blocking` isolates it to a dedicated thread pool. Fixed in commit `76a4c53`.
|
||||
|
||||
## Build pipeline
|
||||
|
||||
Docker on SepehrHomeserverdk, same pattern as the Android legacy pipeline and the Windows pipeline:
|
||||
|
||||
```
|
||||
./scripts/build-tauri-android.sh # Full: pull + build + ntfy + rustypaste
|
||||
./scripts/build-tauri-android.sh --pull # Explicit git pull (default)
|
||||
./scripts/build-tauri-android.sh --clean # Blow away the Rust target cache
|
||||
```
|
||||
|
||||
**Image**: `wzp-android-builder` (shared with the legacy Kotlin pipeline). The Dockerfile was extended to install Node.js 20 LTS, Android API level 36, build-tools 35.0.0, tauri-cli 2.x, and all four Android Rust targets on top of the legacy NDK 26.1 + cargo-ndk + Gradle setup. Both pipelines coexist in the same image.
|
||||
|
||||
**Output**: `wzp-release.apk` uploaded to rustypaste, URL delivered via `ntfy.sh/wzp`.
|
||||
|
||||
## Known quirks (Tauri Mobile specific)
|
||||
|
||||
1. **tauri-cli `android init` writes absolute paths** into `gradle.properties` for the NDK path. Those paths are local to wherever `android init` was run, so they break any cross-machine build unless overridden with `ANDROID_NDK_HOME` at build time. The build script exports `ANDROID_NDK_HOME` explicitly to work around this.
|
||||
|
||||
2. **API 36 vs API 34 coexistence**: the legacy Kotlin pipeline targets API 34, while Tauri Mobile 2.x wants compileSdk 36. The shared Docker image installs both SDK levels so neither pipeline needs to reinstall.
|
||||
|
||||
3. **Identity seed lives in Android-specific app data dir**: `/data/data/com.wzp.phone/files/.wzp/identity` instead of `$HOME/.wzp/identity`. The shared `load_or_create_seed()` function in `desktop/src-tauri/src/lib.rs` uses Tauri's `app_data_dir()` which resolves correctly on both Android and desktop — no per-platform code needed.
|
||||
|
||||
4. **Direct calls on macOS previously hit an identity mismatch bug** — the `CallEngine` was using `$HOME/.wzp/identity` directly while `register_signal` used Tauri's `app_data_dir()`. Fixed by routing both through `load_or_create_seed()` (commit `2fd9465`). This was important for cross-platform consistency.
|
||||
|
||||
## Current state (snapshot)
|
||||
|
||||
What works:
|
||||
|
||||
- Tauri Mobile scaffold builds and runs on Android
|
||||
- Signal hub connect + register works
|
||||
- Room mode (SFU group calls) works with Oboe audio
|
||||
- Direct 1:1 calls work with full parity to desktop
|
||||
- Speaker/earpiece toggle works without stalling the audio pipeline
|
||||
- Call history, recent contacts, deregister UI all present (inherited from desktop)
|
||||
|
||||
What remains (task list refs in parens):
|
||||
|
||||
- Background service for keeping signal alive when app is backgrounded (#19)
|
||||
- Proper permission requests (microphone, notifications) on first launch (#19)
|
||||
- Incoming call notification while backgrounded (#19)
|
||||
- App icon + splash screen (#19)
|
||||
|
||||
## Testing
|
||||
|
||||
- **Build**: `./scripts/build-tauri-android.sh` — verify the APK lands on rustypaste and installs on device.
|
||||
- **Smoke test**: Install → open app → Register → Place call → Receive call. No crashes, audio flows both ways.
|
||||
- **Speaker toggle**: During a call, toggle speaker/earpiece several times in rapid succession. Audio should never stop, and the toggle should respond within ~200 ms.
|
||||
- **Stress test**: Call for 10+ minutes continuous. No memory growth, no packet loss beyond what's attributable to the network.
|
||||
|
||||
## Files of interest
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `desktop/src-tauri/src/lib.rs` | Shared Tauri commands (desktop + Android) |
|
||||
| `desktop/src-tauri/src/android_audio.rs` | JVM-side speaker/earpiece routing |
|
||||
| `desktop/src-tauri/src/wzp_native.rs` | Runtime dlopen of libwzp_native.so |
|
||||
| `crates/wzp-native/` | Standalone C++/Oboe cdylib, loaded at runtime |
|
||||
| `scripts/build-tauri-android.sh` | Remote Docker build pipeline |
|
||||
| `scripts/Dockerfile.android-builder` | Shared Android Docker image (legacy + Tauri) |
|
||||
| `docs/incident-tauri-android-init-tcb.md` | Postmortem of the Kotlin+JNI crash cascade |
|
||||
22
docs/PRD-desktop-direct-calling.md
Normal file
22
docs/PRD-desktop-direct-calling.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# PRD: Desktop Direct Calling — Backport SignalManager
|
||||
|
||||
## Problem
|
||||
|
||||
The desktop Tauri app has the direct calling UI (Room/Direct Call toggle, Register, Call buttons) but the backend uses inline async code in `main.rs` instead of a proper `SignalManager`. This needs to be backported from the Android refactor.
|
||||
|
||||
## Tasks
|
||||
|
||||
1. **Create `signal_mgr.rs` for desktop** — same pattern as Android, or reuse the crate directly
|
||||
2. **Wire into Tauri commands** — `register_signal` should use `SignalManager::connect()` + `run_recv_loop()` on a dedicated thread
|
||||
3. **State polling** — `get_signal_status` should call `SignalManager::get_state_json()`
|
||||
4. **place_call / answer_call** — delegate to SignalManager methods
|
||||
5. **Merge android branch into desktop branch** — resolve the 37 desktop-only + 90 android-only commit divergence
|
||||
6. **Test** — Android calls Desktop, Desktop calls Android
|
||||
|
||||
## UI Fixes
|
||||
|
||||
1. **Default alias** — generate random name on first start (like Android does)
|
||||
2. **Default room** — change from "android" to "general"
|
||||
3. **Fingerprint display** — ensure full `xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx` format (not truncated)
|
||||
4. **Deregister button** — ability to disconnect signal channel
|
||||
5. **Call state reset** — after hangup, return to "Registered" state, not stuck on "Ringing"
|
||||
360
docs/PRD-dred-integration.md
Normal file
360
docs/PRD-dred-integration.md
Normal file
@@ -0,0 +1,360 @@
|
||||
# PRD: DRED Integration & Opus-Tier FEC Simplification
|
||||
|
||||
## Problem
|
||||
|
||||
WarzonePhone's audio loss-recovery stack is built around classical Opus + application-level RaptorQ FEC. It was the right answer when WZP was designed, but libopus 1.5 (December 2023) introduced **Deep REDundancy (DRED)** — a neural speech-recovery feature that is strictly better than classical FEC for the loss patterns VoIP calls actually experience. We are paying real latency, bitrate, and complexity costs for protection that DRED now does better and cheaper.
|
||||
|
||||
Concretely, on every Opus call today we pay:
|
||||
|
||||
- **~40–100 ms of receiver-side latency** waiting for RaptorQ block completion before decode
|
||||
- **10–20% bitrate overhead** from RaptorQ repair symbols (more on studio profiles)
|
||||
- **~20–40% codec-internal overhead** from Opus inband FEC (LBRR)
|
||||
- Classical Opus PLC on loss bursts exceeding the RaptorQ block size — which sounds robotic and gap-ridden
|
||||
|
||||
…in exchange for bit-exact recovery of isolated single-frame losses, which is perceptually indistinguishable from classical Opus PLC for 20 ms of speech. The protection is misaligned with the failure modes.
|
||||
|
||||
DRED delivers:
|
||||
|
||||
- **Zero added receive latency** — reconstruction runs only on detected loss
|
||||
- **~1 kbps flat bitrate overhead** regardless of base bitrate
|
||||
- **Plausible reconstruction of bursts up to ~1 second** — DRED's headline capability, exactly the regime RaptorQ can't touch
|
||||
- Neural PLC that sounds like continuous speech, not a gap
|
||||
|
||||
We also have a second, unrelated problem blocking adoption: our FFI crate `audiopus_sys 0.2.2` vendors **libopus 1.3**, predating DRED entirely. We cannot enable DRED without first swapping the FFI layer. The naïve choice (`opus` crate from SpaceManiac) is a trap — it depends on the same dead `audiopus_sys`. The real target is `opusic-c 1.5.5` by DoumanAsh, which vendors libopus 1.5.2 with full DRED support and documents Android NDK cross-compile.
|
||||
|
||||
This PRD covers the FFI swap, DRED enablement, the decision to **remove RaptorQ and Opus inband FEC from the Opus tiers entirely** (keeping RaptorQ only for Codec2 where DRED is N/A), and the jitter buffer refactor that the DRED lookahead/backfill pattern requires.
|
||||
|
||||
## Goals
|
||||
|
||||
- Replace `audiopus 0.3.0-rc.0` + `audiopus_sys 0.2.2` (dead upstream, libopus 1.3) with `opusic-c 1.5.5` + `opusic-sys 0.6.0` (active upstream, libopus 1.5.2)
|
||||
- Enable DRED on every Opus profile with a tiered duration policy, lower at studio bitrates and higher at degraded bitrates
|
||||
- Disable Opus inband FEC (LBRR) on all Opus profiles — opusic-c's own docs recommend this, and it overlaps DRED's job
|
||||
- Remove `wzp-fec` (RaptorQ) from the Opus tiers entirely — the latency and bitrate savings are real, and DRED strictly dominates it on speech
|
||||
- Keep RaptorQ + current FEC ratios on the Codec2 tiers unchanged — DRED is libopus-only, Codec2 has no neural equivalent
|
||||
- Refactor `wzp-transport::jitter` to a lookahead/backfill pattern that lets DRED reconstruct loss windows when the next packet arrives, instead of the current "wait for block completion or fall through to classical PLC" policy
|
||||
- Ship behind a runtime escape hatch (`AUDIO_USE_LEGACY_FEC`) for the first rollout window so we can revert to RaptorQ if DRED has surprises in real-world conditions
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Changing Codec2 at all. Codec2 1200 / 3200 are outside the DRED lineage and keep their current RaptorQ protection, block sizes, and PLC path.
|
||||
- Adding new Opus bitrate tiers or changing the quality adaptation thresholds. This PRD is about the protection layer, not the bitrate ladder.
|
||||
- Enabling OSCE (Opus Speech Coding Enhancement — a separate libopus 1.5 neural post-processor that opusic-c exposes via an `osce` feature flag). Valuable, complementary, and free once opusic-c is in — but out of scope here to keep the PRD focused. Track as follow-up.
|
||||
- Video, audio-over-MoQ, or any protocol-layer changes discussed in prior conversations.
|
||||
- Touching the wzp-web / browser client. Browser Opus is a separate codepath via WebAudio / WASM libopus and is not affected by the native FFI swap.
|
||||
|
||||
## Background
|
||||
|
||||
### How the three protection mechanisms actually differ
|
||||
|
||||
| | Opus inband FEC (LBRR) | RaptorQ (wzp-fec) | DRED |
|
||||
|---|---|---|---|
|
||||
| Layer | codec-internal | application, across Opus packets | codec-internal |
|
||||
| What it sends | low-bitrate copy of the *previous* frame, embedded in every packet | fountain-code repair symbols across a block | neural-coded history of the recent past |
|
||||
| Protection horizon | 1 packet back | block duration (currently 100 ms, proposed 40 ms) | configurable, 0–1040 ms |
|
||||
| Recovery granularity | 1 frame (lower quality) | 1 frame (bit-exact) | 10 ms frames (plausible reconstruction) |
|
||||
| Latency cost | 0 ms | block duration on receive | 0 ms |
|
||||
| Bitrate cost | ~20–40% of base | `fec_ratio × base` (currently +20% GOOD, +50% DEGRADED) | ~1 kbps flat |
|
||||
| Effective loss tolerance | ~single-packet losses | up to `(repair symbols / block)` losses, cliff beyond | bursts up to the configured duration |
|
||||
| Content assumption | any Opus audio | any | speech (DRED model is speech-trained) |
|
||||
|
||||
### Why DRED dominates on the Opus tiers
|
||||
|
||||
Loss-scenario walkthrough (verified against opusic-c and libopus 1.5 docs):
|
||||
|
||||
- **1-frame loss (20 ms)**: RaptorQ recovers bit-exactly, DRED wouldn't run (classical Opus PLC is perceptually indistinguishable for single 20 ms frames). RaptorQ "wins" on paper but not on ears.
|
||||
- **2–3 frame burst (40–60 ms)**: RaptorQ at current ratio 0.2 hits its tolerance cliff. DRED handles this trivially — well within a 200 ms window.
|
||||
- **5–10 frame burst (100–200 ms)**: RaptorQ completely overwhelmed at any reasonable ratio. DRED's sweet spot.
|
||||
- **10+ frame burst (>200 ms)**: RaptorQ useless. DRED at 500–1000 ms still recovers.
|
||||
|
||||
The only scenario where RaptorQ strictly beats DRED is bit-exact recovery of isolated single-frame losses — which is perceptually irrelevant for speech. In every other scenario DRED either ties or wins.
|
||||
|
||||
### Why Codec2 keeps RaptorQ
|
||||
|
||||
DRED lives inside libopus — it does not help Codec2 at all. Codec2's classical PLC is a parametric-vocoder interpolation that produces noticeably robotic artifacts on loss. On the Codec2 tiers, RaptorQ is the only protection we have, and it should stay at current ratios (1.0 on CATASTROPHIC, 0.5 on the Codec2 3200 tier).
|
||||
|
||||
### The opusic-c / opusic-sys situation
|
||||
|
||||
- `opusic-sys 0.6.0` — FFI crate, published 2026-03-17, vendors libopus 1.5.2 via its `bundled` feature (on by default), documents Android NDK cross-compile via `ANDROID_NDK_HOME` (which our `wzp-android/build.rs` already sets). Exposes raw bindings to `opus_dred_parse`, `opus_decoder_dred_decode`, and the `OpusDRED` state struct.
|
||||
- `opusic-c 1.5.5` — high-level safe wrapper. Its **encoder** side is fine: exposes `Encoder::set_dred_duration(value: u8) -> Result<(), ErrorCode>` with range `0..=104` (each unit is 10 ms, so 0–1040 ms configurable). Also exposes `set_bitrate`, `set_inband_fec`, `set_dtx`, `set_packet_loss`, `set_signal`, `set_complexity`, `set_bandwidth`, `set_application` on the encoder.
|
||||
- **opusic-c's decoder-side DRED wrapper is NOT sufficient for our architecture.** Confirmed by reading the source of `opusic-c/src/dred.rs`:
|
||||
1. `Dred::decode_to` ignores the `dred_end` output of `opus_dred_parse` (prefixed `_dred_end`), so the caller cannot know how much DRED history a given packet actually carried.
|
||||
2. In `opus_decoder_dred_decode(decoder, dred, dred_offset, pcm, frame_size)`, the wrapper passes `frame_size` to BOTH the `dred_offset` and `frame_size` arguments. This looks like a bug — it means reconstruction always starts at offset `frame_size` into the DRED window, not at an arbitrary caller-chosen offset. Arbitrary-gap reconstruction (which we need for the lookahead/backfill pattern) requires proper offset control.
|
||||
3. `DredPacket` is owned internally by a `Dred` instance; its internal buffer is overwritten on every `decode_to` call. We cannot hold a ring of parsed DredPackets from multiple recent arrivals — which is exactly what the lookahead/backfill jitter buffer pattern requires.
|
||||
- **Decision**: use opusic-c for the encoder path (its wrapper is correct and saves work), and drop to `opusic-sys` raw FFI for the entire decoder path AND the DRED reconstruction path. Both use a single shared `DecoderHandle` so internal decoder state stays consistent. **Verified at pre-flight**: `opusic_c::Decoder.inner` is `pub(crate)`, so there is no way to reach the raw `*mut OpusDecoder` from outside opusic-c. Running two parallel decoders (one from opusic-c for audio, one from opusic-sys for DRED) would cause state drift because the DRED-only decoder wouldn't see the normal decode calls. Single unified decoder via opusic-sys is the only correct architecture.
|
||||
- **Three FFI handles required** per call: `opusic_c::Encoder` (encoder side, unchanged), our own `DecoderHandle` wrapping `*mut OpusDecoder` from opusic-sys (for normal decode AND for the `OpusDecoder` pointer passed to `opus_decoder_dred_decode`), and a new `DredDecoderHandle` wrapping `*mut OpusDREDDecoder` from opusic-sys (passed to `opus_dred_parse`). Note: `OpusDREDDecoder` is a **separate struct** from `OpusDecoder` in libopus 1.5 — verified from opus.h. Allocation via `opus_dred_decoder_create()` (confirm exact symbol name at Phase 3a start).
|
||||
- The `opus` crate from SpaceManiac (0.3.1, published 2026-01-03) is a trap: it depends on `audiopus_sys ^0.2.0` — the same dead FFI crate we're trying to get away from. Do not use.
|
||||
- **Follow-up (out of scope for this PRD)**: upstream the fixes to `opusic-c/src/dred.rs` (preserve `dred_end`, fix the `dred_offset` double-pass, expose `DredPacket` externally). Worth a GitHub PR once our own implementation has proven correct. Would let us eventually delete our internal FFI wrapper.
|
||||
|
||||
### Critical note from opusic-c docs
|
||||
|
||||
From the `dred` module documentation: *"The documentation recommends disabling in-band FEC and using `Application::Voip` for optimal results."* This applies to the **codec-internal** Opus inband FEC (LBRR), not our application-level RaptorQ. The two are independent layers. This PRD disables both on Opus tiers, but for different reasons — inband FEC per upstream recommendation, RaptorQ per the analysis above.
|
||||
|
||||
### The libopus 1.5 loss-percentage gating quirk
|
||||
|
||||
In libopus 1.5, both inband FEC and DRED are gated on `OPUS_SET_PACKET_LOSS_PERC` being non-zero. If the encoder thinks loss is 0%, it will not emit DRED data even when `set_dred_duration` is configured. We must plumb a meaningful loss percentage into the encoder continuously, floored at a small non-zero value so DRED stays active even when the network is perfect. Planned floor: **5%**, overridden upward by the real `QualityReport` loss value when it exceeds the floor.
|
||||
|
||||
## Solution
|
||||
|
||||
### High-level architecture change
|
||||
|
||||
**Before** (per Opus frame encode path):
|
||||
```
|
||||
PCM → AdaptiveEncoder.encode (Opus)
|
||||
→ inband FEC embedded in packet
|
||||
→ wzp-fec FEC encoder (accumulate into block, generate repair symbols)
|
||||
→ DATAGRAM out
|
||||
```
|
||||
|
||||
**Before** (per Opus frame decode path):
|
||||
```
|
||||
DATAGRAM in → wzp-fec block assembly (wait for block, recover if possible)
|
||||
→ AdaptiveDecoder.decode (Opus) / decode_lost (classical PLC)
|
||||
→ PCM
|
||||
```
|
||||
|
||||
**After** (Opus tiers):
|
||||
```
|
||||
PCM → OpusEncoder.encode (opusic-c, DRED enabled via set_dred_duration, inband FEC off)
|
||||
→ DATAGRAM out directly (no RaptorQ block)
|
||||
```
|
||||
|
||||
```
|
||||
DATAGRAM in → jitter buffer (lookahead/backfill)
|
||||
→ on frame arrival: OpusDecoder.decode
|
||||
→ on detected gap: if next packet has DRED state → dred::Dred.reconstruct(gap)
|
||||
else → OpusDecoder.decode_lost (classical PLC)
|
||||
→ PCM
|
||||
```
|
||||
|
||||
**After** (Codec2 tiers): unchanged. RaptorQ block encoding + classical Codec2 decode path stay exactly as they are today.
|
||||
|
||||
### New per-profile protection matrix
|
||||
|
||||
| Profile | Codec | Inband FEC | RaptorQ ratio | DRED duration | Total overhead |
|
||||
|---|---|---|---|---|---|
|
||||
| `STUDIO_64K` | Opus 64k | **off** | **none** | **10 frames (100 ms)** | +1 kbps |
|
||||
| `STUDIO_48K` | Opus 48k | **off** | **none** | **10 frames (100 ms)** | +1 kbps |
|
||||
| `STUDIO_32K` | Opus 32k | **off** | **none** | **10 frames (100 ms)** | +1 kbps |
|
||||
| `GOOD` | Opus 24k | **off** | **none** | **20 frames (200 ms)** | +1 kbps |
|
||||
| `NORMAL_16K` | Opus 16k | **off** | **none** | **20 frames (200 ms)** | +1 kbps |
|
||||
| `DEGRADED` | Opus 6k | **off** | **none** | **50 frames (500 ms)** | +1 kbps |
|
||||
| `CODEC2_3200` | Codec2 3200 | N/A | **0.5 (unchanged)** | N/A | +50% |
|
||||
| `CATASTROPHIC` | Codec2 1200 | N/A | **1.0 (unchanged)** | N/A | +100% |
|
||||
| `COMFORT_NOISE` | CN | — | — | — | — |
|
||||
|
||||
DRED duration rationale:
|
||||
|
||||
- **Studio tiers (100 ms)**: loss is rare on the networks where users pick studio quality. Short DRED window keeps decode-side CPU modest. Still covers multi-frame bursts that classical PLC can't touch.
|
||||
- **Normal tiers (200 ms)**: balanced baseline. Handles the common VoIP loss pattern (20–150 ms bursts from wifi roam, transient congestion).
|
||||
- **Degraded tier (500 ms)**: users on Opus 6k are by definition on a bad link. Long DRED window buys maximum burst resilience where it matters most. Still well under the 1040 ms cap.
|
||||
|
||||
### Runtime escape hatch
|
||||
|
||||
Ship with a single environment variable / settings flag: **`AUDIO_USE_LEGACY_FEC`**. When set, the entire Opus-tier path reverts to the pre-PRD behavior: RaptorQ re-enabled at the old ratios, Opus inband FEC re-enabled, DRED disabled (`set_dred_duration(0)`). This is the rollback safety valve for the first production window.
|
||||
|
||||
Escape hatch semantics:
|
||||
- Read once at `CallEncoder::new` / `CallDecoder::new` time. Call-scoped, not re-read mid-call.
|
||||
- Exposed via Android Settings UI as a hidden "Legacy FEC (debug)" toggle, and as a CLI flag `--legacy-fec` on the desktop client.
|
||||
- Logged in `DebugReporter` so we can tell which mode a call was in when diagnosing.
|
||||
- Removed entirely after 2 months of stable production with no regressions reported. Removal is a follow-up PR, not part of this PRD's scope.
|
||||
|
||||
## Detailed design
|
||||
|
||||
### Phase 0 — FFI crate swap (prerequisite, no behavior change)
|
||||
|
||||
**Files touched:**
|
||||
- `Cargo.toml` (workspace root) — replace `audiopus = "0.3.0-rc.0"` with `opusic-c = { version = "1.5.5", features = ["bundled", "dred"] }` and `opusic-sys = { version = "0.6.0", features = ["bundled"] }`. The `opusic-sys` direct dep is for the DRED decoder path below.
|
||||
- `crates/wzp-codec/Cargo.toml` — update `audiopus = { workspace = true }` to `opusic-c = { workspace = true }`, add `opusic-sys = { workspace = true }`, add `bytemuck = "1"` for the i16↔u16 slice cast.
|
||||
- `crates/wzp-codec/src/opus_enc.rs` — rewrite against opusic-c. API mapping:
|
||||
- `audiopus::coder::Encoder::new(SampleRate::Hz48000, Channels::Mono, Application::Voip)` → `opusic_c::Encoder::new(Channels::Mono, SampleRate::Hz48000, Application::Voip)` (argument order swapped)
|
||||
- `set_bitrate(Bitrate::BitsPerSecond(bps))` → `set_bitrate(Bitrate::Bits(bps))` or equivalent variant — verify at implementation time
|
||||
- `set_inband_fec(true/false)` → `set_inband_fec(InbandFec::On/Off)` (now an enum)
|
||||
- `set_packet_loss_perc(u8)` → `set_packet_loss(u8)` (method renamed)
|
||||
- `set_dtx(bool)`, `set_signal(Signal::Voice)`, `set_complexity(u8)` — names match
|
||||
- `encode(&[i16], &mut [u8])` → `encode_to_slice(&[u16], &mut [u8])` with `bytemuck::cast_slice::<i16, u16>(pcm)` at the call site
|
||||
- `crates/wzp-codec/src/opus_dec.rs` — same-style rewrite for the `Decoder` path. Note that opusic-c's decoder methods take `decode_fec: bool` as a parameter directly (not a separate ctl).
|
||||
- `vendor/audiopus_sys/` — delete the directory (only exists on `feat/desktop-audio-rewrite`, not on `android-rewrite`, so this is a no-op on the current branch but do remove the `[patch.crates-io]` block from Cargo.toml when merging back).
|
||||
|
||||
**Acceptance criteria:**
|
||||
- `cargo check --workspace` passes on Linux x86_64, macOS, and Android NDK cross-compile.
|
||||
- All existing codec unit tests in `crates/wzp-codec/src/adaptive.rs` pass unchanged. DRED is still disabled at this phase (default `set_dred_duration(0)`), so behavior is equivalent to pre-swap libopus 1.3 for call quality purposes.
|
||||
- A short real-call smoke test produces audio identical to current behavior (no audible regression).
|
||||
- `opusic_c::version()` at startup logs libopus version containing `1.5.2` — hard signal that the swap landed correctly.
|
||||
|
||||
### Phase 1 — DRED encoder enable on all Opus profiles
|
||||
|
||||
**Files touched:**
|
||||
- `crates/wzp-codec/src/opus_enc.rs`:
|
||||
- Add `fn dred_duration_for(codec: CodecId) -> u8` returning the per-profile value from the matrix above (10 / 20 / 50 frames).
|
||||
- In `OpusEncoder::new`, after the existing `set_bitrate`/`set_signal`/`set_complexity` block: call `inner.set_inband_fec(InbandFec::Off)`, then `inner.set_dred_duration(dred_duration_for(profile.codec))`, then `inner.set_packet_loss(5)` as the default floor.
|
||||
- Add `pub fn set_dred_duration(&mut self, frames: u8)` to allow the adaptive ladder to update DRED duration on profile switch.
|
||||
- In the existing `set_profile` impl, call `set_dred_duration(dred_duration_for(profile.codec))` after `apply_bitrate`.
|
||||
- `crates/wzp-codec/src/adaptive.rs`:
|
||||
- `AdaptiveEncoder::set_profile` already delegates to `self.opus.set_profile` — no changes needed. DRED update rides along.
|
||||
- `crates/wzp-client/src/call.rs` (and equivalent on `wzp-android/src/pipeline.rs`):
|
||||
- In the `QualityReport` handler (wherever we currently call `set_expected_loss` / `set_packet_loss_perc`), also ensure the loss value is floored at 5% before passing to the Opus encoder. This is a 1-line change.
|
||||
|
||||
**Acceptance criteria:**
|
||||
- Encoder produces DRED-enabled Opus packets. Verifiable via libopus's reference decoder in debug mode, or by wire capture + inspection — a DRED-bearing Opus packet is larger on the wire than a non-DRED one of the same nominal bitrate, because the DRED payload travels in the packet's padding extension (the frame count reported by `opus_packet_get_nb_frames` is unchanged).
|
||||
- Total outgoing bitrate on Opus 24k is ~25 kbps (up from ~24 kbps) — confirms ~1 kbps DRED overhead.
|
||||
- On a lossless path, decoder output is audibly identical to Phase 0.
|
||||
- Escape hatch `AUDIO_USE_LEGACY_FEC=1` cleanly reverts the DRED enable (calls `set_dred_duration(0)` and `set_inband_fec(InbandFec::On)` instead).
|
||||
|
||||
### Phase 2 — RaptorQ removal on Opus tiers
|
||||
|
||||
**Files touched:**
|
||||
- `crates/wzp-client/src/call.rs`:
|
||||
- In `CallEncoder::encode_frame` (or wherever `wzp_fec::Encoder::add_source_symbol` is called), gate the RaptorQ path on `!profile.codec.is_opus()` — Opus frames go straight to DATAGRAM emit, Codec2 frames continue through RaptorQ.
|
||||
- When a profile switch crosses the Opus↔Codec2 boundary, flush/reset the RaptorQ encoder state.
|
||||
- `crates/wzp-android/src/pipeline.rs`:
|
||||
- Mirror the same gate in the Android encode path.
|
||||
- `crates/wzp-proto/src/packet.rs`:
|
||||
- `MediaHeader.fec_block` and `fec_symbol` are still valid fields on the wire. For Opus packets we emit `fec_block = 0`, `fec_symbol = 0`, `fec_ratio_encoded = 0`. No wire format change; the receiver just sees all-zeros in the FEC fields for Opus packets and skips the FEC decoder path.
|
||||
- Bump protocol version from v1 to v2? **No** — the change is semantically backward compatible because existing RaptorQ decoders handle a zero ratio correctly (ratio 0.0 means "no repair symbols expected"). Old receivers can still decode new Opus packets; they just won't see any DRED benefit because their libopus is old. This is a property we want: the opposite (new receiver, old sender) is the more common mixed-version case during rollout and also Just Works.
|
||||
- `crates/wzp-client/src/call.rs` — `CallDecoder`:
|
||||
- Symmetric change: Opus frames bypass the RaptorQ block assembly, go straight to the decoder. Only Codec2 frames (`codec_id.is_codec2()`) feed through `wzp-fec` block decoding.
|
||||
|
||||
**Acceptance criteria:**
|
||||
- Outgoing Opus packets have `fec_ratio_encoded == 0` (verifiable with the existing wire capture tooling in `wzp-client/src/echo_test.rs`).
|
||||
- On a clean network, receiver latency (measured as encode-to-playout one-way delay) drops by ~40 ms versus Phase 1. This is the primary win and should be directly measurable with the existing telemetry.
|
||||
- Codec2 calls show no latency change and no packet-format change. Regression-test Codec2 3200 and Codec2 1200 specifically.
|
||||
- Total outgoing bitrate on Opus 24k drops from ~28.8 kbps (24k base + 0.2 RaptorQ ratio) to ~25 kbps (24k base + ~1 kbps DRED). Direct savings observable in network telemetry.
|
||||
|
||||
### Phase 3 — DRED reconstruction wrapper + jitter buffer lookahead/backfill refactor
|
||||
|
||||
This phase is larger than originally estimated because opusic-c's decoder-side DRED wrapper is unusable for our architecture (see Background). We write our own safe wrapper over `opusic-sys` raw FFI first, then plumb it through the jitter buffer.
|
||||
|
||||
**Step 3a — Safe DRED reconstruction wrapper in `wzp-codec`:**
|
||||
|
||||
New file `crates/wzp-codec/src/dred_ffi.rs`. Wraps the raw libopus 1.5 DRED API:
|
||||
|
||||
- `pub struct DredState` — owns an `OpusDRED` buffer (allocated via `opusic_sys::opus_dred_alloc` or equivalent; size is fixed at 10,592 bytes per libopus 1.5). `Clone` is intentionally NOT implemented — the state is heap-owned and non-trivial to copy.
|
||||
- `pub fn parse_from_packet(&mut self, decoder: &opusic_c::Decoder, packet: &[u8], max_dred_samples: i32) -> Result<DredParseResult, DredError>` — wraps `opus_dred_parse`, preserves the `dred_end` output (number of samples of history the packet carried), returns it in `DredParseResult { samples_available: i32, frames_available: u8 }`.
|
||||
- `pub fn reconstruct_into(&self, decoder: &mut opusic_c::Decoder, dred_offset_samples: i32, output: &mut [i16]) -> Result<usize, DredError>` — wraps `opus_decoder_dred_decode`, takes the offset explicitly, decodes `output.len()` samples starting from that offset in the DRED window.
|
||||
- All `unsafe` contained here, strict bounds checking on offsets, Rust-level panic safety. Unit tests use a reference encoder + known-good reference decoder to verify that reconstruction at specific offsets produces expected output.
|
||||
- Depends on `opusic-sys` directly and on `opusic-c::Decoder` for the decoder handle. The Decoder handle must be reachable as a raw pointer; opusic-c exposes this via an unstable internal or we wrap the pointer ourselves. **Verify at implementation time** — if opusic-c doesn't expose the raw decoder pointer safely, we create our own thin Decoder wrapper in `dred_ffi.rs` using raw opusic-sys, losing the convenience of opusic-c's decoder but keeping its encoder. This is the smaller-risk fallback.
|
||||
|
||||
New `pub trait DredReconstructor` in `wzp-codec/src/lib.rs`:
|
||||
```rust
|
||||
pub trait DredReconstructor: Send {
|
||||
/// Parse DRED state from an arriving Opus packet into `state`.
|
||||
/// Returns number of 48 kHz samples of history available, or 0 if the packet has no DRED.
|
||||
fn parse(&mut self, state: &mut DredState, packet: &[u8]) -> Result<i32, DredError>;
|
||||
|
||||
/// Reconstruct `output.len()` samples from `state`, starting at the given
|
||||
/// sample offset (measured from the end of the DRED window going backward).
|
||||
fn reconstruct(&mut self, state: &DredState, offset_samples: i32, output: &mut [i16]) -> Result<usize, DredError>;
|
||||
}
|
||||
```
|
||||
|
||||
Implement `DredReconstructor` over the `dred_ffi::DredState` + opusic-c Decoder combination. This is the clean boundary the jitter buffer will talk to.
|
||||
|
||||
**Step 3b — Jitter buffer refactor in `crates/wzp-transport/src/jitter.rs`:**
|
||||
|
||||
- Current behavior: buffer waits a fixed number of frames of jitter before emitting; on a missing slot, after a timeout it gives up and signals the decoder to run `decode_lost()` (classical Opus PLC or Codec2 PLC).
|
||||
- New behavior on Opus tiers: when a frame arrives (in-order or late), first call `DredReconstructor::parse` on it to update a rolling ring of `DredState` instances tagged with their originating sequence number. When a gap is detected (missing sequence number between last-emitted and current arrival), and the ring contains a `DredState` from a nearby packet that covers the gap's sample offset, call `DredReconstructor::reconstruct` with the correct offset to synthesize the missing frames, splice them into playout, then continue normal decode.
|
||||
- If no DRED state covers the gap (e.g., gap too far back, or every nearby packet was dropped), fall through to classical PLC exactly as today. The classical path stays intact as the ultimate fallback.
|
||||
- Codec2 packets bypass the entire DRED ring. They are not inspected for DRED state and take the unchanged classical PLC path.
|
||||
- Ring sizing: `max_dred_duration_frames` + `jitter_depth_frames` worth of `DredState` instances. At 500 ms DRED on degraded tier + 60 ms jitter depth, that's ~28 DredState instances × 10,592 bytes ≈ 300 KB. Acceptable. On studio tier with 100 ms DRED it's only ~80 KB.
|
||||
- The jitter buffer takes a `Box<dyn DredReconstructor>` at construction, passed in by the call engine. `wzp-transport` does NOT take a direct dep on `opusic-c` or `opusic-sys` — it only knows about the trait defined in `wzp-codec`.
|
||||
|
||||
**Files touched:**
|
||||
- `crates/wzp-codec/src/dred_ffi.rs` (new, ~150–300 lines)
|
||||
- `crates/wzp-codec/src/lib.rs` — expose `DredReconstructor`, `DredState`, `DredError` types
|
||||
- `crates/wzp-codec/Cargo.toml` — `opusic-sys = { workspace = true }` must be a direct dep (already added in Phase 0; verify only)
|
||||
- `crates/wzp-transport/src/jitter.rs` — lookahead/backfill refactor, DRED ring
|
||||
- `crates/wzp-transport/Cargo.toml` — add `wzp-codec = { workspace = true }` (likely already present) for the trait import
|
||||
- `crates/wzp-client/src/call.rs` — construct a `DredReconstructor` and pass into `CallDecoder`'s jitter buffer
|
||||
- `crates/wzp-android/src/pipeline.rs` — same on Android
|
||||
|
||||
**Acceptance criteria:**
|
||||
- Unit tests in `dred_ffi.rs`: round-trip a known speech waveform through an encoder with DRED enabled, parse the resulting packets, reconstruct at several different offsets, verify the reconstructed samples are within an energy/spectral threshold of the original. (Not bit-exact — DRED reconstruction is lossy by design.)
|
||||
- Synthetic loss test on the full pipeline: inject 200 ms bursts at 10% rate into a looped call, verify the DRED reconstruction rate on receiver telemetry is ≥95% of all loss events whose gaps fall within the configured DRED duration window.
|
||||
- Reconstructed audio is audibly continuous on 40–200 ms bursts — no gaps, no classical-PLC robot artifact. Verified on real voice samples (not just sine tones), and on at least two distinct speaker profiles (male, female) because DRED can have voice-dependent quality.
|
||||
- End-to-end latency metric is unchanged versus Phase 2 (no regression from adding the lookahead path). The DRED ring insertion on packet arrival must be O(1) in practice.
|
||||
- Existing `echo_test.rs` and `drift_test.rs` pass with the new jitter buffer.
|
||||
- Codec2 path uses classical PLC exclusively (no DRED invocation) because Codec2 packets don't carry DRED state. Verify by injecting loss on a Codec2 call and confirming zero DRED reconstruction telemetry events during that call.
|
||||
- `wzp-transport` has no direct dependency on `opusic-sys` or `opusic-c` in its `Cargo.toml` after the refactor — only on `wzp-codec`. Verify by grepping the Cargo.toml file.
|
||||
|
||||
### Phase 4 — Telemetry and tooling updates
|
||||
|
||||
**Files touched:**
|
||||
- `crates/wzp-proto/src/packet.rs` — `QualityReport` or equivalent telemetry message gains `dred_reconstructions: u32` as a new counter (frames reconstructed via DRED this reporting window) and `classical_plc_invocations: u32` (frames filled by Opus/Codec2 classical PLC). These are separate counters because they're different recovery mechanisms.
|
||||
- `crates/wzp-relay/src/*` — relay telemetry pipeline surfaces both counters in Prometheus metrics: `wzp_dred_reconstructions_total{call_id}`, `wzp_classical_plc_total{call_id}`.
|
||||
- `docs/grafana-dashboard.json` — new panel: "Loss recovery breakdown" stacked bar, DRED vs classical PLC vs clean decode, per call.
|
||||
- `android/app/src/main/java/com/wzp/debug/DebugReporter.kt` — surfaces `dredReconstructions` and `classicalPlc` counts in the debug report; also logs active DRED duration and whether legacy-FEC mode is engaged.
|
||||
|
||||
**Acceptance criteria:**
|
||||
- Grafana dashboard shows a clear visual distinction between DRED-recovered and classical-PLC-recovered frames across a test fleet of calls.
|
||||
- Debug report includes the active protection mode ("DRED 200 ms" / "Legacy RaptorQ") and reconstruction counts, so incidents can be classified unambiguously.
|
||||
|
||||
### Phase 5 — Escape hatch removal (follow-up, ~2 months post-ship)
|
||||
|
||||
After 2 months of stable production with no rollbacks triggered:
|
||||
- Delete `AUDIO_USE_LEGACY_FEC` handling in `opus_enc.rs` / `call.rs` / `pipeline.rs`
|
||||
- Delete the Opus-tier paths of `wzp-fec` (the crate stays for Codec2)
|
||||
- Delete the Android settings toggle and desktop CLI flag
|
||||
- Remove the `--legacy-fec` path from smoke tests
|
||||
|
||||
## Critical files to modify (summary)
|
||||
|
||||
- `Cargo.toml` (workspace) — dep swap (audiopus → opusic-c + opusic-sys)
|
||||
- `crates/wzp-codec/Cargo.toml` — dep swap + `bytemuck` for slice cast
|
||||
- `crates/wzp-codec/src/opus_enc.rs` — opusic-c rewrite + DRED enable + inband FEC off
|
||||
- `crates/wzp-codec/src/opus_dec.rs` — opusic-c rewrite
|
||||
- `crates/wzp-codec/src/dred_ffi.rs` — **new file**, safe wrapper over opusic-sys raw DRED FFI
|
||||
- `crates/wzp-codec/src/lib.rs` — expose `DredReconstructor` trait, `DredState`, `DredError`
|
||||
- `crates/wzp-codec/src/adaptive.rs` — verify profile switch carries DRED duration
|
||||
- `crates/wzp-client/src/call.rs` — Opus/Codec2 gate on RaptorQ path, loss floor, wire DredReconstructor into CallDecoder
|
||||
- `crates/wzp-android/src/pipeline.rs` — same gate, same loss floor, wire DredReconstructor
|
||||
- `crates/wzp-transport/src/jitter.rs` — lookahead/backfill refactor, DRED ring, reconstruction dispatch
|
||||
- `crates/wzp-transport/Cargo.toml` — verify it depends only on `wzp-codec`, not directly on opusic-*
|
||||
- `crates/wzp-proto/src/packet.rs` — new telemetry counters
|
||||
- `crates/wzp-relay/` — Prometheus metric exposure
|
||||
- `android/app/src/main/java/com/wzp/debug/DebugReporter.kt` — debug output
|
||||
- `docs/grafana-dashboard.json` — loss-recovery panel
|
||||
- (delete) `vendor/audiopus_sys/` on `feat/desktop-audio-rewrite` when merging back
|
||||
|
||||
## Existing utilities to reuse
|
||||
|
||||
- `wzp_codec::resample::Downsampler48to8` / `Upsampler8to48` — unchanged, only Codec2 path uses them
|
||||
- `wzp_codec::adaptive::AdaptiveEncoder` / `AdaptiveDecoder` — existing profile-switching machinery, DRED duration changes ride along
|
||||
- `wzp_codec::silence::SilenceDetector` / `ComfortNoise` — unchanged
|
||||
- `wzp_codec::agc::AutoGainControl` — unchanged, runs before encode as today
|
||||
- `wzp_fec::RaptorQFecEncoder` / decoder — unchanged, still used for Codec2 tiers
|
||||
- `wzp_client::call::QualityAdapter` — unchanged; drives profile switching, which now also reconfigures DRED duration via the existing `set_profile` path
|
||||
|
||||
## Verification
|
||||
|
||||
End-to-end testing, in order:
|
||||
|
||||
1. **Unit**: `cargo test -p wzp-codec` — Opus encode/decode round-trip at every profile, DRED enabled. Verify `version()` reports libopus 1.5.2.
|
||||
2. **Unit**: `cargo test -p wzp-transport` — jitter buffer lookahead/backfill behavior with injected loss patterns (0%, 5%, 15%, 30%, 50% loss; isolated losses, 40 ms bursts, 200 ms bursts, 500 ms bursts).
|
||||
3. **Integration**: `crates/wzp-client/src/echo_test.rs` — existing echo test must pass on all Opus profiles with <5% perceived quality regression (measure via the time-window analysis already built into `echo_test.rs`).
|
||||
4. **Integration**: `crates/wzp-client/src/drift_test.rs` — latency measurement. Must show ~40 ms reduction on Opus profiles versus pre-PRD baseline. Codec2 profiles unchanged.
|
||||
5. **Manual**: Android release build, real call over bad wifi (or a shaped network via `tc netem` on Linux). Burst losses of 200 ms should be perceptually continuous speech, not robotic gaps.
|
||||
6. **Manual**: Same call with `AUDIO_USE_LEGACY_FEC=1` — verify behavior reverts to current production behavior. This is the pre-ship rollback rehearsal.
|
||||
7. **Cross-compile**: full build matrix — Android arm64-v8a + armeabi-v7a (via `scripts/build-and-notify.sh`), macOS universal, Linux x86_64 (via `scripts/build-linux-docker.sh`). Windows cross-compile via cargo-xwin should also pass — libopus 1.5 upstream fixed the clang-cl SIMD issue that required the vendor patch on `feat/desktop-audio-rewrite`.
|
||||
8. **Telemetry smoke**: deploy to staging relay, make 10 test calls, verify Grafana's new "Loss recovery breakdown" panel shows DRED reconstruction events firing on injected loss that falls within DRED's window, and classical-PLC events on loss beyond that window.
|
||||
|
||||
## Risks and mitigations
|
||||
|
||||
- **Custom DRED FFI wrapper is WZP-maintained code with no second source.** opusic-c's decoder-side DRED wrapper is insufficient (see Background), so we carry our own `dred_ffi.rs` that calls `opus_dred_parse` and `opus_decoder_dred_decode` directly via opusic-sys. Bugs in this wrapper — offset arithmetic off-by-ones, lifetime errors on `OpusDRED` buffers, UB from misuse of the C API — could manifest as silent audio corruption on loss bursts, hard to diagnose. **Mitigation**: extensive unit tests in `dred_ffi.rs` using a reference encoder + reference decoder round-trip with known offsets; strict bounds checking on every `unsafe` boundary; Miri run in CI if feasible; the legacy-FEC escape hatch disables the entire DRED code path including our custom wrapper, giving us a single flag to revert any wrapper bug in production. Long-term: upstream the fixes to opusic-c (follow-up task, not blocking).
|
||||
- **opusic-c's encoder-side API and internal Decoder pointer access**. Step 3a depends on being able to call opusic-sys raw functions that take an `*mut OpusDecoder` pointer while still using opusic-c's `Decoder` for normal decode. If opusic-c doesn't expose the raw pointer cleanly, we fall back to a thin opusic-sys-direct Decoder wrapper inside `dred_ffi.rs` and lose some of opusic-c's convenience. **Mitigation**: verify at the start of Phase 3 (one afternoon of reading opusic-c source). If the clean path doesn't work, the fallback is not difficult — it's what we'd have built anyway if opusic-c didn't exist.
|
||||
- **DRED reconstruction quality varies by voice / content**. The neural model is trained on speech; edge cases (shouting, whispering, heavy accents, music-on-hold, cough, laughter) may reconstruct less cleanly than continuous speech. **Mitigation**: escape hatch ships from day one. If production telemetry shows perceptible quality regression on specific voice patterns, flip legacy mode for affected users while tuning. Also: classical Opus PLC remains as the third-tier fallback when DRED state is unavailable.
|
||||
- **Removing RaptorQ removes bit-exact recovery**. Isolated single-packet losses are now reconstructed plausibly instead of bit-exactly. **Mitigation**: as argued in Background, bit-exactness on a single 20 ms speech frame is perceptually meaningless. The assumption is "speech is the workload" — if we ever add non-speech features (music bot, ringtones over the call path, DTMF-over-audio) we revisit.
|
||||
- **libopus 1.5 DRED API stability**. **Verified at pre-flight**: opus.h in the upstream xiph/opus repository has no "experimental" marker on the DRED API declarations. The earlier characterization was incorrect. DRED shipped as a first-class feature in libopus 1.5.0 (Dec 2023) and has been iterated in 1.5.1 and 1.5.2. Google Meet and Duo ship it at scale. **Mitigation**: pin `opusic-sys` exactly (no `^` range) to ensure reproducible builds, follow upstream 1.5.x bugfixes as they land. No special stability concerns beyond normal dependency hygiene.
|
||||
- **Jitter buffer refactor is the largest code change**. Jitter bugs are notoriously subtle (off-by-one on sequence wraparound, clock drift interactions, playout starvation corner cases). **Mitigation**: keep the classical-PLC path intact as the DRED fallback, so jitter bugs degrade to "current behavior" rather than "broken audio". Write targeted unit tests for the buffer at each loss-pattern scenario before touching production paths. Consider shipping Phase 3 behind a sub-flag separate from the main escape hatch, so we can independently toggle "DRED enabled but classical jitter buffer" for bisection.
|
||||
- **Cross-compile surprises**. `opusic-sys` is actively maintained but our exact combination of Android NDK version / Docker builder environment / Windows cross-compile via cargo-xwin has not been tested by upstream. **Mitigation**: Phase 0 includes the full cross-compile matrix as an acceptance criterion. Any blockers surface before we touch loss-recovery behavior.
|
||||
- **Wire-format compatibility during rollout**. Mixed-version calls (new sender + old receiver, or vice versa) need to keep working. **Verified at pre-flight**: traced both live receive paths (`wzp-client/src/call.rs::CallDecoder::ingest` and `wzp-android/src/engine.rs` the JNI-driven engine path), and both degrade gracefully: new-sender Opus packets with `fec_ratio_encoded=0` / `fec_block=0` / `fec_symbol=0` flow through to the jitter buffer and decode normally on old receivers. The RaptorQ decoder either ignores zero-FEC packets entirely (Android pipeline.rs gates on non-zero fec_block/fec_symbol) or accumulates them harmlessly until the 2-second staleness eviction (desktop call.rs). Old-sender packets with populated RaptorQ fields are handled by new receivers via the unchanged Codec2 path (new receivers keep wzp-fec for Codec2 tiers and simply ignore RaptorQ fields on Opus packets). **No wire format version bump required.**
|
||||
- **Pre-existing desktop RaptorQ gap** (incidental finding, NOT caused by this PRD). The desktop `wzp-client/src/call.rs::CallDecoder` feeds packets into `fec_dec.add_symbol` but **never calls `fec_dec.try_decode`** — RaptorQ recovery is effectively dead code on the desktop path today. Main decode reads from the jitter buffer directly, falling through to classical Opus PLC on missing packets. The Android `engine.rs` path properly uses `try_decode` for recovery. This PRD does not fix the desktop gap — it's unrelated — but is noted here so nobody is surprised that removing RaptorQ from Opus tiers on the desktop client causes no measurable recovery regression (there was nothing to lose). Recommend filing a follow-up task to either fix or remove the vestigial desktop RaptorQ wiring independently of this work.
|
||||
- **`AUDIO_USE_LEGACY_FEC` itself becoming permanent tech debt**. Escape hatches have a way of outliving their intended lifespan. **Mitigation**: put an explicit removal date in a `// TODO(2026-06-15): remove legacy FEC path` comment at the flag-handling site. Track in taskmaster.
|
||||
|
||||
## Open questions
|
||||
|
||||
- ~~**Does opusic-c expose `opusic_c::Decoder`'s raw inner pointer?**~~ **Resolved at pre-flight**: no, it's `pub(crate)`. We build a unified `DecoderHandle` over raw opusic-sys in `dred_ffi.rs` and use it for both normal decode and DRED reconstruction. Opusic-c is used only for the encoder side.
|
||||
- **Exact opusic-sys symbol name for DRED decoder allocation**. opus.h documents the `OpusDREDDecoder` type and `opus_dred_parse`/`opus_decoder_dred_decode` functions, but the allocation function name is not in the fetched snippet. Expected to be `opus_dred_decoder_create` / `opus_dred_decoder_destroy` per libopus naming convention, but confirm at the very start of Phase 3a by reading the actual opusic-sys bindings. If the function is not exported by opusic-sys, we file a PR upstream to opusic-sys (small fix, trivially mergeable) and temporarily vendor the function declaration locally.
|
||||
- **Should the 5% loss floor be configurable per profile?** Currently specified as a constant. A future refinement might make it higher at degraded tiers and lower at studio tiers, but without real telemetry we don't know if the constant is wrong. Keep as a constant for now, revisit after 1 month of production data.
|
||||
- **OSCE enable**: opusic-c has an `osce` feature flag for Opus Speech Coding Enhancement, a separate libopus 1.5 neural post-processor. Out of scope for this PRD but should be the next audio-quality follow-up. Probably one-line enable once opusic-c is in.
|
||||
- **Upstream PR to opusic-c**: our own `dred_ffi.rs` wrapper should be proven in production first, then the fixes upstreamed to `opusic-c/src/dred.rs` (preserve `dred_end`, fix `dred_offset` double-pass, expose `DredPacket` externally). Follow-up task, not blocking this PRD.
|
||||
- **`feat/desktop-audio-rewrite` merge**: the vendored `audiopus_sys` patch on that branch becomes obsolete under this PRD. Coordinate removal with whoever owns that branch.
|
||||
@@ -457,3 +457,52 @@ Auto mode uses three tiers (Good, Degraded, Catastrophic). It does not use the S
|
||||
When you select a specific profile (not Auto), adaptive switching is disabled. The encoder stays at the selected profile regardless of network conditions. This is useful when you know your network quality and want consistent encoding, or when you want to force a specific bitrate.
|
||||
|
||||
Note: The decoder always accepts all codecs. A manual quality selection only affects what you send, not what you receive.
|
||||
|
||||
## Direct 1:1 Calling (Desktop + Android)
|
||||
|
||||
In addition to room-mode group calls, you can place direct calls to a specific peer by fingerprint. Direct calls bypass room state entirely — the relay is used purely as a signaling gateway and for media relay. There is no need for the callee to join a room beforehand; they just need to be registered with the same signal hub.
|
||||
|
||||
### UI elements in the direct-call panel
|
||||
|
||||
- **Place call field** — paste a fingerprint (the long hex string you see under your own identity) and click Call. The callee sees a ringing UI.
|
||||
- **Recent contacts row** — a horizontal strip of chips showing the peers you most recently called or received calls from. Click a chip to re-dial. Aliases are shown if the peer has one, otherwise a short fingerprint prefix.
|
||||
- **Call history list** — every direct call you've placed, received, or missed, with direction indicator (↗ Outgoing, ↙ Incoming, ✗ Missed), the peer's alias (if known) or fingerprint prefix, and a timestamp. Click an entry to re-dial.
|
||||
- **Deregister button** — drops your signal-hub registration without quitting the app. Useful when switching identities (e.g. testing with two accounts on one machine) or when you want to explicitly appear offline to peers.
|
||||
- **Clear history button** — wipes the call history store. Does not affect current calls.
|
||||
|
||||
### Live updates
|
||||
|
||||
The call history updates in real time across all views via Tauri events (`history-changed`). Placing, answering, or missing a call immediately refreshes the history list and the recent contacts row — no manual refresh needed.
|
||||
|
||||
### Default room
|
||||
|
||||
On first launch, the room name in the room-mode panel defaults to `general` (changed from the prior `android` default so the desktop and Android clients don't silently talk past each other). You can still change it to any room name, and the last-used room is remembered across launches.
|
||||
|
||||
### Random alias
|
||||
|
||||
New installations derive a human-friendly alias from your identity seed — something like `silent-forest-41` or `bold-river-07`. It's deterministic, so reinstalling without changing your seed gives you the same alias. The alias is shown alongside your fingerprint in the header and is what peers see in their call history when they receive your call.
|
||||
|
||||
You can override the alias in Settings → Identity if you want a specific name.
|
||||
|
||||
## Windows AEC Variants
|
||||
|
||||
The Windows desktop build ships in two variants for echo cancellation, depending on which backend you want to exercise. Both are `wzp-desktop.exe` binaries — only the internal audio backend differs.
|
||||
|
||||
| Build | File | Capture backend | AEC | When to use |
|
||||
|---|---|---|---|---|
|
||||
| **noAEC baseline** | `wzp-desktop-noAEC.exe` | CPAL (WASAPI shared mode) | None | Headphone-only use, or for A/B comparison against the AEC build |
|
||||
| **Communications AEC** | `wzp-desktop.exe` | Direct WASAPI with `AudioCategory_Communications` | **Yes** — Windows routes the capture stream through the driver's communications APO chain (AEC + noise suppression + automatic gain control) | Any speaker-mode call, laptop built-in speakers, anywhere echo is audible |
|
||||
|
||||
**Quality caveat**: the communications AEC operates at the OS level and its algorithm depends on the audio driver's installed APO chain. On modern consumer laptops with Intel Smart Sound, Dolby, recent Realtek, or Windows 11 Voice Clarity, the quality is excellent (effectively matching what Teams/Zoom deliver). On generic class-compliant USB microphones or older drivers, the communications APO may not be present at all — in that case the build behaves identically to the noAEC baseline.
|
||||
|
||||
If you hear echo on the AEC build, try these in order before escalating:
|
||||
|
||||
1. **Check which capture device is selected as "Default Device - Communications"** in Windows Sound Settings → Recording tab. Right-click any device to set it. The AEC build opens the device marked as `eCommunications`, not `eConsole`, so changing the default-communications device changes what we capture from.
|
||||
2. **Verify the driver exposes a communications APO**. Sound Settings → Recording → your mic → Properties → Advanced → look for an "Enhancements" or "Signal Enhancements" tab. If it's absent, the driver has no APOs and the AEC build effectively has no AEC.
|
||||
3. **Try the classic Voice Capture DSP build** when it ships (tracked as task #26). That uses Microsoft's bundled software AEC (`CLSID_CWMAudioAEC`) which works on every Windows machine regardless of driver.
|
||||
|
||||
### Installing the Windows builds
|
||||
|
||||
1. Windows 10: install the [WebView2 Runtime Evergreen Bootstrapper](https://developer.microsoft.com/en-us/microsoft-edge/webview2/) first. Windows 11 has it pre-installed.
|
||||
2. Copy `wzp-desktop.exe` (or `wzp-desktop-noAEC.exe`) to any directory and double-click. No installer needed.
|
||||
3. First launch creates the config + identity store at `%APPDATA%\com.wzp.phone\`.
|
||||
|
||||
431
docs/incident-tauri-android-init-tcb.md
Normal file
431
docs/incident-tauri-android-init-tcb.md
Normal file
@@ -0,0 +1,431 @@
|
||||
# Incident report — Tauri Android `__init_tcb+4` SIGSEGV
|
||||
|
||||
**Status:** Blocked. Reproducible crash with a known trigger at the cc::Build /
|
||||
rustc-link-lib layer that we cannot yet explain. Writing this report to hand
|
||||
off for external help.
|
||||
|
||||
**Project:** WarzonePhone (Rust + Tauri 2.x Mobile) Android rewrite
|
||||
**Branch:** `feat/desktop-audio-rewrite`
|
||||
**Target phone:** Pixel 6 (`oriole`), Android 16 (`BP3A.250905.014`), arm64-v8a
|
||||
**Date range of investigation:** 2026-04-09 (one working session, ~27 builds)
|
||||
|
||||
---
|
||||
|
||||
## One-paragraph summary
|
||||
|
||||
We're porting the existing CPAL-backed desktop Tauri app (`desktop/src-tauri`)
|
||||
to Tauri Mobile Android so the same Rust + Tauri + WebView codebase runs on
|
||||
both platforms. The Android `.apk` launches, renders the home screen, and
|
||||
registers on a relay for signal-only builds (no audio backend). The moment
|
||||
we add **any** `cc::Build::new().cpp(true).cpp_link_stdlib("c++_shared")`
|
||||
call to `build.rs` — even with a 6-line cpp file that just returns 42 and is
|
||||
never called from Rust — the built `.so` crashes at launch inside
|
||||
`__init_tcb(bionic_tcb*, pthread_internal_t*)+4` via `pthread_create` via
|
||||
`std::thread::spawn` via `tao::ndk_glue::create` via
|
||||
`Java_com_wzp_desktop_WryActivity_create`, before our Rust entry point has
|
||||
a chance to run. The exact same NDK, exact same Rust toolchain, exact same
|
||||
Docker image is used by the legacy `wzp-android` crate (via `cargo-ndk`)
|
||||
which compiles Oboe and runs fine on the same phone.
|
||||
|
||||
---
|
||||
|
||||
## Environment
|
||||
|
||||
**Docker build image:** `wzp-android-builder` (Dockerfile at
|
||||
`scripts/Dockerfile.android-builder`)
|
||||
|
||||
- Base: `debian:bookworm`
|
||||
- JDK 17
|
||||
- Android SDK:
|
||||
- cmdline-tools latest
|
||||
- `platforms;android-34`, `platforms;android-36`
|
||||
- `build-tools;34.0.0`, `build-tools;35.0.0`
|
||||
- `ndk;26.1.10909125` (last stable before scudo/MTE crash on NDK r27+)
|
||||
- `platform-tools`
|
||||
- Node.js 20 LTS
|
||||
- Rust stable `1.94.1 (e408947bf 2026-03-25)`
|
||||
- Rust android targets: `aarch64-linux-android`, `armv7-linux-androideabi`,
|
||||
`i686-linux-android`, `x86_64-linux-android`
|
||||
- `cargo-ndk` + `cargo tauri-cli 2.10.1` (latest 2.x)
|
||||
|
||||
**Host:** Docker on `SepehrHomeserverdk` (remote build server).
|
||||
|
||||
**Phone:** Pixel 6, Android 16, kernel 6.1.134-android14-11, on the same LAN
|
||||
as the build machine and a local `wzp-relay` binary.
|
||||
|
||||
**Tauri crate:** `desktop/src-tauri/` in the workspace at the root of the
|
||||
repo. Depends on `tauri = "2"`, `tauri-plugin-shell = "2"`, `tokio`, `rustls`,
|
||||
`wzp-proto`, `wzp-codec`, `wzp-fec`, `wzp-crypto`, `wzp-transport`, and (on
|
||||
non-Android only) `wzp-client` with `features = ["audio", "vpio"]`. The
|
||||
crate's `[lib]` section is:
|
||||
|
||||
```toml
|
||||
[lib]
|
||||
name = "wzp_desktop_lib"
|
||||
crate-type = ["staticlib", "cdylib", "rlib"]
|
||||
```
|
||||
|
||||
The crate produces `libwzp_desktop_lib.so` which is `System.loadLibrary`'d by
|
||||
Tauri's generated `WryActivity.onCreate` via JNI.
|
||||
|
||||
---
|
||||
|
||||
## The crash
|
||||
|
||||
Every failing build produces the same stack at launch, same pc offsets:
|
||||
|
||||
```
|
||||
signal 11 (SIGSEGV), code 2 (SEGV_ACCERR), fault addr 0x0000072XXXXXX00f (write)
|
||||
|
||||
#00 pc 000000000130cc74 libwzp_desktop_lib.so (__init_tcb(bionic_tcb*, pthread_internal_t*)+4)
|
||||
#01 pc 0000000001331cf0 libwzp_desktop_lib.so (pthread_create+360)
|
||||
#02 pc 00000000012bee04 libwzp_desktop_lib.so (std::sys::thread::unix::Thread::new::h87be8e9feeaaaf84+184)
|
||||
#03 pc 0000000000e37f5c libwzp_desktop_lib.so (std::thread::lifecycle::spawn_unchecked::h941f828f9a95150d+1504)
|
||||
#04 pc 0000000000e461e8 libwzp_desktop_lib.so (std::thread::builder::Builder::spawn_unchecked::hec5f087680cb0248+112)
|
||||
#05 pc 0000000000e441c8 libwzp_desktop_lib.so (std::thread::functions::spawn::ha3d3fbf2d9fe53e3+108)
|
||||
#06 pc ... libwzp_desktop_lib.so (tao::platform_impl::platform::ndk_glue::create::h254c68662718841a+1792)
|
||||
#07 pc ... libwzp_desktop_lib.so (Java_com_wzp_desktop_WryActivity_create+76)
|
||||
```
|
||||
|
||||
The offsets are **byte-identical across every failing build**, even when the
|
||||
cpp content changes drastically (cf. `cpp_smoke.cpp` at 6 lines, 20 lines,
|
||||
200+ Oboe source files). We believe this is because cargo caches the Rust
|
||||
compilation unit and only the build-script artifacts differ, and the final
|
||||
link produces the same layout.
|
||||
|
||||
`__init_tcb` is defined locally inside our `.so` with C++ mangling:
|
||||
|
||||
```
|
||||
_Z10__init_tcbP10bionic_tcbP18pthread_internal_t
|
||||
```
|
||||
|
||||
It originates from bionic's `pthread_create.cpp`, which got pulled in
|
||||
statically from the NDK's `sysroot/usr/lib/aarch64-linux-android/libc.a`.
|
||||
Both failing and known-good (legacy `wzp_android.so`) builds contain this
|
||||
same static symbol — the presence of the symbol is not the problem.
|
||||
|
||||
Fault address `0x72XXXXXX00f` with code `SEGV_ACCERR` (access permission
|
||||
error, write). The faulting pc is at offset `+4` inside `__init_tcb`, which is typically a
|
||||
store into the passed-in `bionic_tcb*`. The pointer is either NULL-ish or
|
||||
pointing into read-only memory.
|
||||
|
||||
---
|
||||
|
||||
## Bisection (the important part)
|
||||
|
||||
We started from a known-good commit (`5309938`) where the Tauri Android app
|
||||
launches, registers on a relay, and behaves identically to the desktop app
|
||||
modulo audio. Then we added features **one variable at a time**:
|
||||
|
||||
| Step | Commit | Change vs previous | Result |
|
||||
|---|---|---|---|
|
||||
| Baseline | `5309938` | — | ✅ launches, renders home, registers on relay |
|
||||
| **A** | `f96d7ce` | Add `cc = "1"` build-dep + compile trivial `cpp/hello.c` via `cc::Build` (C, not C++). Static lib never linked in. | ✅ |
|
||||
| **B** | `ae4f366` | Add `wzp-client` Android dep with `default-features = false` (no CPAL, no VPIO). No new imports. | ✅ |
|
||||
| **C** | `19fd3dd` | Un-cfg-gate `mod engine;` in `lib.rs` so `engine.rs` compiles on Android. `CallEngine::start()` has an Android stub returning an error. | ✅ |
|
||||
| **D** | `a852cad` | Compile `cpp/getauxval_fix.c` (legacy wzp-android shim). Still pure C. | ✅ |
|
||||
| **E** | `4250f1b` | **Compile full Oboe C++ bridge** (200+ source files from `google/oboe@1.8.1`). `cc::Build::new().cpp(true).std("c++17").cpp_link_stdlib(Some("c++_shared"))` + `-llog` + `-lOpenSLES` link directives. Nothing called from Rust yet — the `extern "C"` bridge functions are exported but never referenced from the Rust side. | ❌ **crash** |
|
||||
| E.4 | `aa240c6` | **Only change:** replace the entire Oboe compile with ONE tiny `cpp_smoke.cpp` file: `extern "C" int wzp_cpp_smoke(void) { std::lock_guard<std::mutex> lk(m); std::thread t([](){...}); t.join(); return g.load(); }`. Still `cpp(true) + cpp_link_stdlib("c++_shared")`. Drop `-llog`/`-lOpenSLES`. | ❌ **same crash, same offsets** |
|
||||
| E.2 | `0224ce6` | Shrink `cpp_smoke.cpp` further: just `std::atomic<int>` + `fetch_add`, no mutex, no thread, no includes beyond `<atomic>`. | ❌ **same crash, same offsets** |
|
||||
| E.1 | `0d74366` | **Absolute minimum:** `cpp_smoke.cpp` = `extern "C" int wzp_cpp_hello(void){return 42;}`. NO `#include`. NO STL. Just a function. Still compiled with `cpp(true) + cpp_link_stdlib("c++_shared")`. | ❌ **same crash, same offsets** |
|
||||
|
||||
### Additional confirming observations
|
||||
|
||||
1. **The cpp code is dead-stripped.** `llvm-nm -a libwzp_desktop_lib.so` shows
|
||||
zero matches for `wzp_cpp_hello`, `wzp_cpp_smoke`, or any Oboe symbol in
|
||||
builds E through E.1. The static archive (`libwzp_cpp_smoke.a` /
|
||||
`liboboe_bridge.a`) exists on disk under
|
||||
`target/aarch64-linux-android/debug/build/wzp-desktop-*/out/`, but because
|
||||
nothing in Rust ever references the exported C function, the final linker
|
||||
drops it.
|
||||
|
||||
2. **`build.rs` link directives are the real delta.** `cc::Build::new()
|
||||
.cpp(true).cpp_link_stdlib(Some("c++_shared"))` emits a
|
||||
`cargo:rustc-link-lib=c++_shared` directive that adds a `NEEDED` entry for
|
||||
`libc++_shared.so` to the final `.so`'s dynamic table. `readelf -d` on
|
||||
the crashing `.so` shows:
|
||||
|
||||
```
|
||||
NEEDED Shared library: [libc++_shared.so]
|
||||
NEEDED Shared library: [liblog.so] (only in full Oboe build)
|
||||
NEEDED Shared library: [libOpenSLES.so] (only in full Oboe build)
|
||||
```
|
||||
|
||||
The working baseline `.so` has no `NEEDED` entries beyond libc/liblog.
|
||||
|
||||
3. **Linker version doesn't matter.** We tried forcing
|
||||
`aarch64-linux-android26-clang` as the linker (API 26 has proper dynamic
|
||||
bindings to libc.so's runtime `pthread_create`/`__init_tcb`) via three
|
||||
different mechanisms:
|
||||
- `CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER` env var in `docker run`
|
||||
- `.cargo/config.toml` workspace-level linker override
|
||||
- **Binary replacement inside the image**: rename
|
||||
`aarch64-linux-android24-clang` to `aarch64-linux-android24-clang.orig` and replace it with a shell script
|
||||
that `exec`s `aarch64-linux-android26-clang`. Verified by calling
|
||||
`--version` which prints `Target: aarch64-unknown-linux-android26`.
|
||||
|
||||
All three made no difference. The `__init_tcb` symbol is pulled statically
|
||||
from the **same** `libc.a` regardless of which clang wrapper is used — the
|
||||
NDK ships ONE `libc.a` at
|
||||
`sysroot/usr/lib/aarch64-linux-android/libc.a` shared across all API
|
||||
levels. Only the per-API `libc.so` symlinks change (and we're linked
|
||||
statically, not dynamically, against libc).
|
||||
|
||||
4. **Legacy `wzp-android` crate works on the same phone, same image.** Run
|
||||
in the exact same Docker container, the legacy Kotlin app's JNI library
|
||||
(`crates/wzp-android` built via `cargo ndk`) compiles a subset of the
|
||||
same Oboe code, produces a `.so` that has the same static
|
||||
`_Z10__init_tcbP...` + `pthread_create` + `pthread_create.cpp` symbols,
|
||||
and launches cleanly on the Pixel 6. Key differences between the two
|
||||
build paths:
|
||||
|
||||
| | `wzp-android` (works) | `wzp-desktop` Tauri (crashes) |
|
||||
|---|---|---|
|
||||
| Build driver | `cargo ndk -t arm64-v8a build --release -p wzp-android` | `cargo tauri android build --debug --target aarch64 --apk` |
|
||||
| Profile | release | debug (release crashes identically) |
|
||||
| Linker | `aarch64-linux-android26-clang` (via `.cargo/config.toml` which cargo-ndk honors) | `aarch64-linux-android24-clang` (tauri-cli hardcodes and ignores config; the shim redirect makes no difference) |
|
||||
| crate-type | `["cdylib", "rlib"]` | `["staticlib", "cdylib", "rlib"]` |
|
||||
| JNI entrypoint | direct Kotlin `System.loadLibrary` + our own `native fun` declarations; first `pthread_create` runs later from the tokio runtime inside a command | `WryActivity.onCreate` via Tauri's generated Java glue; first `pthread_create` runs **inside the JNI call** via `tao::ndk_glue::create` |
|
||||
| Other heavy deps | tokio, wzp-{proto,codec,fec,crypto,transport} | tokio, tauri, tauri-runtime-wry, tao, wry, webview2-com, soup3, webkit2gtk (all platform-specific ones cfg-gated out of android), and also all of the above |
|
||||
| Binary size | `libwzp_android.so` ≈ 14 MB (release) | `libwzp_desktop_lib.so` ≈ 160 MB (debug), 16 MB (release) |
|
||||
|
||||
5. **The crash happens in the JNI-callback thread during `onCreate`.** Frame
|
||||
#06 `tao::platform_impl::platform::ndk_glue::create+1792` is tao's Android
|
||||
event-loop bootstrap, which Tauri calls from inside
|
||||
`Java_com_wzp_desktop_WryActivity_create` in response to the Java-side
|
||||
activity lifecycle. This means the thread spawn is happening while the
|
||||
Java VM still holds the native onCreate call, before `onCreate` has
|
||||
returned to the Android runtime. Legacy `wzp-android` never spawns a
|
||||
thread from an onCreate JNI call — it spawns threads only from
|
||||
`nativeSignalConnect`/similar commands invoked later from Kotlin button
|
||||
clicks, after the activity is fully initialised.
|
||||
|
||||
---
|
||||
|
||||
## Current suspect
|
||||
|
||||
One of the two items below, probably (2):
|
||||
|
||||
1. **The `.cpp(true)` mode in cc-rs changes something invisible in the link
|
||||
pipeline** (for example, emitting a different `-x` flag to clang, or
|
||||
changing linker driver selection). We have not yet verified this by
|
||||
diffing the actual rustc linker invocation between a working and a
|
||||
crashing build with `--verbose` + `-Clink-arg=-Wl,-t`.
|
||||
|
||||
2. **Adding `libc++_shared.so` as a NEEDED entry causes Android's dynamic
|
||||
linker to load libc++_shared.so before our `.so`'s init runs, and
|
||||
something in libc++_shared's `.init_array` interacts badly with
|
||||
tao::ndk_glue's `pthread_create` call from inside the JNI onCreate
|
||||
window**. The legacy crate doesn't hit this because (a) it has no
|
||||
NEEDED libc++_shared when built without Oboe, and (b) even when it does
|
||||
build Oboe, its thread spawns happen outside the onCreate JNI call so
|
||||
whatever libc state is wrong at that moment is already stabilised.
|
||||
|
||||
We have not yet confirmed (2) with the obvious A/B test: keep `cpp_smoke.cpp`
|
||||
but drop `.cpp_link_stdlib(Some("c++_shared"))` (and drop any manual
|
||||
`cargo:rustc-link-lib=c++_shared`) so the NEEDED entry disappears but the
|
||||
rest of the pipeline stays identical. That's the next experiment we were
|
||||
going to run, but the user reasonably asked for this report first.
|
||||
|
||||
---
|
||||
|
||||
## What we've ruled out
|
||||
|
||||
- **NDK API level** — forcing API-26 linker via three independent mechanisms
|
||||
made zero difference.
|
||||
- **Build profile** — release (`0x6b8000` offset, 21 MB unsigned APK) and
|
||||
debug (same 193 MB APK, same crash offsets) both crash identically.
|
||||
- **Oboe specifically** — replacing the Oboe compile with 6 lines of C++
|
||||
that do nothing still reproduces the crash.
|
||||
- **cpp code being executed at runtime** — dead-stripped, not in the final
|
||||
`.so` at all per `nm -a`.
|
||||
- **minSdk in build.gradle** — bumped from 24 to 26, no effect.
|
||||
- **libdl.a stub issue** — ruled out via logcat (`libdl.a is a stub --- use
|
||||
libdl.so instead` was only surfacing from our own `dlsym` shim that we
|
||||
subsequently deleted).
|
||||
- **`pthread_create` interposition via `-Wl,--wrap=pthread_create`** — tried
|
||||
and reverted; the wrap target still resolved to the broken static stub.
|
||||
- **Keystore / signing** — debug signing with persistent
|
||||
`~/.android/debug.keystore` works fine; no signature mismatch issues.
|
||||
|
||||
---
|
||||
|
||||
## The files involved
|
||||
|
||||
### `desktop/src-tauri/build.rs` (current state, E.1)
|
||||
|
||||
```rust
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
fn main() {
|
||||
// Embedded git hash
|
||||
let git_hash = Command::new("git")
|
||||
.args(["rev-parse", "--short", "HEAD"])
|
||||
.output()
|
||||
.ok()
|
||||
.filter(|o| o.status.success())
|
||||
.and_then(|o| String::from_utf8(o.stdout).ok())
|
||||
.map(|s| s.trim().to_string())
|
||||
.unwrap_or_else(|| "unknown".into());
|
||||
println!("cargo:rustc-env=WZP_GIT_HASH={git_hash}");
|
||||
println!("cargo:rerun-if-changed=../../.git/HEAD");
|
||||
println!("cargo:rerun-if-changed=../../.git/refs/heads");
|
||||
|
||||
let target = std::env::var("TARGET").unwrap_or_default();
|
||||
if target.contains("android") {
|
||||
// Step A: plain C sanity file
|
||||
println!("cargo:rerun-if-changed=cpp/hello.c");
|
||||
cc::Build::new().file("cpp/hello.c").compile("wzp_hello");
|
||||
|
||||
// Step D: legacy getauxval shim
|
||||
println!("cargo:rerun-if-changed=cpp/getauxval_fix.c");
|
||||
cc::Build::new().file("cpp/getauxval_fix.c").compile("getauxval_fix");
|
||||
|
||||
// Step E.1: minimal C++ smoke — THIS STEP BRINGS BACK THE CRASH
|
||||
println!("cargo:rerun-if-changed=cpp/cpp_smoke.cpp");
|
||||
cc::Build::new()
|
||||
.cpp(true)
|
||||
.std("c++17")
|
||||
.cpp_link_stdlib(Some("c++_shared"))
|
||||
.file("cpp/cpp_smoke.cpp")
|
||||
.compile("wzp_cpp_smoke");
|
||||
|
||||
// Copy libc++_shared.so into gen/android jniLibs so the runtime
|
||||
// linker can find it when the NEEDED entry fires.
|
||||
if let Ok(ndk) = std::env::var("ANDROID_NDK_HOME").or_else(|_| std::env::var("NDK_HOME")) {
|
||||
let triple = "aarch64-linux-android";
|
||||
let abi = "arm64-v8a";
|
||||
let lib_dir = format!(
|
||||
"{ndk}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/{triple}"
|
||||
);
|
||||
println!("cargo:rustc-link-search=native={lib_dir}");
|
||||
let shared_so = format!("{lib_dir}/libc++_shared.so");
|
||||
if std::path::Path::new(&shared_so).exists() {
|
||||
let manifest = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
|
||||
let jni_dir = format!("{manifest}/gen/android/app/src/main/jniLibs/{abi}");
|
||||
if std::fs::create_dir_all(&jni_dir).is_ok() {
|
||||
let _ = std::fs::copy(&shared_so, format!("{jni_dir}/libc++_shared.so"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tauri_build::build()
|
||||
}
|
||||
```
|
||||
|
||||
### `desktop/src-tauri/cpp/cpp_smoke.cpp` (E.1)
|
||||
|
||||
```cpp
|
||||
extern "C" int wzp_cpp_hello(void) {
|
||||
return 42;
|
||||
}
|
||||
```
|
||||
|
||||
### `desktop/src-tauri/Cargo.toml` (relevant excerpts)
|
||||
|
||||
```toml
|
||||
[package]
|
||||
name = "wzp-desktop"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
name = "wzp_desktop_lib"
|
||||
crate-type = ["staticlib", "cdylib", "rlib"]
|
||||
|
||||
[[bin]]
|
||||
name = "wzp-desktop"
|
||||
path = "src/main.rs"
|
||||
|
||||
[build-dependencies]
|
||||
tauri-build = { version = "2", features = [] }
|
||||
cc = "1"
|
||||
|
||||
[dependencies]
|
||||
tauri = { version = "2", features = [] }
|
||||
tauri-plugin-shell = "2"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.3"
|
||||
anyhow = "1"
|
||||
rustls = { version = "0.23", default-features = false, features = ["ring", "std"] }
|
||||
|
||||
wzp-proto = { path = "../../crates/wzp-proto" }
|
||||
wzp-codec = { path = "../../crates/wzp-codec" }
|
||||
wzp-fec = { path = "../../crates/wzp-fec" }
|
||||
wzp-crypto = { path = "../../crates/wzp-crypto" }
|
||||
wzp-transport = { path = "../../crates/wzp-transport" }
|
||||
|
||||
[target.'cfg(not(target_os = "android"))'.dependencies]
|
||||
wzp-client = { path = "../../crates/wzp-client", features = ["audio", "vpio"] }
|
||||
|
||||
[target.'cfg(target_os = "android")'.dependencies]
|
||||
wzp-client = { path = "../../crates/wzp-client", default-features = false }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Reproduction
|
||||
|
||||
A fresh clone on a Linux x86_64 host with:
|
||||
|
||||
```bash
|
||||
git clone ssh://git@git.manko.yoga:222/manawenuz/wz-phone.git
|
||||
cd wz-phone
|
||||
git checkout feat/desktop-audio-rewrite
|
||||
git reset --hard 0d74366 # <-- step E.1, smallest crashing commit
|
||||
|
||||
# Need: Android NDK r26.1.10909125, JDK 17, Node 20, Rust stable, cargo tauri 2.x
|
||||
scripts/prep-linux-mint.sh # installs all the above into /opt/android-sdk etc.
|
||||
|
||||
cd desktop
|
||||
npm install
|
||||
cd src-tauri
|
||||
cargo tauri android build --debug --target aarch64 --apk
|
||||
adb install -r gen/android/app/build/outputs/apk/universal/debug/app-universal-debug.apk
|
||||
adb logcat -c && adb shell am start -n com.wzp.desktop/.MainActivity
|
||||
adb logcat | grep -E "F DEBUG|__init_tcb|pthread_create"
|
||||
```
|
||||
|
||||
Expected result: SIGSEGV at `__init_tcb+4` within ~500 ms of launch.
|
||||
|
||||
Reverting `cpp/cpp_smoke.cpp` + the `cc::Build` call for it in `build.rs`
|
||||
(one git command: `git revert 0d74366 0224ce6 aa240c6 4250f1b`) restores a
|
||||
working build. Keeping the C sanity compile (`hello.c`, `getauxval_fix.c`)
|
||||
is fine — only the `.cpp(true) + .cpp_link_stdlib("c++_shared")` combination
|
||||
triggers the regression.
|
||||
|
||||
---
|
||||
|
||||
## What we'd like help with
|
||||
|
||||
1. **Is our suspect #2 actually the mechanism?** Is there a known issue
|
||||
where a Tauri/tao android cdylib crashes on load when it has a
|
||||
`libc++_shared.so` NEEDED entry and tries to spawn a thread from inside
|
||||
an onCreate JNI call?
|
||||
|
||||
2. **What's the correct way to link Oboe (or any C++ Android audio
|
||||
library) into a `cargo tauri android build` cdylib** without hitting
|
||||
this? Is there a known-good combination of cc-rs flags / linker
|
||||
arguments / cargo config?
|
||||
|
||||
3. **Is there a way to force `cargo tauri` to use the same linker setup
|
||||
as `cargo ndk`**, which reliably produces working Oboe-linked .so
|
||||
files from the exact same workspace? We've tried env var override,
|
||||
`.cargo/config.toml`, and image-level binary replacement — cargo
|
||||
tauri ignores all three and keeps using
|
||||
`aarch64-linux-android24-clang`.
|
||||
|
||||
4. **Is there a way to defer `tao::ndk_glue::create`'s thread spawn to
|
||||
after `onCreate` returns** so that whatever bionic state `__init_tcb`
|
||||
depends on is ready?
|
||||
|
||||
5. **Lastly** — is there a fundamentally different approach we should
|
||||
take (e.g., use the `oboe` Rust crate from crates.io instead of a
|
||||
hand-rolled C++ bridge, use Android's AAudio directly via the `ndk`
|
||||
crate's aaudio bindings, or even abandon the C++ audio path and
|
||||
implement mic/speaker via JNI into Java `AudioRecord`/`AudioTrack`)?
|
||||
@@ -1,11 +1,16 @@
|
||||
# =============================================================================
|
||||
# WZ Phone — Android build environment (Debian 12 / Bookworm)
|
||||
#
|
||||
# Matches the bare-metal build-android.sh environment:
|
||||
# Supports both:
|
||||
# 1. Legacy Kotlin+JNI Android app (via cargo-ndk + gradle)
|
||||
# 2. Tauri 2.x Mobile Android app (via tauri-cli + Node/npm)
|
||||
#
|
||||
# Toolchain:
|
||||
# - Debian 12 (cmake 3.25, no Android cross-compilation bugs)
|
||||
# - JDK 17 (Gradle 8.5 + AGP 8.2.0 compatible)
|
||||
# - NDK 26.1 (last stable before scudo/MTE crash on NDK 27+)
|
||||
# - Rust stable with aarch64-linux-android target + cargo-ndk
|
||||
# - Node.js 20 LTS (for Tauri frontend build)
|
||||
# - Rust stable with all 4 Android targets + cargo-ndk + tauri-cli 2.x
|
||||
#
|
||||
# Build: docker build -t wzp-android-builder -f Dockerfile.android-builder .
|
||||
# =============================================================================
|
||||
@@ -13,6 +18,11 @@ FROM debian:bookworm
|
||||
|
||||
ARG NDK_VERSION=26.1.10909125
|
||||
ARG ANDROID_API=34
|
||||
# Tauri 2.x mobile targets compileSdk 36 + build-tools 35 by default. Install
|
||||
# both 34 (legacy Kotlin app) and 35/36 (Tauri mobile) so the same image works
|
||||
# for both pipelines.
|
||||
ARG ANDROID_API_TAURI=36
|
||||
ARG BUILD_TOOLS_TAURI=35.0.0
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive \
|
||||
ANDROID_HOME=/opt/android-sdk \
|
||||
@@ -35,8 +45,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
openjdk-17-jdk-headless \
|
||||
ca-certificates \
|
||||
libasound2-dev \
|
||||
file \
|
||||
xz-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# ── Node.js 20 LTS (required by Tauri for frontend build) ────────────────────
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y --no-install-recommends nodejs \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& node --version \
|
||||
&& npm --version
|
||||
|
||||
# ── Android SDK + NDK 26.1 ──────────────────────────────────────────────────
|
||||
RUN mkdir -p $ANDROID_HOME/cmdline-tools \
|
||||
&& cd /tmp \
|
||||
@@ -49,10 +68,36 @@ RUN yes | $ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager --licenses > /dev/nu
|
||||
&& $ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager --install \
|
||||
"platforms;android-${ANDROID_API}" \
|
||||
"build-tools;${ANDROID_API}.0.0" \
|
||||
"platforms;android-${ANDROID_API_TAURI}" \
|
||||
"build-tools;${BUILD_TOOLS_TAURI}" \
|
||||
"ndk;${NDK_VERSION}" \
|
||||
"platform-tools" \
|
||||
2>&1 | grep -v '^\[' > /dev/null
|
||||
|
||||
# Work around the API-24 libc.a stub in the NDK. Any C++ static lib we
|
||||
# link into libwzp_desktop_lib.so (e.g. the Oboe audio bridge) pulls in
|
||||
# bionic's static pthread_create from API-24 libc.a via libc++_shared,
|
||||
# and that pthread_create crashes at __init_tcb+4 when called from a
|
||||
# .so loaded via dlopen (the static stub expects libc init state that
|
||||
# only exists for main executables). API-26 has the proper runtime
|
||||
# bindings. Tauri-cli hard-codes aarch64-linux-android24-clang as the
|
||||
# linker and ignores .cargo/config.toml overrides, so the only sure
|
||||
# fix is to replace the NDK's ${abi}24-clang binary itself with a
|
||||
# shim that exec()s the ${abi}26-clang equivalent. Applies to all four
|
||||
# ABIs × {clang, clang++}. The legacy wzp-android crate works without
|
||||
# this because cargo-ndk honours a crate-level linker override; the
|
||||
# shim is the minimal targeted fix for the cargo-tauri build path.
|
||||
# Added as Option 3 for the incremental Step E regression (commit 4250f1b).
|
||||
RUN set -eux; \
|
||||
BIN=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin; \
|
||||
for abi in aarch64-linux-android armv7a-linux-androideabi i686-linux-android x86_64-linux-android; do \
|
||||
for suffix in clang clang++; do \
|
||||
mv "$BIN/${abi}24-${suffix}" "$BIN/${abi}24-${suffix}.orig"; \
|
||||
printf '#!/bin/sh\nexec "%s/%s26-%s" "$@"\n' "$BIN" "$abi" "$suffix" > "$BIN/${abi}24-${suffix}"; \
|
||||
chmod +x "$BIN/${abi}24-${suffix}"; \
|
||||
done; \
|
||||
done
|
||||
|
||||
# Make SDK world-readable so builder user can access it
|
||||
RUN chmod -R a+rX $ANDROID_HOME
|
||||
|
||||
@@ -64,12 +109,22 @@ USER builder
|
||||
WORKDIR /home/builder
|
||||
|
||||
# ── Rust toolchain ───────────────────────────────────────────────────────────
|
||||
# Install all 4 Android targets (Tauri Mobile builds for all ABIs by default;
|
||||
# cargo-ndk legacy path only needs arm64-v8a — both workflows supported).
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
|
||||
| sh -s -- -y --default-toolchain stable \
|
||||
&& . $HOME/.cargo/env \
|
||||
&& rustup target add aarch64-linux-android \
|
||||
&& cargo install cargo-ndk
|
||||
&& rustup target add \
|
||||
aarch64-linux-android \
|
||||
armv7-linux-androideabi \
|
||||
i686-linux-android \
|
||||
x86_64-linux-android \
|
||||
&& cargo install cargo-ndk \
|
||||
&& cargo install tauri-cli --version "^2.0" --locked
|
||||
|
||||
ENV PATH="/home/builder/.cargo/bin:$ANDROID_HOME/cmdline-tools/latest/bin:$ANDROID_HOME/platform-tools:$JAVA_HOME/bin:$PATH"
|
||||
|
||||
# NDK_HOME is the env var tauri-cli checks (in addition to ANDROID_NDK_HOME)
|
||||
ENV NDK_HOME=$ANDROID_NDK_HOME
|
||||
|
||||
WORKDIR /build/source
|
||||
|
||||
@@ -5,10 +5,15 @@ set -euo pipefail
|
||||
# notify via ntfy.sh/wzp. Fire and forget.
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/build-and-notify.sh Build + upload + notify
|
||||
# ./scripts/build-and-notify.sh Build current local branch
|
||||
# ./scripts/build-and-notify.sh --branch opus-DRED Build a specific branch
|
||||
# ./scripts/build-and-notify.sh --rust Force Rust rebuild
|
||||
# ./scripts/build-and-notify.sh --pull Git pull before building
|
||||
# ./scripts/build-and-notify.sh --no-pull Skip git pull (use cached source)
|
||||
# ./scripts/build-and-notify.sh --install Also download + adb install locally
|
||||
#
|
||||
# The remote builder pulls the requested branch from its `origin` (gitea:
|
||||
# git.manko.yoga). Make sure you've pushed the branch to `origin` before
|
||||
# running this script, otherwise the remote fetch will fail loudly.
|
||||
|
||||
REMOTE_HOST="SepehrHomeserverdk"
|
||||
BASE_DIR="/mnt/storage/manBuilder"
|
||||
@@ -19,14 +24,29 @@ SSH_OPTS="-o ConnectTimeout=15 -o ServerAliveInterval=15 -o ServerAliveCountMax=
|
||||
REBUILD_RUST=0
|
||||
DO_PULL=1
|
||||
DO_INSTALL=0
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
# Default to whatever branch the local workspace is on — "build what I'm
|
||||
# working on" is the intuitive behavior for iterative development.
|
||||
BRANCH=$(git -C "$(dirname "$0")/.." branch --show-current 2>/dev/null || echo "")
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--rust) REBUILD_RUST=1 ;;
|
||||
--pull) DO_PULL=1 ;;
|
||||
--no-pull) DO_PULL=0 ;;
|
||||
--install) DO_INSTALL=1 ;;
|
||||
--branch)
|
||||
shift
|
||||
BRANCH="$1"
|
||||
;;
|
||||
--branch=*) BRANCH="${1#--branch=}" ;;
|
||||
*) echo "Unknown arg: $1"; exit 1 ;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
if [ -z "$BRANCH" ]; then
|
||||
echo "ERROR: could not determine target branch (detached HEAD?). Pass --branch NAME."
|
||||
exit 1
|
||||
fi
|
||||
echo "Target branch: $BRANCH"
|
||||
|
||||
log() { echo -e "\033[1;36m>>> $*\033[0m"; }
|
||||
|
||||
@@ -42,20 +62,33 @@ BASE_DIR="/mnt/storage/manBuilder"
|
||||
NTFY_TOPIC="https://ntfy.sh/wzp"
|
||||
REBUILD_RUST="${1:-0}"
|
||||
DO_PULL="${2:-0}"
|
||||
BRANCH="${3:-}"
|
||||
|
||||
if [ -z "$BRANCH" ]; then
|
||||
echo "ERROR: remote script invoked without a BRANCH argument"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
notify() { curl -s -d "$1" "$NTFY_TOPIC" > /dev/null 2>&1 || true; }
|
||||
|
||||
trap 'notify "WZP Android build FAILED! Check /tmp/wzp-build.log"' ERR
|
||||
trap 'notify "WZP Android build FAILED [$BRANCH]! Check /tmp/wzp-build.log"' ERR
|
||||
|
||||
# Pull if requested
|
||||
# Pull the requested branch. Previously this was hardcoded to
|
||||
# feat/android-voip-client with `|| true` on the reset, which silently
|
||||
# left the tree on whatever branch it was last on when the hardcoded
|
||||
# branch didn't exist on origin. Now the branch is a parameter and any
|
||||
# failure aborts the build so nobody ships an APK from the wrong source.
|
||||
if [ "$DO_PULL" = "1" ]; then
|
||||
echo ">>> Pulling latest..."
|
||||
echo ">>> Pulling branch '$BRANCH' from origin..."
|
||||
cd "$BASE_DIR/data/source"
|
||||
git reset --hard HEAD 2>/dev/null || true
|
||||
git clean -fd 2>/dev/null || true
|
||||
git gc --prune=now 2>/dev/null || true
|
||||
git fetch origin feat/android-voip-client 2>&1 | tail -3
|
||||
git reset --hard origin/feat/android-voip-client 2>/dev/null || true
|
||||
git fetch origin "$BRANCH"
|
||||
git reset --hard "origin/$BRANCH"
|
||||
BUILT_HASH=$(git rev-parse --short HEAD)
|
||||
BUILT_SUBJECT=$(git log -1 --format=%s)
|
||||
echo ">>> HEAD after pull: $BUILT_HASH — $BUILT_SUBJECT"
|
||||
fi
|
||||
|
||||
# Clean Rust if requested
|
||||
@@ -73,7 +106,7 @@ find "$BASE_DIR/data/source" "$BASE_DIR/data/cache" \
|
||||
rm -rf "$BASE_DIR/data/source/android/app/src/main/jniLibs/arm64-v8a"
|
||||
|
||||
GIT_HASH=$(cd $BASE_DIR/data/source && git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
||||
notify "WZP Android build started [$GIT_HASH]..."
|
||||
notify "WZP Android build started [$BRANCH @ $GIT_HASH]..."
|
||||
|
||||
echo ">>> Building in Docker..."
|
||||
docker run --rm --user 1000:1000 \
|
||||
@@ -106,7 +139,7 @@ ls -lh android/app/src/main/jniLibs/arm64-v8a/
|
||||
|
||||
echo ">>> APK build..."
|
||||
cd android && chmod +x gradlew
|
||||
./gradlew clean assembleDebug --no-daemon --warning-mode=none 2>&1 | tail -3
|
||||
./gradlew clean assembleDebug --no-daemon --warning-mode=none 2>&1 | tail -50
|
||||
echo "APK_BUILT"
|
||||
'
|
||||
|
||||
@@ -117,10 +150,10 @@ APK=$(find "$BASE_DIR/data/source/android" -name "app-debug*.apk" -path "*/outpu
|
||||
if [ -n "$APK" ]; then
|
||||
URL=$(curl -s -F "file=@$APK" -H "Authorization: $rusty_auth_token" "$rusty_address")
|
||||
echo "UPLOAD_URL=$URL"
|
||||
notify "WZP Android [$GIT_HASH] done! APK: $URL"
|
||||
notify "WZP Android [$BRANCH @ $GIT_HASH] done! APK: $URL"
|
||||
echo ">>> Done! APK at: $URL"
|
||||
else
|
||||
notify "WZP build FAILED - no APK"
|
||||
notify "WZP build FAILED [$BRANCH @ $GIT_HASH] - no APK"
|
||||
echo "ERROR: No APK found"
|
||||
exit 1
|
||||
fi
|
||||
@@ -129,9 +162,9 @@ REMOTE_SCRIPT
|
||||
ssh_cmd "chmod +x /tmp/wzp-docker-build.sh"
|
||||
|
||||
# Run in tmux
|
||||
log "Starting build in tmux..."
|
||||
log "Starting build in tmux (branch: $BRANCH)..."
|
||||
ssh_cmd "tmux kill-session -t wzp-build 2>/dev/null; true"
|
||||
ssh_cmd "tmux new-session -d -s wzp-build '/tmp/wzp-docker-build.sh $REBUILD_RUST $DO_PULL 2>&1 | tee /tmp/wzp-build.log'"
|
||||
ssh_cmd "tmux new-session -d -s wzp-build '/tmp/wzp-docker-build.sh $REBUILD_RUST $DO_PULL $BRANCH 2>&1 | tee /tmp/wzp-build.log'"
|
||||
|
||||
log "Build running! You'll get a notification on ntfy.sh/wzp with the download URL."
|
||||
echo ""
|
||||
|
||||
253
scripts/build-tauri-android.sh
Executable file
253
scripts/build-tauri-android.sh
Executable file
@@ -0,0 +1,253 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# =============================================================================
|
||||
# WZ Phone — Tauri 2.x Mobile Android APK build
|
||||
#
|
||||
# Builds the desktop/ Tauri app as an Android APK via cargo-tauri inside the
|
||||
# wzp-android-builder Docker image on SepehrHomeserverdk. Uploads the APK to
|
||||
# rustypaste, fires ntfy.sh/wzp notifications at start + finish, and SCPs the
|
||||
# APK back locally.
|
||||
#
|
||||
# Same pattern as build-and-notify.sh but for the Tauri mobile pipeline:
|
||||
# - Source: desktop/src-tauri/ (not android/)
|
||||
# - Build: cargo tauri android build (not gradlew assembleDebug)
|
||||
# - Output: desktop/src-tauri/gen/android/.../*.apk
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/build-tauri-android.sh # full pipeline (debug)
|
||||
# ./scripts/build-tauri-android.sh --release # release APK
|
||||
# ./scripts/build-tauri-android.sh --no-pull # skip git fetch
|
||||
# ./scripts/build-tauri-android.sh --rust # force-clean rust target
|
||||
# ./scripts/build-tauri-android.sh --init # also run `cargo tauri android init`
|
||||
#
|
||||
# Environment:
|
||||
# WZP_BRANCH Branch to build (default: feat/desktop-audio-rewrite)
|
||||
# =============================================================================
|
||||
|
||||
REMOTE_HOST="SepehrHomeserverdk"
|
||||
BASE_DIR="/mnt/storage/manBuilder"
|
||||
NTFY_TOPIC="https://ntfy.sh/wzp"
|
||||
LOCAL_OUTPUT="target/tauri-android-apk"
|
||||
BRANCH="${WZP_BRANCH:-feat/desktop-audio-rewrite}"
|
||||
SSH_OPTS="-o ConnectTimeout=15 -o ServerAliveInterval=15 -o ServerAliveCountMax=4 -o LogLevel=ERROR"
|
||||
|
||||
REBUILD_RUST=0
|
||||
DO_PULL=1
|
||||
DO_INIT=0
|
||||
BUILD_RELEASE=0
|
||||
# Parse command-line flags into the REBUILD_RUST / DO_PULL / DO_INIT /
# BUILD_RELEASE switches. Unknown flags are rejected so that a typo such as
# `--relase` cannot silently fall back to a debug build.
for arg in "$@"; do
    case "$arg" in
        --rust)    REBUILD_RUST=1 ;;
        --pull)    DO_PULL=1 ;;
        --no-pull) DO_PULL=0 ;;
        --init)    DO_INIT=1 ;;
        --release) BUILD_RELEASE=1 ;;
        -h|--help)
            # Print only the leading comment banner. A fixed line range would
            # run past the banner into the variable assignments below it and
            # drift whenever the header is edited.
            awk 'NR == 1 { next } /^#/ { seen = 1; print; next } seen { exit }' "$0"
            exit 0
            ;;
        *)
            echo "Unknown option: $arg (see --help)" >&2
            exit 2
            ;;
    esac
done
|
||||
|
||||
# Print a highlighted progress line (bold cyan, ">>> " prefix) to stdout.
log() {
    local msg="$*"
    echo -e "\033[1;36m>>> ${msg}\033[0m"
}
|
||||
# Run a command on the build host over SSH with agent forwarding (-A).
# SSH_OPTS is deliberately unquoted so it word-splits into separate options.
ssh_cmd() {
    # shellcheck disable=SC2086
    ssh -A $SSH_OPTS "$REMOTE_HOST" "$@"
}
|
||||
|
||||
# Best-effort push of a message to the ntfy topic from the local machine.
# Output is discarded and a curl failure never fails the caller.
notify_local() {
    local message="$1"
    curl -s -d "$message" "$NTFY_TOPIC" > /dev/null 2>&1 || true
}
|
||||
|
||||
mkdir -p "$LOCAL_OUTPUT"
|
||||
|
||||
log "Uploading remote build script..."
|
||||
ssh_cmd "cat > /tmp/wzp-tauri-build.sh" <<'REMOTE_SCRIPT'
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
BASE_DIR="/mnt/storage/manBuilder"
|
||||
NTFY_TOPIC="https://ntfy.sh/wzp"
|
||||
BRANCH="${1:-feat/desktop-audio-rewrite}"
|
||||
DO_PULL="${2:-1}"
|
||||
REBUILD_RUST="${3:-0}"
|
||||
DO_INIT="${4:-0}"
|
||||
BUILD_RELEASE="${5:-0}"
|
||||
|
||||
LOG_FILE=/tmp/wzp-tauri-build.log
|
||||
GIT_HASH="unknown" # populated after fetch
|
||||
ENV_FILE="$BASE_DIR/.env"
|
||||
|
||||
# Best-effort push of a message to the ntfy topic from the build host.
# Output is discarded and a curl failure never fails the caller.
notify() {
    local message="$1"
    curl -s -d "$message" "$NTFY_TOPIC" > /dev/null 2>&1 || :
}
|
||||
|
||||
# Upload a file to rustypaste and print the resulting URL on stdout.
#
#   $1  path of the file to upload
#
# Prints an empty line instead of a URL when the env file is missing, when
# rusty_address / rusty_auth_token are not set in it, or when the upload fails.
# Always returns 0 so it is safe to call from the ERR trap.
upload_to_rustypaste() {
    local file="$1"
    if [ ! -f "$ENV_FILE" ]; then
        echo ""
        return
    fi
    # shellcheck disable=SC1090
    source "$ENV_FILE"
    if [ -n "${rusty_address:-}" ] && [ -n "${rusty_auth_token:-}" ]; then
        # --fail: without it curl exits 0 on HTTP 4xx/5xx and prints the error
        # body, which callers would then forward as if it were the URL.
        curl -s --fail -F "file=@$file" -H "Authorization: $rusty_auth_token" "$rusty_address" || echo ""
    else
        echo ""
    fi
}
|
||||
|
||||
# ERR-trap handler. Uploads the build log to rustypaste and sends a failure
# notification carrying the commit hash, the failing line number ($1) and,
# when the upload produced one, the log URL.
on_error() {
    local failed_line="$1"
    local uploaded_log
    uploaded_log=$(upload_to_rustypaste "$LOG_FILE" || echo "")
    if [ -z "$uploaded_log" ]; then
        notify "WZP Tauri Android build FAILED [$GIT_HASH] (line $failed_line) — log upload failed, see $LOG_FILE on remote"
        return
    fi
    notify "WZP Tauri Android build FAILED [$GIT_HASH] (line $failed_line)
log: $uploaded_log"
}
|
||||
trap 'on_error $LINENO' ERR
|
||||
|
||||
exec > >(tee "$LOG_FILE") 2>&1
|
||||
|
||||
if [ "$DO_PULL" = "1" ]; then
|
||||
echo ">>> git fetch + reset $BRANCH"
|
||||
cd "$BASE_DIR/data/source"
|
||||
git reset --hard HEAD 2>/dev/null || true
|
||||
# NOTE: deliberately do NOT run `git clean -fd` here. It would wipe the
|
||||
# tauri-generated `desktop/src-tauri/gen/android/` scaffold (gradlew,
|
||||
# settings.gradle, etc.) which is expensive to recreate and breaks
|
||||
# subsequent builds with "gradlew not found".
|
||||
git gc --prune=now 2>/dev/null || true
|
||||
git fetch origin "$BRANCH" 2>&1 | tail -3
|
||||
git checkout "$BRANCH" 2>/dev/null || git checkout -b "$BRANCH" "origin/$BRANCH"
|
||||
git reset --hard "origin/$BRANCH"
|
||||
git submodule update --init || true
|
||||
fi
|
||||
|
||||
GIT_HASH=$(cd "$BASE_DIR/data/source" && git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
||||
GIT_MSG=$(cd "$BASE_DIR/data/source" && git log -1 --pretty=%s 2>/dev/null | head -c 60 || echo "?")
|
||||
notify "WZP Tauri Android build STARTED [$GIT_HASH] — $GIT_MSG"
|
||||
|
||||
# Fix perms so uid 1000 can write.
# The ownership test is grouped explicitly and chown is run via `-exec … +`
# rather than piping to xargs, so paths containing spaces or quotes are passed
# through intact. Errors are ignored on purpose (best-effort).
find "$BASE_DIR/data/source" "$BASE_DIR/data/cache" \
    \( ! -user 1000 -o ! -group 1000 \) \
    -exec chown 1000:1000 {} + 2>/dev/null || true
|
||||
|
||||
# Optionally clean rust target for android triples
|
||||
if [ "$REBUILD_RUST" = "1" ]; then
|
||||
echo ">>> Cleaning Rust android target dirs..."
|
||||
rm -rf "$BASE_DIR/data/cache/target/aarch64-linux-android" \
|
||||
"$BASE_DIR/data/cache/target/armv7-linux-androideabi" \
|
||||
"$BASE_DIR/data/cache/target/i686-linux-android" \
|
||||
"$BASE_DIR/data/cache/target/x86_64-linux-android"
|
||||
fi
|
||||
|
||||
# Profile flag
|
||||
PROFILE_FLAG="--debug"
|
||||
[ "$BUILD_RELEASE" = "1" ] && PROFILE_FLAG=""
|
||||
|
||||
# Persist ~/.android (where the auto-generated debug.keystore lives) so every
|
||||
# build is signed with the SAME key. Without this, every fresh container gets
|
||||
# a new debug keystore and `adb install -r` fails with INSTALL_FAILED_UPDATE_
|
||||
# INCOMPATIBLE because the signature changed.
|
||||
mkdir -p "$BASE_DIR/data/cache/android-home"
|
||||
chown 1000:1000 "$BASE_DIR/data/cache/android-home" 2>/dev/null || true
|
||||
|
||||
docker run --rm \
|
||||
--user 1000:1000 \
|
||||
-e DO_INIT="$DO_INIT" \
|
||||
-e PROFILE_FLAG="$PROFILE_FLAG" \
|
||||
-v "$BASE_DIR/data/source:/build/source" \
|
||||
-v "$BASE_DIR/data/cache/cargo-registry:/home/builder/.cargo/registry" \
|
||||
-v "$BASE_DIR/data/cache/cargo-git:/home/builder/.cargo/git" \
|
||||
-v "$BASE_DIR/data/cache/target:/build/source/target" \
|
||||
-v "$BASE_DIR/data/cache/gradle:/home/builder/.gradle" \
|
||||
-v "$BASE_DIR/data/cache/android-home:/home/builder/.android" \
|
||||
wzp-android-builder \
|
||||
bash -c '
|
||||
set -euo pipefail
|
||||
cd /build/source/desktop
|
||||
|
||||
echo ">>> npm install"
|
||||
npm install --silent 2>&1 | tail -5 || npm install 2>&1 | tail -20
|
||||
|
||||
cd src-tauri
|
||||
|
||||
# Run init if forced, OR if the gradle wrapper is missing. Just checking
|
||||
# for `gen/android` is not enough — Tauri creates a few subdirectories
|
||||
# during build (app/, buildSrc/, .gradle/) that survive a partial wipe and
|
||||
# would make a naive `[ ! -d gen/android ]` check return false even though
|
||||
# the build wrapper itself is gone.
|
||||
if [ "${DO_INIT}" = "1" ] || [ ! -x gen/android/gradlew ]; then
|
||||
echo ">>> cargo tauri android init"
|
||||
cargo tauri android init 2>&1 | tail -20
|
||||
fi
|
||||
|
||||
# ─── wzp-native standalone cdylib (built with cargo-ndk, not cargo-tauri) ──
|
||||
# Produces libwzp_native.so which wzp-desktop dlopens at runtime via
|
||||
# libloading. Split exists because cargo-tauri`s linker wiring pulls
|
||||
# bionic private symbols into any cdylib with cc::Build C++, causing
|
||||
# __init_tcb+4 SIGSEGV. cargo-ndk uses the same linker path as the
|
||||
# legacy wzp-android crate which works.
|
||||
echo ">>> cargo ndk build -p wzp-native --release"
|
||||
JNI_ABI_DIR=gen/android/app/src/main/jniLibs/arm64-v8a
|
||||
mkdir -p "$JNI_ABI_DIR"
|
||||
(
|
||||
cd /build/source
|
||||
cargo ndk -t arm64-v8a -o desktop/src-tauri/gen/android/app/src/main/jniLibs \
|
||||
build --release -p wzp-native 2>&1 | tail -10
|
||||
)
|
||||
if [ -f "$JNI_ABI_DIR/libwzp_native.so" ]; then
|
||||
ls -lh "$JNI_ABI_DIR/libwzp_native.so"
|
||||
else
|
||||
echo ">>> WARNING: libwzp_native.so not produced"
|
||||
fi
|
||||
|
||||
echo ">>> cargo tauri android build ${PROFILE_FLAG} --target aarch64 --apk"
|
||||
cargo tauri android build ${PROFILE_FLAG} --target aarch64 --apk
|
||||
|
||||
echo ""
|
||||
echo ">>> Build artifacts:"
|
||||
find gen/android -name "*.apk" -exec ls -lh {} \; 2>/dev/null
|
||||
'
|
||||
|
||||
# Locate the produced APK. gen/android is deliberately kept between builds (no
# `git clean`), so APKs from earlier debug/release runs may still be present;
# sort by modification time and take the newest instead of whichever entry
# find happens to list first. `|| true` keeps a failing find (e.g. missing
# directory) from aborting here under pipefail, so the "no APK produced"
# branch below can report it.
APK=$(find "$BASE_DIR/data/source/desktop/src-tauri/gen/android" -name "*.apk" -type f \
        -printf '%T@\t%p\n' 2>/dev/null | sort -rn | head -1 | cut -f2- || true)
|
||||
if [ -z "$APK" ] || [ ! -f "$APK" ]; then
|
||||
LOG_URL=$(upload_to_rustypaste "$LOG_FILE" || echo "")
|
||||
if [ -n "$LOG_URL" ]; then
|
||||
notify "WZP Tauri Android build [$GIT_HASH]: no APK produced
|
||||
log: $LOG_URL"
|
||||
else
|
||||
notify "WZP Tauri Android build [$GIT_HASH]: no APK produced — log upload failed"
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
APK_SIZE=$(du -h "$APK" | cut -f1)
|
||||
|
||||
RUSTY_URL=$(upload_to_rustypaste "$APK" || echo "")
|
||||
if [ -n "$RUSTY_URL" ]; then
|
||||
notify "WZP Tauri Android build OK [$GIT_HASH] ($APK_SIZE)
|
||||
$RUSTY_URL"
|
||||
else
|
||||
notify "WZP Tauri Android build OK [$GIT_HASH] ($APK_SIZE) — rustypaste upload skipped"
|
||||
fi
|
||||
|
||||
# Print path so the local script can grab it
|
||||
echo "APK_REMOTE_PATH=$APK"
|
||||
REMOTE_SCRIPT
|
||||
|
||||
ssh_cmd "chmod +x /tmp/wzp-tauri-build.sh"
|
||||
|
||||
notify_local "WZP Tauri Android build dispatched (branch=$BRANCH, release=$BUILD_RELEASE)"
|
||||
log "Triggering remote build (branch=$BRANCH)..."
|
||||
|
||||
# Run; capture full output, last line is APK_REMOTE_PATH=...
|
||||
REMOTE_OUTPUT=$(ssh_cmd "/tmp/wzp-tauri-build.sh '$BRANCH' '$DO_PULL' '$REBUILD_RUST' '$DO_INIT' '$BUILD_RELEASE'" || true)
|
||||
echo "$REMOTE_OUTPUT" | tail -60
|
||||
|
||||
APK_REMOTE=$(echo "$REMOTE_OUTPUT" | grep '^APK_REMOTE_PATH=' | tail -1 | cut -d= -f2-)
|
||||
if [ -n "$APK_REMOTE" ]; then
|
||||
log "Downloading APK to $LOCAL_OUTPUT/wzp-tauri.apk..."
|
||||
scp $SSH_OPTS "$REMOTE_HOST:$APK_REMOTE" "$LOCAL_OUTPUT/wzp-tauri.apk"
|
||||
echo " $LOCAL_OUTPUT/wzp-tauri.apk ($(du -h "$LOCAL_OUTPUT/wzp-tauri.apk" | cut -f1))"
|
||||
else
|
||||
log "No APK produced — see ntfy / remote log /tmp/wzp-tauri-build.log"
|
||||
exit 1
|
||||
fi
|
||||
72
scripts/mint-tmux.sh
Executable file
72
scripts/mint-tmux.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# mint-tmux.sh — run a command inside a persistent tmux session on the
|
||||
# Linux Mint build box so the user can attach and watch/interact at any time.
|
||||
#
|
||||
# Usage:
|
||||
# mint-tmux.sh run <window-name> <command...> # start a new tmux window
|
||||
# mint-tmux.sh send <window-name> <text...> # send keys to a window
|
||||
# mint-tmux.sh kill <window-name> # close a window
|
||||
# mint-tmux.sh list # list windows
|
||||
# mint-tmux.sh tail <window-name> # dump last 200 lines
# mint-tmux.sh attach # attach to the session interactively
|
||||
#
|
||||
# Session name is always "wzp". Attach manually with:
|
||||
# ssh -t root@172.16.81.192 tmux attach -t wzp
|
||||
#
|
||||
# If the wzp session doesn't exist yet, it's created automatically.
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
HOST="root@172.16.81.192"
|
||||
SESSION="wzp"
|
||||
SSH_OPTS="-o ConnectTimeout=10 -o LogLevel=ERROR"
|
||||
|
||||
# Make sure the tmux session exists on the remote host: if `has-session`
# fails, create it detached with a first window named "home" running a login
# shell.
ensure_session() {
    local remote_cmd="tmux has-session -t $SESSION 2>/dev/null || tmux new-session -d -s $SESSION -n home 'bash -l'"
    ssh $SSH_OPTS "$HOST" "$remote_cmd"
}
|
||||
|
||||
cmd="${1:-list}"
|
||||
shift || true
|
||||
|
||||
# Dispatch on the requested sub-command (defaults to "list").
case "$cmd" in
    run)
        # (Re)start a named window running the given command.
        WIN="${1:?window name required}"; shift
        ensure_session
        # Use a heredoc so multi-arg commands don't need escaping
        CMD="$*"
        # The heredoc is unquoted: $SESSION/$WIN/$CMD expand locally, while
        # \\\$ survives as a literal $ for the shell that runs inside tmux.
        # The command's exit status is captured in rc right away; reading $?
        # after the blank `echo` would always report 0.
        ssh $SSH_OPTS "$HOST" bash -s <<REMOTE
if tmux list-windows -t $SESSION -F '#W' 2>/dev/null | grep -qxF '$WIN'; then
    tmux kill-window -t $SESSION:$WIN 2>/dev/null || true
fi
tmux new-window -t $SESSION -n '$WIN' "bash -l -c '$CMD; rc=\\\$?; echo; echo --- window $WIN exited with code \\\$rc; exec bash -l'"
REMOTE
        echo "Started '$WIN' in tmux session $SESSION on $HOST"
        echo "Attach: ssh -t $HOST tmux attach -t $SESSION"
        ;;
    send)
        # Type text into a window followed by Enter.
        WIN="${1:?window name required}"; shift
        TEXT="$*"
        # %q-quote the text so embedded single quotes survive the remote shell.
        ssh $SSH_OPTS "$HOST" "tmux send-keys -t $SESSION:$WIN $(printf '%q' "$TEXT") C-m"
        ;;
    kill)
        WIN="${1:?window name required}"
        ssh $SSH_OPTS "$HOST" "tmux kill-window -t $SESSION:$WIN 2>/dev/null || true"
        ;;
    list)
        ensure_session
        ssh $SSH_OPTS "$HOST" "tmux list-windows -t $SESSION"
        ;;
    tail)
        # Dump the last 200 lines of the window's pane.
        WIN="${1:?window name required}"
        ssh $SSH_OPTS "$HOST" "tmux capture-pane -p -t $SESSION:$WIN -S -200 || echo 'no such window'"
        ;;
    attach)
        exec ssh -t $SSH_OPTS "$HOST" tmux attach -t $SESSION
        ;;
    -h|--help|help)
        # Print the leading comment banner only (stops at the first non-comment line).
        awk 'NR == 1 { next } /^#/ { seen = 1; print; next } seen { exit }' "$0"
        exit 0
        ;;
    *)
        # Unknown sub-command: show the banner and fail. A fixed line range
        # would spill into the code below the banner.
        awk 'NR == 1 { next } /^#/ { seen = 1; print; next } seen { exit }' "$0"
        exit 1
        ;;
esac
|
||||
167
scripts/prep-linux-mint.sh
Executable file
167
scripts/prep-linux-mint.sh
Executable file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env bash
|
||||
# =============================================================================
|
||||
# Prepare a Linux Mint / Debian / Ubuntu x86_64 host as a full WarzonePhone
|
||||
# Android build environment. Installs everything the docker wzp-android-builder
|
||||
# image has, but directly on the host — so we can iterate locally without
|
||||
# docker layer caching, see real linker output, run gdbserver, etc.
|
||||
#
|
||||
# Target host: root@172.16.81.192 (Linux Mint on the LAN)
|
||||
#
|
||||
# Usage (from the macOS workstation):
|
||||
# scp scripts/prep-linux-mint.sh root@172.16.81.192:/tmp/
|
||||
# ssh root@172.16.81.192 'nohup bash /tmp/prep-linux-mint.sh > /var/log/wzp-prep.log 2>&1 &'
|
||||
#
|
||||
# The script is idempotent: safe to re-run if a step fails. Each stage tests
|
||||
# for its target before doing work. Progress + completion is pinged to
|
||||
# ntfy.sh/wzp so we can track it from the phone.
|
||||
#
|
||||
# On success the host has:
|
||||
# - JDK 17
|
||||
# - Android SDK (cmdline-tools + platforms 34/36, build-tools 34/35, NDK 26.1)
|
||||
# - Node.js 20 LTS + npm
|
||||
# - Rust stable + aarch64/armv7/i686/x86_64 android targets
|
||||
# - cargo-ndk + cargo tauri-cli 2.x
|
||||
# - /opt/wzp/warzonePhone (cloned workspace checkout on feat/desktop-audio-rewrite)
|
||||
#
|
||||
# Everything lives under /opt/android-sdk and /opt/wzp so nothing leaks into $HOME.
|
||||
# =============================================================================
|
||||
set -euo pipefail
|
||||
|
||||
NTFY_TOPIC="https://ntfy.sh/wzp"
|
||||
NDK_VERSION="26.1.10909125"
|
||||
ANDROID_API=34
|
||||
ANDROID_API_TAURI=36
|
||||
BUILD_TOOLS_TAURI="35.0.0"
|
||||
ANDROID_HOME=/opt/android-sdk
|
||||
WZP_DIR=/opt/wzp
|
||||
GIT_REPO="ssh://git@git.manko.yoga:222/manawenuz/wz-phone.git"
|
||||
GIT_BRANCH="feat/desktop-audio-rewrite"
|
||||
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
export ANDROID_HOME ANDROID_NDK_HOME="$ANDROID_HOME/ndk/$NDK_VERSION"
|
||||
export NDK_HOME="$ANDROID_NDK_HOME"
|
||||
export PATH="$ANDROID_HOME/cmdline-tools/latest/bin:$ANDROID_HOME/platform-tools:/root/.cargo/bin:$PATH"
|
||||
|
||||
# Best-effort progress ping to the ntfy topic; output is discarded and a curl
# failure never fails the caller.
notify() {
    local message="$1"
    curl -s -d "$message" "$NTFY_TOPIC" > /dev/null 2>&1 || :
}
|
||||
# Print a stage message on its own line, prefixed with a bold-cyan
# "[prep-linux-mint]" tag.
log() {
    local msg="$*"
    echo -e "\n\033[1;36m[prep-linux-mint]\033[0m ${msg}"
}
|
||||
# Abort the run: report the reason ($1) via ntfy and on stderr, then exit 1.
die() {
    local reason="$1"
    notify "wzp prep-linux-mint FAILED: $reason"
    echo "FATAL: $reason" >&2
    exit 1
}
|
||||
|
||||
trap 'die "line $LINENO"' ERR
|
||||
|
||||
notify "wzp prep-linux-mint STARTED on $(hostname) ($(whoami))"
|
||||
|
||||
# ─── 1. Base packages ────────────────────────────────────────────────────────
|
||||
log "Installing base packages..."
|
||||
apt-get update -qq
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
cmake \
|
||||
curl \
|
||||
file \
|
||||
git \
|
||||
libasound2-dev \
|
||||
libc6-dev \
|
||||
libssl-dev \
|
||||
openjdk-17-jdk-headless \
|
||||
pkg-config \
|
||||
unzip \
|
||||
wget \
|
||||
xz-utils \
|
||||
zip
|
||||
|
||||
# ─── 2. Android SDK + NDK ────────────────────────────────────────────────────
|
||||
# Install the Android command-line tools unless sdkmanager is already present
# and executable.
if [ ! -x "$ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager" ]; then
    log "Installing Android cmdline-tools..."
    mkdir -p "$ANDROID_HOME/cmdline-tools"
    cd /tmp
    wget -q https://dl.google.com/android/repository/commandlinetools-linux-11076708_latest.zip -O cmdtools.zip
    unzip -qo cmdtools.zip -d "$ANDROID_HOME/cmdline-tools"
    # An interrupted earlier run can leave a partial `latest/` behind. `mv`
    # would then place the fresh tree inside it (latest/cmdline-tools/...)
    # and sdkmanager would still be missing, so clear it first to keep the
    # script safe to re-run.
    rm -rf "$ANDROID_HOME/cmdline-tools/latest"
    mv "$ANDROID_HOME/cmdline-tools/cmdline-tools" "$ANDROID_HOME/cmdline-tools/latest"
    rm cmdtools.zip
else
    log "cmdline-tools already installed"
fi
|
||||
|
||||
# Install the SDK platforms, build-tools, NDK and platform-tools when any of
# the checked directories (NDK, platform 34, platform 36) is missing.
if [ ! -d "$ANDROID_HOME/ndk/$NDK_VERSION" ] || \
   [ ! -d "$ANDROID_HOME/platforms/android-$ANDROID_API" ] || \
   [ ! -d "$ANDROID_HOME/platforms/android-$ANDROID_API_TAURI" ]; then
    log "Installing Android platforms + NDK $NDK_VERSION..."
    # Licence acceptance is best-effort; its output and status are ignored.
    yes | "$ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager" --licenses > /dev/null 2>&1 || true
    # Drop lines starting with "[" from the output. Only grep's
    # "nothing left to print" status is neutralised; with pipefail a failing
    # sdkmanager still fails the pipeline and triggers the ERR trap, instead
    # of being swallowed by a trailing `|| true`.
    "$ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager" --install \
        "platforms;android-$ANDROID_API" \
        "build-tools;$ANDROID_API.0.0" \
        "platforms;android-$ANDROID_API_TAURI" \
        "build-tools;$BUILD_TOOLS_TAURI" \
        "ndk;$NDK_VERSION" \
        "platform-tools" 2>&1 | { grep -v '^\[' || true; }
else
    log "Android SDK components already installed"
fi
|
||||
|
||||
# ─── 3. Node.js 20 LTS ───────────────────────────────────────────────────────
|
||||
if ! command -v node >/dev/null 2>&1 || ! node --version | grep -q "^v20"; then
|
||||
log "Installing Node.js 20 LTS..."
|
||||
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
|
||||
apt-get install -y --no-install-recommends nodejs
|
||||
else
|
||||
log "Node.js already at $(node --version)"
|
||||
fi
|
||||
|
||||
# ─── 4. Rust + Android targets ───────────────────────────────────────────────
|
||||
if ! command -v rustup >/dev/null 2>&1; then
|
||||
log "Installing rustup..."
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
||||
fi
|
||||
. /root/.cargo/env
|
||||
|
||||
log "Ensuring Rust android targets + cargo-ndk + cargo-tauri..."
|
||||
rustup target add \
|
||||
aarch64-linux-android \
|
||||
armv7-linux-androideabi \
|
||||
i686-linux-android \
|
||||
x86_64-linux-android
|
||||
command -v cargo-ndk >/dev/null 2>&1 || cargo install cargo-ndk
|
||||
command -v cargo-tauri >/dev/null 2>&1 || cargo install tauri-cli --version "^2.0" --locked
|
||||
|
||||
# ─── 5. Clone the workspace ──────────────────────────────────────────────────
|
||||
mkdir -p "$WZP_DIR"
|
||||
cd "$WZP_DIR"
|
||||
if [ -d warzonePhone/.git ]; then
|
||||
log "Pulling latest on $GIT_BRANCH..."
|
||||
cd warzonePhone
|
||||
git fetch origin || true
|
||||
git checkout "$GIT_BRANCH" 2>/dev/null || git checkout -b "$GIT_BRANCH" "origin/$GIT_BRANCH"
|
||||
git reset --hard "origin/$GIT_BRANCH" || true
|
||||
else
|
||||
log "Cloning warzonePhone from $GIT_REPO..."
|
||||
# The repo URL is SSH-only and needs keys on this host; if the clone fails, skip it and let the user sync the sources later
|
||||
if git clone --branch "$GIT_BRANCH" "$GIT_REPO" warzonePhone 2>/dev/null; then
|
||||
log " cloned ok"
|
||||
else
|
||||
log " clone failed (no SSH keys for $GIT_REPO — skipping, user will rsync)"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ─── 6. Persistent env for the user ──────────────────────────────────────────
|
||||
cat > /etc/profile.d/wzp-android.sh <<ENVEOF
|
||||
export ANDROID_HOME=$ANDROID_HOME
|
||||
export ANDROID_NDK_HOME=$ANDROID_HOME/ndk/$NDK_VERSION
|
||||
export NDK_HOME=\$ANDROID_NDK_HOME
|
||||
export PATH=\$ANDROID_HOME/cmdline-tools/latest/bin:\$ANDROID_HOME/platform-tools:/root/.cargo/bin:\$PATH
|
||||
ENVEOF
|
||||
chmod 644 /etc/profile.d/wzp-android.sh
|
||||
|
||||
# ─── 7. Sanity summary ───────────────────────────────────────────────────────
|
||||
log "Sanity checks:"
|
||||
echo " java: $(java -version 2>&1 | head -1)"
|
||||
echo " node: $(node --version)"
|
||||
echo " npm: $(npm --version)"
|
||||
echo " rustc: $(rustc --version)"
|
||||
echo " cargo-ndk: $(cargo ndk --version 2>&1 | head -1)"
|
||||
echo " cargo-tauri:$(cargo tauri --version 2>&1 | head -1)"
|
||||
echo " NDK dir: $ANDROID_NDK_HOME"
|
||||
echo " WZP dir: $WZP_DIR/warzonePhone"
|
||||
|
||||
notify "wzp prep-linux-mint DONE on $(hostname) — ready at /opt/wzp/warzonePhone"
|
||||
log "All done."
|
||||
Reference in New Issue
Block a user