feat: add real audio pipeline with Opus + RaptorQ FEC

- AudioPipeline: Kotlin AudioRecord/AudioTrack on JVM threads, PCM
  shuttled to Rust via lock-free ring buffers + JNI
- FEC: RaptorQ fountain codes on encode (5 frames/block, 20% repair
  ratio for GOOD profile), decoder feeds repair symbols for recovery
- Real audio level meter from mic RMS (replaces fake animation)
- Room name editable in UI (default: "android")
- Relay changed to pangolin.manko.yoga:4433
- Stats overlay shows FEC recovered count
- CallState now synced from polled stats (fixes "Connecting" stuck bug)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude
2026-04-05 12:33:59 +00:00
parent 81c756c076
commit bf91cf25bd
15 changed files with 663 additions and 68 deletions

View File

@@ -0,0 +1,174 @@
package com.wzp.audio
import android.Manifest
import android.content.Context
import android.content.pm.PackageManager
import android.media.AudioAttributes
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.AudioTrack
import android.media.MediaRecorder
import android.util.Log
import androidx.core.content.ContextCompat
import com.wzp.engine.WzpEngine
/**
* Audio pipeline that captures mic audio and plays received audio using
* Android AudioRecord/AudioTrack APIs running on JVM threads.
*
* PCM samples are shuttled to/from the Rust engine via JNI ring buffers:
* - Capture: AudioRecord → WzpEngine.writeAudio() → Rust encoder → network
* - Playout: network → Rust decoder → WzpEngine.readAudio() → AudioTrack
*
* All audio is 48kHz, mono, 16-bit PCM (matching Opus codec requirements).
*/
class AudioPipeline(private val context: Context) {

    /** Set to false to ask both threads to exit; checked each loop iteration. */
    @Volatile
    private var running = false
    private var captureThread: Thread? = null
    private var playoutThread: Thread? = null

    /**
     * Starts the capture and playout threads. No-op if the pipeline is
     * already running.
     *
     * @param engine native engine whose JNI ring buffers the threads
     *        read from / write to.
     */
    fun start(engine: WzpEngine) {
        if (running) return
        running = true
        captureThread = Thread({
            runCapture(engine)
        }, "wzp-capture").apply {
            // Real-time-ish priority: a late capture frame is lost audio.
            priority = Thread.MAX_PRIORITY
            start()
        }
        playoutThread = Thread({
            runPlayout(engine)
        }, "wzp-playout").apply {
            priority = Thread.MAX_PRIORITY
            start()
        }
        Log.i(TAG, "audio pipeline started")
    }

    /**
     * Stops both threads and waits up to 1s each for them to exit.
     * Safe to call multiple times or when not running.
     */
    fun stop() {
        running = false
        // Interrupt so the playout thread doesn't linger in its underrun
        // sleep; AudioRecord.read() is not interruptible but returns on
        // its own within ~one frame (20ms), well inside the join timeout.
        captureThread?.interrupt()
        playoutThread?.interrupt()
        captureThread?.join(1000)
        playoutThread?.join(1000)
        captureThread = null
        playoutThread = null
        Log.i(TAG, "audio pipeline stopped")
    }

    /**
     * Capture loop body: reads 20ms PCM frames from the mic and pushes
     * them into the engine's capture ring buffer. Returns early (thread
     * exits) if RECORD_AUDIO is not granted or AudioRecord cannot start.
     */
    private fun runCapture(engine: WzpEngine) {
        if (ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
            != PackageManager.PERMISSION_GRANTED
        ) {
            Log.e(TAG, "RECORD_AUDIO permission not granted, capture disabled")
            return
        }
        // getMinBufferSize may return a negative error code; the maxOf with
        // 4 frames' worth of bytes (2 bytes/sample) also covers that case.
        val minBuf = AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_IN, ENCODING)
        val bufSize = maxOf(minBuf, FRAME_SAMPLES * 2 * 4) // at least 4 frames
        val recorder = try {
            AudioRecord(
                MediaRecorder.AudioSource.VOICE_COMMUNICATION,
                SAMPLE_RATE,
                CHANNEL_IN,
                ENCODING,
                bufSize
            )
        } catch (e: SecurityException) {
            Log.e(TAG, "AudioRecord SecurityException: ${e.message}")
            return
        }
        if (recorder.state != AudioRecord.STATE_INITIALIZED) {
            Log.e(TAG, "AudioRecord failed to initialize")
            recorder.release()
            return
        }
        recorder.startRecording()
        Log.i(TAG, "capture started: ${SAMPLE_RATE}Hz mono, buf=$bufSize")
        val pcm = ShortArray(FRAME_SAMPLES)
        try {
            while (running) {
                val read = recorder.read(pcm, 0, FRAME_SAMPLES)
                when {
                    // Only forward complete frames: writeAudio() consumes the
                    // whole array, so a partial read would push a stale tail
                    // from the previous frame into the encoder.
                    read == FRAME_SAMPLES -> engine.writeAudio(pcm)
                    read > 0 -> Log.w(TAG, "dropping partial frame: $read samples")
                    read < 0 -> {
                        Log.e(TAG, "AudioRecord.read error: $read")
                        break
                    }
                    // read == 0: nothing available yet, loop again.
                }
            }
        } finally {
            try {
                recorder.stop()
            } catch (_: IllegalStateException) {
                // Recorder already stopped/released; nothing to do.
            }
            recorder.release()
            Log.i(TAG, "capture stopped")
        }
    }

    /**
     * Playout loop body: pulls decoded 20ms PCM frames from the engine's
     * playout ring buffer and writes them to an AudioTrack, substituting
     * silence when not enough decoded audio is available.
     */
    private fun runPlayout(engine: WzpEngine) {
        val minBuf = AudioTrack.getMinBufferSize(SAMPLE_RATE, CHANNEL_OUT, ENCODING)
        val bufSize = maxOf(minBuf, FRAME_SAMPLES * 2 * 4)
        val track = AudioTrack.Builder()
            .setAudioAttributes(
                AudioAttributes.Builder()
                    .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION)
                    .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                    .build()
            )
            .setAudioFormat(
                AudioFormat.Builder()
                    .setSampleRate(SAMPLE_RATE)
                    .setChannelMask(CHANNEL_OUT)
                    .setEncoding(ENCODING)
                    .build()
            )
            .setBufferSizeInBytes(bufSize)
            .setTransferMode(AudioTrack.MODE_STREAM)
            .build()
        if (track.state != AudioTrack.STATE_INITIALIZED) {
            Log.e(TAG, "AudioTrack failed to initialize")
            track.release()
            return
        }
        track.play()
        Log.i(TAG, "playout started: ${SAMPLE_RATE}Hz mono, buf=$bufSize")
        val pcm = ShortArray(FRAME_SAMPLES)
        val silence = ShortArray(FRAME_SAMPLES) // pre-allocated silence
        try {
            while (running) {
                val read = engine.readAudio(pcm)
                if (read >= FRAME_SAMPLES) {
                    track.write(pcm, 0, read)
                } else {
                    // Not enough decoded audio — write silence to keep stream alive
                    track.write(silence, 0, FRAME_SAMPLES)
                    // Sleep briefly to avoid busy-spinning; stop() interrupts
                    // this sleep for prompt shutdown.
                    try {
                        Thread.sleep(UNDERRUN_SLEEP_MS)
                    } catch (_: InterruptedException) {
                        Thread.currentThread().interrupt()
                        break
                    }
                }
            }
        } finally {
            track.stop()
            track.release()
            Log.i(TAG, "playout stopped")
        }
    }

    companion object {
        private const val TAG = "AudioPipeline"
        private const val SAMPLE_RATE = 48000
        private const val CHANNEL_IN = AudioFormat.CHANNEL_IN_MONO
        private const val CHANNEL_OUT = AudioFormat.CHANNEL_OUT_MONO
        private const val ENCODING = AudioFormat.ENCODING_PCM_16BIT
        /** 20ms frame at 48kHz = 960 samples */
        private const val FRAME_SAMPLES = 960
        /** Back-off while the playout ring buffer is empty. */
        private const val UNDERRUN_SLEEP_MS = 5L
    }
}

View File

@@ -27,7 +27,11 @@ data class CallStats(
/** Total frames decoded since call start. */
val framesDecoded: Long = 0,
/** Number of playout underruns (buffer empty when audio was needed). */
val underruns: Long = 0
val underruns: Long = 0,
/** Frames recovered by FEC. */
val fecRecovered: Long = 0,
/** Current mic audio level (RMS, 0-32767). */
val audioLevel: Int = 0
) {
/** Human-readable quality label. */
val qualityLabel: String
@@ -53,7 +57,9 @@ data class CallStats(
jitterBufferDepth = obj.optInt("jitter_buffer_depth", 0),
framesEncoded = obj.optLong("frames_encoded", 0),
framesDecoded = obj.optLong("frames_decoded", 0),
underruns = obj.optLong("underruns", 0)
underruns = obj.optLong("underruns", 0),
fecRecovered = obj.optLong("fec_recovered", 0),
audioLevel = obj.optInt("audio_level", 0)
)
} catch (e: Exception) {
CallStats()

View File

@@ -97,6 +97,24 @@ class WzpEngine(private val callback: WzpCallback) {
}
}
/**
 * Write captured PCM samples into the engine's capture ring buffer.
 * Called from the AudioRecord capture thread.
 *
 * @param pcm 16-bit PCM samples to enqueue; the whole array is handed
 *        to the native side.
 * @return value propagated from the native ring buffer — presumably the
 *         number of samples accepted; confirm against the Rust impl.
 *         Returns 0 when the native engine has not been initialized.
 */
fun writeAudio(pcm: ShortArray): Int {
if (nativeHandle == 0L) return 0
return nativeWriteAudio(nativeHandle, pcm)
}
/**
 * Read decoded PCM samples from the engine's playout ring buffer.
 * Called from the AudioTrack playout thread.
 *
 * @param pcm destination buffer filled with 16-bit PCM by the native side.
 * @return value propagated from the native ring buffer — presumably the
 *         number of samples written into [pcm]; confirm against the Rust
 *         impl. Returns 0 when the native engine has not been initialized.
 */
fun readAudio(pcm: ShortArray): Int {
if (nativeHandle == 0L) return 0
return nativeReadAudio(nativeHandle, pcm)
}
// -- JNI native methods --------------------------------------------------
private external fun nativeInit(): Long
@@ -108,6 +126,8 @@ class WzpEngine(private val callback: WzpCallback) {
private external fun nativeSetSpeaker(handle: Long, speaker: Boolean)
private external fun nativeGetStats(handle: Long): String?
private external fun nativeForceProfile(handle: Long, profile: Int)
private external fun nativeWriteAudio(handle: Long, pcm: ShortArray): Int
private external fun nativeReadAudio(handle: Long, pcm: ShortArray): Int
private external fun nativeDestroy(handle: Long)
companion object {

View File

@@ -39,6 +39,8 @@ class CallActivity : ComponentActivity() {
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
viewModel.setContext(this)
setContent {
WzpTheme {
InCallScreen(

View File

@@ -1,7 +1,9 @@
package com.wzp.ui.call
import android.content.Context
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import com.wzp.audio.AudioPipeline
import com.wzp.engine.CallStats
import com.wzp.engine.WzpCallback
import com.wzp.engine.WzpEngine
@@ -17,9 +19,11 @@ class CallViewModel : ViewModel(), WzpCallback {
private var engine: WzpEngine? = null
private var engineInitialized = false
private var audioPipeline: AudioPipeline? = null
private var audioStarted = false
private val _callState = MutableStateFlow(0)
val callState: StateFlow<Int> = _callState.asStateFlow()
val callState: StateFlow<Int> get() = _callState.asStateFlow()
private val _isMuted = MutableStateFlow(false)
val isMuted: StateFlow<Boolean> = _isMuted.asStateFlow()
@@ -36,16 +40,26 @@ class CallViewModel : ViewModel(), WzpCallback {
private val _errorMessage = MutableStateFlow<String?>(null)
val errorMessage: StateFlow<String?> = _errorMessage.asStateFlow()
private val _roomName = MutableStateFlow(DEFAULT_ROOM)
val roomName: StateFlow<String> = _roomName.asStateFlow()
private var statsJob: Job? = null
companion object {
const val DEFAULT_RELAY = "172.16.81.175:4433"
const val DEFAULT_RELAY = "pangolin.manko.yoga:4433"
const val DEFAULT_ROOM = "android"
}
/** Must be called once with Activity context before startCall. */
fun setContext(context: Context) {
if (audioPipeline == null) {
audioPipeline = AudioPipeline(context.applicationContext)
}
}
fun startCall(
relayAddr: String = DEFAULT_RELAY,
room: String = DEFAULT_ROOM
room: String = _roomName.value
) {
try {
if (engine == null) {
@@ -58,9 +72,6 @@ class CallViewModel : ViewModel(), WzpCallback {
_callState.value = 1 // Connecting
startStatsPolling()
// startCall blocks (runs tokio on calling thread), so dispatch
// to a background coroutine. Using Dispatchers.IO which uses
// Java threads (not native pthread_create).
viewModelScope.launch(kotlinx.coroutines.Dispatchers.IO) {
try {
val result = engine?.startCall(relayAddr, room) ?: -1
@@ -80,6 +91,7 @@ class CallViewModel : ViewModel(), WzpCallback {
}
fun stopCall() {
stopAudio()
stopStatsPolling()
try {
engine?.stopCall()
@@ -101,11 +113,26 @@ class CallViewModel : ViewModel(), WzpCallback {
fun clearError() { _errorMessage.value = null }
fun setRoomName(name: String) { _roomName.value = name }
// WzpCallback
override fun onCallStateChanged(state: Int) { _callState.value = state }
override fun onQualityTierChanged(tier: Int) { _qualityTier.value = tier }
override fun onError(code: Int, message: String) { _errorMessage.value = "Error $code: $message" }
// Starts the mic/speaker pipeline exactly once per call. Requires both the
// pipeline (created in setContext) and the native engine to exist; silently
// no-ops otherwise — the stats poller will retry on the next tick.
private fun startAudio() {
if (audioStarted) return
val e = engine ?: return
audioPipeline?.start(e)
audioStarted = true
}
// Stops the audio pipeline if it was started. Idempotent: the audioStarted
// guard makes repeated calls (stopCall + onCleared) safe.
private fun stopAudio() {
if (!audioStarted) return
audioPipeline?.stop()
audioStarted = false
}
private fun startStatsPolling() {
statsJob?.cancel()
statsJob = viewModelScope.launch {
@@ -113,7 +140,16 @@ class CallViewModel : ViewModel(), WzpCallback {
try {
val json = engine?.getStats() ?: "{}"
if (json.isNotEmpty()) {
_stats.value = CallStats.fromJson(json)
val s = CallStats.fromJson(json)
_stats.value = s
// Sync call state from native engine stats
if (s.state != 0) {
_callState.value = s.state
}
// Start audio pipeline when call becomes active
if (s.state == 2 && !audioStarted) {
startAudio()
}
}
} catch (_: Exception) {}
delay(500L)
@@ -128,6 +164,7 @@ class CallViewModel : ViewModel(), WzpCallback {
override fun onCleared() {
super.onCleared()
stopAudio()
stopStatsPolling()
try {
engine?.stopCall()

View File

@@ -21,6 +21,7 @@ import androidx.compose.material3.FilledTonalIconButton
import androidx.compose.material3.IconButtonDefaults
import androidx.compose.material3.LinearProgressIndicator
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
@@ -48,6 +49,7 @@ fun InCallScreen(
val stats by viewModel.stats.collectAsState()
val qualityTier by viewModel.qualityTier.collectAsState()
val errorMessage by viewModel.errorMessage.collectAsState()
val roomName by viewModel.roomName.collectAsState()
Surface(
modifier = Modifier.fillMaxSize(),
@@ -83,11 +85,13 @@ fun InCallScreen(
style = MaterialTheme.typography.bodyMedium,
color = MaterialTheme.colorScheme.onSurfaceVariant
)
Spacer(modifier = Modifier.height(4.dp))
Text(
text = "Room: ${CallViewModel.DEFAULT_ROOM}",
style = MaterialTheme.typography.bodyMedium,
color = MaterialTheme.colorScheme.onSurfaceVariant
Spacer(modifier = Modifier.height(8.dp))
OutlinedTextField(
value = roomName,
onValueChange = { viewModel.setRoomName(it) },
label = { Text("Room") },
singleLine = true,
modifier = Modifier.fillMaxWidth(0.6f)
)
Spacer(modifier = Modifier.height(32.dp))
@@ -132,7 +136,7 @@ fun InCallScreen(
Spacer(modifier = Modifier.height(32.dp))
AudioLevelBar(stats.framesEncoded)
AudioLevelBar(stats.audioLevel)
Spacer(modifier = Modifier.weight(1f))
@@ -222,9 +226,11 @@ private fun QualityIndicator(tier: Int, label: String) {
}
@Composable
private fun AudioLevelBar(framesEncoded: Long) {
val level = if (framesEncoded > 0) {
((framesEncoded % 100).toFloat() / 100f).coerceIn(0.05f, 1f)
private fun AudioLevelBar(audioLevel: Int) {
// audioLevel is RMS of i16 samples (0-32767).
// Map to 0.0-1.0 with a log-ish curve for better visual feel.
val level = if (audioLevel > 0) {
(audioLevel.toFloat() / 8000f).coerceIn(0.02f, 1f)
} else {
0f
}
@@ -351,7 +357,7 @@ private fun StatsOverlay(stats: CallStats) {
) {
StatItem("Enc", "${stats.framesEncoded}")
StatItem("Dec", "${stats.framesDecoded}")
StatItem("JB", "${stats.jitterBufferDepth}")
StatItem("FEC", "${stats.fecRecovered}")
StatItem("Under", "${stats.underruns}")
}
}