From cfa9ff67cf48f1fb8b12cddb820032da52382d82 Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Thu, 9 Apr 2026 21:24:26 +0400 Subject: [PATCH] fix(android-audio): VoIP mode + speakerphone + debug PCM recorder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build 96be740 logs proved the entire software pipeline is healthy: capture heartbeat: calls=1100 to_write=960 full_drops=0 total_written=1056000 recv heartbeat: decoded_frames=1035 last_written=960 decode_errs=0 recv decoded PCM: range=[-13564..9244] rms=8044 (real audio) playout WRITE: in_len=960 written=960 rms=2318 (real audio into the ring) playout heartbeat: calls=1100 nonempty=1099 total_played_real=1055040 1055040 samples / 48000 Hz = 22s — exactly matches wall-clock elapsed, meaning Oboe IS calling our playout callback at the expected rate and WE ARE handing it real PCM every 20ms. User still heard nothing. Ergo Oboe accepted the PCM and routed it to a silent output. Two fixes (1 and 2) plus two debugging aids (3 and 4): 1) MainActivity.kt: switch to MODE_IN_COMMUNICATION + speakerphone ON right after permissions are granted, and crank STREAM_VOICE_CALL to max. Without this, an Oboe Usage::VoiceCommunication stream gets opened, the OS creates a real AAudio pipeline, the callback fires on schedule — and audio goes to either the earpiece at muted volume or a "call not active" dead end. Logs the audio mode + volume levels before and after the switch so we can confirm the state change in logcat next run. 2) oboe_bridge.cpp: revert Usage::Media → VoiceCommunication (the mode that matches MODE_IN_COMMUNICATION), pin the audio API to AAudio explicitly instead of letting Oboe fall back to OpenSLES (which has its own silent-drop failure modes on some devices), and add getState + getXRunCount to the playout heartbeat so we'll see silent stream disconnects instead of reading zeros forever. 
3) engine.rs recv task: dump the first ~10s of post-AGC decoded PCM to `$APP_DATA_DIR/decoded.pcm` as raw i16 LE so we can adb pull it and play it back locally: adb shell run-as com.wzp.desktop cat .wzp/decoded.pcm > decoded.pcm ffmpeg -f s16le -ar 48000 -ac 1 -i decoded.pcm decoded.wav This divorces "is our decoder actually producing audible audio" from "is Android's audio stack playing it". If the recorded WAV sounds correct when played on a laptop, the decoder is fine and 100% of the remaining bug surface is AudioManager / Oboe routing. 4) engine.rs: also log when spk_muted=true blocks the write. User reported the Speaker button in the UI has inconsistent semantics between desktop and android — adding this log rules out the accidental "first click muted playback" theory for good. --- crates/wzp-native/cpp/oboe_bridge.cpp | 33 ++++++----- .../main/java/com/wzp/desktop/MainActivity.kt | 49 +++++++++++++++- desktop/src-tauri/src/engine.rs | 57 +++++++++++++++++++ 3 files changed, 124 insertions(+), 15 deletions(-) diff --git a/crates/wzp-native/cpp/oboe_bridge.cpp b/crates/wzp-native/cpp/oboe_bridge.cpp index c8c4ece..c9e6de4 100644 --- a/crates/wzp-native/cpp/oboe_bridge.cpp +++ b/crates/wzp-native/cpp/oboe_bridge.cpp @@ -210,10 +210,13 @@ public: // Heartbeat every 50 callbacks (~1s at 20ms/burst) calls++; if ((calls % 50) == 0) { - LOGI("playout heartbeat: calls=%llu nonempty=%llu numFrames=%d ring_avail_read=%d to_read=%d underrun_frames=%llu total_played_real=%llu", + int state = (int)stream->getState(); + int xruns = stream->getXRunCount().value_or(-1); + LOGI("playout heartbeat: calls=%llu nonempty=%llu numFrames=%d ring_avail_read=%d to_read=%d underrun_frames=%llu total_played_real=%llu state=%d xruns=%d", (unsigned long long)calls, (unsigned long long)nonempty_calls, numFrames, avail, to_read, - (unsigned long long)underrun_frames, (unsigned long long)total_played_real); + (unsigned long long)underrun_frames, (unsigned long long)total_played_real, + state, xruns); } 
// Update latency estimate @@ -273,26 +276,30 @@ int wzp_oboe_start(const WzpOboeConfig* config, const WzpOboeRings* rings) { (int)g_capture_stream->getSharingMode(), (int)g_capture_stream->getPerformanceMode()); - // Build playout stream + // Build playout stream. // - // Usage::Media (NOT VoiceCommunication) routes to the media audio - // stream which plays through the loud speaker and uses the media - // volume slider. VoiceCommunication routes to the in-call earpiece - // stream which is silent unless AudioManager.setMode(IN_COMMUNICATION) - // has been called from the Activity, and even then only the earpiece - // (or a bluetooth headset) gets audio by default. For a debug-friendly - // smoke test we want loud speaker by default. A future polish step - // will wire setMode + setSpeakerphoneOn from MainActivity.kt so we - // can switch back to VoiceCommunication (for AEC benefits etc). + // Usage::Media was a failed experiment — diagnosis from build 96be740 + // showed the whole pipeline is healthy (capture → encode → network → + // decode → playout ring → C++ callback reads 960 samples every 20ms + // with real audio content) but nothing was audible. This means Oboe + // received the PCM and routed it to a silent output. Usage::Media + // alone is not enough — the AudioManager must also be switched to + // MODE_IN_COMMUNICATION and speakerphone explicitly turned on from + // the Activity side, which MainActivity.kt now does on startup. + // + // Reverting to Usage::VoiceCommunication + ContentType::Speech + + // explicit AAudio API (more reliable routing than OpenSLES default) + // on top of the Kotlin-side setMode/setSpeakerphoneOn changes. 
oboe::AudioStreamBuilder playoutBuilder; playoutBuilder.setDirection(oboe::Direction::Output) + ->setAudioApi(oboe::AudioApi::AAudio) ->setPerformanceMode(oboe::PerformanceMode::LowLatency) ->setSharingMode(oboe::SharingMode::Exclusive) ->setFormat(oboe::AudioFormat::I16) ->setChannelCount(config->channel_count) ->setSampleRate(config->sample_rate) ->setFramesPerDataCallback(config->frames_per_burst) - ->setUsage(oboe::Usage::Media) + ->setUsage(oboe::Usage::VoiceCommunication) ->setContentType(oboe::ContentType::Speech) ->setDataCallback(&g_playout_cb); diff --git a/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt b/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt index c1994a2..bad3b98 100644 --- a/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt +++ b/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt @@ -1,7 +1,9 @@ package com.wzp.desktop import android.Manifest +import android.content.Context import android.content.pm.PackageManager +import android.media.AudioManager import android.os.Bundle import android.util.Log import androidx.activity.enableEdgeToEdge @@ -25,8 +27,7 @@ class MainActivity : TauriActivity() { // Request RECORD_AUDIO early so Oboe (inside libwzp_native.so) can open // the AAudio input stream without silently failing. The grant is // persisted, so after the first launch the dialog no longer appears. - // MODIFY_AUDIO_SETTINGS is requested alongside because Oboe toggles the - // audio mode to communication on some devices. + // MODIFY_AUDIO_SETTINGS is needed to switch AudioManager mode + speaker. 
val needsRequest = REQUIRED_AUDIO_PERMISSIONS.any { ContextCompat.checkSelfPermission(this, it) != PackageManager.PERMISSION_GRANTED } @@ -35,6 +36,7 @@ class MainActivity : TauriActivity() { ActivityCompat.requestPermissions(this, REQUIRED_AUDIO_PERMISSIONS, AUDIO_PERMISSIONS_REQUEST) } else { Log.i(TAG, "audio permissions already granted") + configureAudioForCall() } } @@ -48,6 +50,49 @@ class MainActivity : TauriActivity() { val allGranted = grantResults.isNotEmpty() && grantResults.all { it == PackageManager.PERMISSION_GRANTED } Log.i(TAG, "audio permissions result: allGranted=$allGranted grants=${grantResults.toList()}") + if (allGranted) { + configureAudioForCall() + } + } + } + + /** + * Put the phone into VoIP-call audio mode so that the Oboe playout stream + * (opened with Usage::VoiceCommunication) actually routes to the loud + * speaker and uses the in-call volume slider. Without this, the stream is + * accepted by AAudio, the callback is driven at realtime with valid PCM, + * and nothing is audible because the OS routes the stream to a muted or + * unavailable output. See build 96be740's logcat for the full proof: + * playout callback played 1055040 samples in 22s with RMS up to 2318 and + * still produced zero audible output, which was the smoking gun pointing + * at this AudioManager state rather than the Rust pipeline. + * + * This is a temporary "call mode always on" setup — fine for smoke tests + * and the current single-purpose VoIP app. A polished version should + * setMode(IN_COMMUNICATION) only while a call is active and restore + * MODE_NORMAL on hangup, with proper audio-focus requests. 
+ */ + private fun configureAudioForCall() { + try { + val am = getSystemService(Context.AUDIO_SERVICE) as AudioManager + Log.i(TAG, "audio mode before: ${am.mode} speaker=${am.isSpeakerphoneOn} " + + "voiceVol=${am.getStreamVolume(AudioManager.STREAM_VOICE_CALL)}/" + + "${am.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL)} " + + "musicVol=${am.getStreamVolume(AudioManager.STREAM_MUSIC)}/" + + "${am.getStreamMaxVolume(AudioManager.STREAM_MUSIC)}") + + am.mode = AudioManager.MODE_IN_COMMUNICATION + am.isSpeakerphoneOn = true + + // Nudge volumes to max so the smoke test can actually hear something. + // Users can adjust with the hardware volume buttons afterwards. + val maxVoice = am.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL) + am.setStreamVolume(AudioManager.STREAM_VOICE_CALL, maxVoice, 0) + + Log.i(TAG, "audio mode after: ${am.mode} speaker=${am.isSpeakerphoneOn} " + + "voiceVol=${am.getStreamVolume(AudioManager.STREAM_VOICE_CALL)}/$maxVoice") + } catch (e: Throwable) { + Log.e(TAG, "configureAudioForCall failed: ${e.message}", e) } } } diff --git a/desktop/src-tauri/src/engine.rs b/desktop/src-tauri/src/engine.rs index 8d82d8a..b6dc272 100644 --- a/desktop/src-tauri/src/engine.rs +++ b/desktop/src-tauri/src/engine.rs @@ -300,6 +300,33 @@ impl CallEngine { let mut pcm = vec![0i16; FRAME_SAMPLES_40MS]; info!(codec = ?current_codec, "recv task starting (android/oboe)"); + // ─── Decoded-PCM recorder (debug) ──────────────────────────── + // Dumps the first ~10 seconds of post-AGC PCM to a raw i16 LE + // file in the app's private data dir so we can adb pull it and + // play it back to prove the pipeline is producing real audio + // independent of Oboe routing. Convert locally with e.g. 
+ // ffmpeg -f s16le -ar 48000 -ac 1 -i decoded.pcm decoded.wav + use std::io::Write; + let recorder_path = crate::APP_DATA_DIR + .get() + .map(|p| p.join("decoded.pcm")); + let mut recorder = match recorder_path.as_ref() { + Some(p) => match std::fs::File::create(p) { + Ok(f) => { + info!(path = %p.display(), "decoded-pcm recorder open"); + Some(std::io::BufWriter::new(f)) + } + Err(e) => { + tracing::warn!(path = %p.display(), error = %e, "decoded-pcm recorder open failed"); + None + } + }, + None => None, + }; + let mut recorder_bytes: u64 = 0; + // Stop writing after ~10 seconds @ 48kHz mono i16 = ~960KB. + const RECORDER_MAX_BYTES: u64 = 48_000 * 2 * 10; + let mut heartbeat = std::time::Instant::now(); let mut decoded_frames: u64 = 0; let mut written_samples: u64 = 0; @@ -372,6 +399,33 @@ impl CallEngine { ); } agc.process_frame(&mut pcm[..n]); + + // Dump to debug recorder before playout + // so we capture post-AGC samples that + // are exactly what we hand to Oboe. + if let Some(rec) = recorder.as_mut() { + if recorder_bytes < RECORDER_MAX_BYTES { + let slice = &pcm[..n]; + // SAFETY: i16 is Plain Old Data; + // writing its little-endian bytes + // is well-defined on all targets + // we build for. 
+ let byte_slice: &[u8] = unsafe { + std::slice::from_raw_parts( + slice.as_ptr() as *const u8, + slice.len() * 2, + ) + }; + let _ = rec.write_all(byte_slice); + recorder_bytes = recorder_bytes + .saturating_add(byte_slice.len() as u64); + if recorder_bytes >= RECORDER_MAX_BYTES { + let _ = rec.flush(); + info!(recorder_bytes, "decoded-pcm recorder: stopped after limit"); + } + } + } + if !recv_spk.load(Ordering::Relaxed) { let w = crate::wzp_native::audio_write_playout(&pcm[..n]); last_written = w; @@ -379,6 +433,9 @@ impl CallEngine { if w < n && decoded_frames <= 10 { tracing::warn!(n, w, "recv: partial playout write (ring nearly full)"); } + } else if decoded_frames <= 3 || decoded_frames % 100 == 0 { + // User clicked spk-mute — log it so we don't chase ghost bugs + tracing::info!(decoded_frames, "recv: spk_muted=true, skipping playout write"); } } Err(e) => {