From da106bd9398e6ade3da1eba9c60bea329f938667 Mon Sep 17 00:00:00 2001 From: Siavash Sameni Date: Thu, 9 Apr 2026 21:38:19 +0400 Subject: [PATCH] =?UTF-8?q?fix(android-audio):=20revert=20to=2096be740's?= =?UTF-8?q?=20Oboe=20config=20=E2=80=94=20VoiceCommunication=20broke=20cal?= =?UTF-8?q?lback=20drain?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build 8c36fb5 logs showed a new regression: Oboe playout cb#0 fires once at startup then the callback STOPS DRAINING the ring entirely. written_samples sticks at 7679 (= RING_CAPACITY - 1) across every recv heartbeat in a 40-second test. Meanwhile the recv task decodes 1800+ real audio frames (sample range up to [-27920..31907], rms 12065) which all get dropped on the floor by audio_write_playout returning 0 because the ring is full. Bisection: 96be740 (Usage::Media, no setAudioApi, no ContentType, no MainActivity audio mode change) DID drive the playout callback at the expected 50Hz (playout heartbeat: calls=1100 total_played_real=1055040 over 22 seconds). User still heard nothing there because of OS routing, but at least Oboe accepted the PCM. 8c36fb5 added four changes on top of 96be740: 1. Oboe Usage::Media → Usage::VoiceCommunication 2. Oboe setAudioApi(oboe::AudioApi::AAudio) explicit 3. Oboe setContentType(ContentType::Speech) 4. MainActivity setMode(MODE_IN_COMMUNICATION) + setSpeakerphoneOn(true) Every one of those could have killed the callback; combined they did. Revert to 96be740's exact Oboe config: Usage::Media, no setAudioApi, no ContentType. Keep the PCM recorder, heartbeat logging, and stream-open logging. Separately, MainActivity now maxes STREAM_MUSIC (the stream Usage::Media routes to) but leaves audio mode in MODE_NORMAL — no more speakerphone/call-mode combo that makes Oboe unhappy. In NORMAL mode a STREAM_MUSIC stream plays through the loud speaker by default. 
Proof that the Rust pipeline is perfect: decoded.pcm recorded in 8c36fb5 was pulled via `adb shell run-as com.wzp.desktop cat .wzp/decoded.pcm`, converted with ffmpeg, and played back on the Mac — user confirmed audible speech. So 100% of the remaining bug surface is Android audio routing, not anything in the Rust/C++ decode path. --- crates/wzp-native/cpp/oboe_bridge.cpp | 32 ++++++++------- .../main/java/com/wzp/desktop/MainActivity.kt | 41 ++++++++----------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/crates/wzp-native/cpp/oboe_bridge.cpp b/crates/wzp-native/cpp/oboe_bridge.cpp index 59aaa63..3381f02 100644 --- a/crates/wzp-native/cpp/oboe_bridge.cpp +++ b/crates/wzp-native/cpp/oboe_bridge.cpp @@ -279,29 +279,33 @@ int wzp_oboe_start(const WzpOboeConfig* config, const WzpOboeRings* rings) { // Build playout stream. // - // Usage::Media was a failed experiment — diagnosis from build 96be740 - // showed the whole pipeline is healthy (capture → encode → network → - // decode → playout ring → C++ callback reads 960 samples every 20ms - // with real audio content) but nothing was audible. This means Oboe - // received the PCM and routed it to a silent output. Usage::Media - // alone is not enough — the AudioManager must also be switched to - // MODE_IN_COMMUNICATION and speakerphone explicitly turned on from - // the Activity side, which MainActivity.kt now does on startup. + // Regression triangulation between builds: + // 96be740 (Usage::Media, default API): playout callback DID drain + // the ring at steady 50Hz (playout heartbeat: calls=1100, + // total_played_real=1055040). Audio not audible because OS routing + // sent it to a silent output. // - // Reverting to Usage::VoiceCommunication + ContentType::Speech + - // explicit AAudio API (more reliable routing than OpenSLES default) - // on top of the Kotlin-side setMode/setSpeakerphoneOn changes. 
+ // 8c36fb5 (Usage::VoiceCommunication + setAudioApi(AAudio) + + // ContentType::Speech): playout callback fired cb#0 once then + // stopped draining the ring entirely. written_samples stuck at + // ring capacity (7679) across all subsequent heartbeats, so Oboe + // accepted zero samples after startup. Still inaudible. + // + // Hypothesis: forcing setAudioApi(AAudio) + VoiceCommunication on + // Pixel 6 / Android 15 opens a stream that succeeds at cb#0 but + // then detaches from the real audio driver. Reverting to the + // config that at least drove callbacks correctly. MainActivity.kt + // now leaves the audio mode in MODE_NORMAL (no setSpeakerphoneOn) + // and maxes STREAM_MUSIC so the Media stream reaches the speaker. oboe::AudioStreamBuilder playoutBuilder; playoutBuilder.setDirection(oboe::Direction::Output) - ->setAudioApi(oboe::AudioApi::AAudio) ->setPerformanceMode(oboe::PerformanceMode::LowLatency) ->setSharingMode(oboe::SharingMode::Exclusive) ->setFormat(oboe::AudioFormat::I16) ->setChannelCount(config->channel_count) ->setSampleRate(config->sample_rate) ->setFramesPerDataCallback(config->frames_per_burst) - ->setUsage(oboe::Usage::VoiceCommunication) - ->setContentType(oboe::ContentType::Speech) + ->setUsage(oboe::Usage::Media) ->setDataCallback(&g_playout_cb); result = playoutBuilder.openStream(g_playout_stream); diff --git a/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt b/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt index bad3b98..4217f2c 100644 --- a/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt +++ b/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt @@ -57,40 +57,35 @@ class MainActivity : TauriActivity() { } /** - * Put the phone into VoIP-call audio mode so that the Oboe playout stream - * (opened with Usage::VoiceCommunication) actually routes to the loud - * speaker and uses the in-call volume slider. 
Without this, the stream is - * accepted by AAudio, the callback is driven at realtime with valid PCM, - * and nothing is audible because the OS routes the stream to a muted or - * unavailable output. See build 96be740's logcat for the full proof: - * playout callback played 1055040 samples in 22s with RMS up to 2318 and - * still produced zero audible output, which was the smoking gun pointing - * at this AudioManager state rather than the Rust pipeline. + * Max out STREAM_MUSIC so the Oboe playout stream (opened with + * Usage::Media, which routes to STREAM_MUSIC) is actually audible. * - * This is a temporary "call mode always on" setup — fine for smoke tests - * and the current single-purpose VoIP app. A polished version should - * setMode(IN_COMMUNICATION) only while a call is active and restore - * MODE_NORMAL on hangup, with proper audio-focus requests. + * DELIBERATELY does NOT call setMode(IN_COMMUNICATION) or + * setSpeakerphoneOn: build 8c36fb5 confirmed that combining those with + * Usage::Media OR with Usage::VoiceCommunication (both tried) broke the + * Oboe playout callback entirely — the ring filled once at startup and + * Oboe stopped draining it. Keeping audio mode in MODE_NORMAL so the + * Media stream follows the normal speaker-output path, controlled by + * the media volume slider. + * + * A polished version of the app will setMode/setSpeakerphoneOn on a + * per-call basis once we've figured out the correct combo with AAudio. 
*/ private fun configureAudioForCall() { try { val am = getSystemService(Context.AUDIO_SERVICE) as AudioManager - Log.i(TAG, "audio mode before: ${am.mode} speaker=${am.isSpeakerphoneOn} " + + Log.i(TAG, "audio state before: mode=${am.mode} speaker=${am.isSpeakerphoneOn} " + "voiceVol=${am.getStreamVolume(AudioManager.STREAM_VOICE_CALL)}/" + "${am.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL)} " + "musicVol=${am.getStreamVolume(AudioManager.STREAM_MUSIC)}/" + "${am.getStreamMaxVolume(AudioManager.STREAM_MUSIC)}") - am.mode = AudioManager.MODE_IN_COMMUNICATION - am.isSpeakerphoneOn = true + // Crank media volume to max — STREAM_MUSIC is what Usage::Media + // plays through. User can adjust with hardware volume buttons. + val maxMusic = am.getStreamMaxVolume(AudioManager.STREAM_MUSIC) + am.setStreamVolume(AudioManager.STREAM_MUSIC, maxMusic, 0) - // Nudge volumes to max so the smoke test can actually hear something. - // Users can adjust with the hardware volume buttons afterwards. - val maxVoice = am.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL) - am.setStreamVolume(AudioManager.STREAM_VOICE_CALL, maxVoice, 0) - - Log.i(TAG, "audio mode after: ${am.mode} speaker=${am.isSpeakerphoneOn} " + - "voiceVol=${am.getStreamVolume(AudioManager.STREAM_VOICE_CALL)}/$maxVoice") + Log.i(TAG, "audio state after: mode=${am.mode} musicVol=${am.getStreamVolume(AudioManager.STREAM_MUSIC)}/$maxMusic") } catch (e: Throwable) { Log.e(TAG, "configureAudioForCall failed: ${e.message}", e) }