diff --git a/crates/wzp-native/cpp/oboe_bridge.cpp b/crates/wzp-native/cpp/oboe_bridge.cpp index c8c4ece..c9e6de4 100644 --- a/crates/wzp-native/cpp/oboe_bridge.cpp +++ b/crates/wzp-native/cpp/oboe_bridge.cpp @@ -210,10 +210,13 @@ public: // Heartbeat every 50 callbacks (~1s at 20ms/burst) calls++; if ((calls % 50) == 0) { - LOGI("playout heartbeat: calls=%llu nonempty=%llu numFrames=%d ring_avail_read=%d to_read=%d underrun_frames=%llu total_played_real=%llu", + int state = (int)stream->getState(); + int xruns = stream->getXRunCount().value_or(-1); + LOGI("playout heartbeat: calls=%llu nonempty=%llu numFrames=%d ring_avail_read=%d to_read=%d underrun_frames=%llu total_played_real=%llu state=%d xruns=%d", (unsigned long long)calls, (unsigned long long)nonempty_calls, numFrames, avail, to_read, - (unsigned long long)underrun_frames, (unsigned long long)total_played_real); + (unsigned long long)underrun_frames, (unsigned long long)total_played_real, + state, xruns); } // Update latency estimate @@ -273,26 +276,30 @@ int wzp_oboe_start(const WzpOboeConfig* config, const WzpOboeRings* rings) { (int)g_capture_stream->getSharingMode(), (int)g_capture_stream->getPerformanceMode()); - // Build playout stream + // Build playout stream. // - // Usage::Media (NOT VoiceCommunication) routes to the media audio - // stream which plays through the loud speaker and uses the media - // volume slider. VoiceCommunication routes to the in-call earpiece - // stream which is silent unless AudioManager.setMode(IN_COMMUNICATION) - // has been called from the Activity, and even then only the earpiece - // (or a bluetooth headset) gets audio by default. For a debug-friendly - // smoke test we want loud speaker by default. A future polish step - // will wire setMode + setSpeakerphoneOn from MainActivity.kt so we - // can switch back to VoiceCommunication (for AEC benefits etc). + // Usage::Media was a failed experiment — diagnosis from build 96be740 + // showed the whole pipeline is healthy (capture → encode → network → + // decode → playout ring → C++ callback reads 960 samples every 20ms + // with real audio content) but nothing was audible. This means Oboe + // received the PCM and routed it to a silent output. Usage::Media + // alone is not enough — the AudioManager must also be switched to + // MODE_IN_COMMUNICATION and speakerphone explicitly turned on from + // the Activity side, which MainActivity.kt now does on startup. + // + // Reverting to Usage::VoiceCommunication + ContentType::Speech + + // explicit AAudio API (more reliable routing than OpenSLES default) + // on top of the Kotlin-side setMode/setSpeakerphoneOn changes. oboe::AudioStreamBuilder playoutBuilder; playoutBuilder.setDirection(oboe::Direction::Output) + ->setAudioApi(oboe::AudioApi::AAudio) ->setPerformanceMode(oboe::PerformanceMode::LowLatency) ->setSharingMode(oboe::SharingMode::Exclusive) ->setFormat(oboe::AudioFormat::I16) ->setChannelCount(config->channel_count) ->setSampleRate(config->sample_rate) ->setFramesPerDataCallback(config->frames_per_burst) - ->setUsage(oboe::Usage::Media) + ->setUsage(oboe::Usage::VoiceCommunication) ->setContentType(oboe::ContentType::Speech) ->setDataCallback(&g_playout_cb); diff --git a/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt b/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt index c1994a2..bad3b98 100644 --- a/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt +++ b/desktop/src-tauri/gen/android/app/src/main/java/com/wzp/desktop/MainActivity.kt @@ -1,7 +1,9 @@ package com.wzp.desktop import android.Manifest +import android.content.Context import android.content.pm.PackageManager +import android.media.AudioManager import android.os.Bundle import android.util.Log import androidx.activity.enableEdgeToEdge @@ -25,8 +27,7 @@ class MainActivity : TauriActivity() { // Request RECORD_AUDIO early so Oboe (inside libwzp_native.so) can open // the AAudio input stream without silently failing. The grant is // persisted, so after the first launch the dialog no longer appears. - // MODIFY_AUDIO_SETTINGS is requested alongside because Oboe toggles the - // audio mode to communication on some devices. + // MODIFY_AUDIO_SETTINGS is needed to switch AudioManager mode + speaker. val needsRequest = REQUIRED_AUDIO_PERMISSIONS.any { ContextCompat.checkSelfPermission(this, it) != PackageManager.PERMISSION_GRANTED } @@ -35,6 +36,7 @@ class MainActivity : TauriActivity() { ActivityCompat.requestPermissions(this, REQUIRED_AUDIO_PERMISSIONS, AUDIO_PERMISSIONS_REQUEST) } else { Log.i(TAG, "audio permissions already granted") + configureAudioForCall() } } @@ -48,6 +50,49 @@ class MainActivity : TauriActivity() { val allGranted = grantResults.isNotEmpty() && grantResults.all { it == PackageManager.PERMISSION_GRANTED } Log.i(TAG, "audio permissions result: allGranted=$allGranted grants=${grantResults.toList()}") + if (allGranted) { + configureAudioForCall() + } + } + } + + /** + * Put the phone into VoIP-call audio mode so that the Oboe playout stream + * (opened with Usage::VoiceCommunication) actually routes to the loud + * speaker and uses the in-call volume slider. Without this, the stream is + * accepted by AAudio, the callback is driven at realtime with valid PCM, + * and nothing is audible because the OS routes the stream to a muted or + * unavailable output. See build 96be740's logcat for the full proof: + * playout callback played 1055040 samples in 22s with RMS up to 2318 and + * still produced zero audible output, which was the smoking gun pointing + * at this AudioManager state rather than the Rust pipeline. + * + * This is a temporary "call mode always on" setup — fine for smoke tests + * and the current single-purpose VoIP app. A polished version should + * setMode(IN_COMMUNICATION) only while a call is active and restore + * MODE_NORMAL on hangup, with proper audio-focus requests. + */ + private fun configureAudioForCall() { + try { + val am = getSystemService(Context.AUDIO_SERVICE) as AudioManager + Log.i(TAG, "audio mode before: ${am.mode} speaker=${am.isSpeakerphoneOn} " + + "voiceVol=${am.getStreamVolume(AudioManager.STREAM_VOICE_CALL)}/" + + "${am.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL)} " + + "musicVol=${am.getStreamVolume(AudioManager.STREAM_MUSIC)}/" + + "${am.getStreamMaxVolume(AudioManager.STREAM_MUSIC)}") + + am.mode = AudioManager.MODE_IN_COMMUNICATION + am.isSpeakerphoneOn = true + + // Nudge volumes to max so the smoke test can actually hear something. + // Users can adjust with the hardware volume buttons afterwards. + val maxVoice = am.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL) + am.setStreamVolume(AudioManager.STREAM_VOICE_CALL, maxVoice, 0) + + Log.i(TAG, "audio mode after: ${am.mode} speaker=${am.isSpeakerphoneOn} " + + "voiceVol=${am.getStreamVolume(AudioManager.STREAM_VOICE_CALL)}/$maxVoice") + } catch (e: Throwable) { + Log.e(TAG, "configureAudioForCall failed: ${e.message}", e) } } } diff --git a/desktop/src-tauri/src/engine.rs b/desktop/src-tauri/src/engine.rs index 8d82d8a..b6dc272 100644 --- a/desktop/src-tauri/src/engine.rs +++ b/desktop/src-tauri/src/engine.rs @@ -300,6 +300,33 @@ impl CallEngine { let mut pcm = vec![0i16; FRAME_SAMPLES_40MS]; info!(codec = ?current_codec, "recv task starting (android/oboe)"); + // ─── Decoded-PCM recorder (debug) ──────────────────────────── + // Dumps the first ~10 seconds of post-AGC PCM to a raw i16 LE + // file in the app's private data dir so we can adb pull it and + // play it back to prove the pipeline is producing real audio + // independent of Oboe routing. Convert locally with e.g. + // ffmpeg -f s16le -ar 48000 -ac 1 -i decoded.pcm decoded.wav + use std::io::Write; + let recorder_path = crate::APP_DATA_DIR + .get() + .map(|p| p.join("decoded.pcm")); + let mut recorder = match recorder_path.as_ref() { + Some(p) => match std::fs::File::create(p) { + Ok(f) => { + info!(path = %p.display(), "decoded-pcm recorder open"); + Some(std::io::BufWriter::new(f)) + } + Err(e) => { + tracing::warn!(path = %p.display(), error = %e, "decoded-pcm recorder open failed"); + None + } + }, + None => None, + }; + let mut recorder_bytes: u64 = 0; + // Stop writing after ~10 seconds @ 48kHz mono i16 = ~960KB. + const RECORDER_MAX_BYTES: u64 = 48_000 * 2 * 10; + let mut heartbeat = std::time::Instant::now(); let mut decoded_frames: u64 = 0; let mut written_samples: u64 = 0; @@ -372,6 +399,33 @@ impl CallEngine { ); } agc.process_frame(&mut pcm[..n]); + + // Dump to debug recorder before playout + // so we capture post-AGC samples that + // are exactly what we hand to Oboe. + if let Some(rec) = recorder.as_mut() { + if recorder_bytes < RECORDER_MAX_BYTES { + let slice = &pcm[..n]; + // SAFETY: i16 is Plain Old Data; + // writing its little-endian bytes + // is well-defined on all targets + // we build for. + let byte_slice: &[u8] = unsafe { + std::slice::from_raw_parts( + slice.as_ptr() as *const u8, + slice.len() * 2, + ) + }; + let _ = rec.write_all(byte_slice); + recorder_bytes = recorder_bytes + .saturating_add(byte_slice.len() as u64); + if recorder_bytes >= RECORDER_MAX_BYTES { + let _ = rec.flush(); + info!(recorder_bytes, "decoded-pcm recorder: stopped after limit"); + } + } + } + if !recv_spk.load(Ordering::Relaxed) { let w = crate::wzp_native::audio_write_playout(&pcm[..n]); last_written = w; @@ -379,6 +433,9 @@ impl CallEngine { if w < n && decoded_frames <= 10 { tracing::warn!(n, w, "recv: partial playout write (ring nearly full)"); } + } else if decoded_frames <= 3 || decoded_frames % 100 == 0 { + // User clicked spk-mute — log it so we don't chase ghost bugs + tracing::info!(decoded_frames, "recv: spk_muted=true, skipping playout write"); } } Err(e) => {