1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
//! A safe API for the Spatial Audio Framework (SAF).

use crate::saf_raw;
use libc::c_void;
use std::ptr::{addr_of_mut, null, null_mut};

// NOTE(review): this spot used to say that all audio channel distances are set
// to 1 meter, with per-channel distances as a stretch goal. In this file the
// distance is now set per source from `BufferMetadata::range`.
/// Sample rate, in samples per second, that the SAF instance is initialised with.
const SAMP_RATE: usize = 44100;
/// Number of output channels produced for every frame (left and right).
const NUM_OUT_CHANNELS: usize = 2;

/// The number of samples that can be processed in one frame by a [`Binauraliser`].
pub const FRAME_SIZE: usize = 128;

/// Multiplying an angle in radians by this factor yields the angle in degrees.
const RAD_TO_DEGREE: f32 = 180.0 / std::f32::consts::PI;

/// A Binauraliser is anything that can take an array of sound buffers, paired
/// with their associated metadata, and return a pair of freshly allocated
/// buffers representing the mixed stereo audio.
pub trait Binauraliser {
    /// Mixes exactly one frame of audio.
    ///
    /// `buffers` holds one tuple per sound source: a [`BufferMetadata`], which
    /// encodes the source's location, range, and gain over that frame period,
    /// and the source's float sample data.
    ///
    /// Returns a pair of vectors `(left, right)` containing the mixed binaural
    /// audio.
    ///
    /// Invariant: every input buffer must be exactly [`FRAME_SIZE`] samples
    /// long.
    fn process_frame(&mut self, buffers: &[(BufferMetadata, &[f32])]) -> (Vec<f32>, Vec<f32>);

    /// Mixes buffers of arbitrary length by feeding them to
    /// [`Binauraliser::process_frame`] one [`FRAME_SIZE`]-sample frame at a
    /// time and concatenating the results.
    ///
    /// `buffers` holds one tuple per sound source: a [`BufferMetadata`], which
    /// is applied unchanged to every frame of that source, and the source's
    /// float sample data.
    ///
    /// The sources do not need to have the same length, nor a length that is a
    /// multiple of [`FRAME_SIZE`]: every source is treated as if it were
    /// followed by silence (zeros) up to the length of the longest source,
    /// rounded up to a whole number of frames. Sources that already have that
    /// length are used in place without being copied.
    ///
    /// Returns a pair of vectors `(left, right)` holding the concatenated
    /// output of every processed frame. An empty `buffers` slice (or one whose
    /// sources are all empty) yields two empty vectors.
    fn process(&mut self, buffers: &[(BufferMetadata, &[f32])]) -> (Vec<f32>, Vec<f32>) {
        let longest = buffers
            .iter()
            .map(|(_, samples)| samples.len())
            .max()
            .unwrap_or(0);
        let padded_len = longest.div_ceil(FRAME_SIZE) * FRAME_SIZE;

        // Owned, zero-padded copies for exactly those sources that are too
        // short; `None` means the caller's slice can be used directly.
        let padded: Vec<Option<Vec<f32>>> = buffers
            .iter()
            .map(|(_, samples)| {
                (samples.len() != padded_len).then(|| {
                    let mut owned = Vec::with_capacity(padded_len);
                    owned.extend_from_slice(samples);
                    owned.resize(padded_len, 0.0);
                    owned
                })
            })
            .collect();

        // After this point every source is exactly `padded_len` samples long,
        // so the per-frame slicing below cannot go out of range.
        let sources: Vec<(BufferMetadata, &[f32])> = buffers
            .iter()
            .zip(&padded)
            .map(|((metadata, samples), owned)| (*metadata, owned.as_deref().unwrap_or(*samples)))
            .collect();

        let mut final_left_vec = Vec::with_capacity(padded_len);
        let mut final_right_vec = Vec::with_capacity(padded_len);

        for start in (0..padded_len).step_by(FRAME_SIZE) {
            let frame: Vec<(BufferMetadata, &[f32])> = sources
                .iter()
                .map(|&(metadata, samples)| (metadata, &samples[start..start + FRAME_SIZE]))
                .collect();

            let (left_vec, right_vec) = self.process_frame(&frame);

            final_left_vec.extend(left_vec);
            final_right_vec.extend(right_vec);
        }

        (final_left_vec, final_right_vec)
    }
}

/// The metadata associated with an audio stream. Includes the buffer's angular
/// position, range, and gain.
///
/// NOTE(review): the angle fields were originally documented as degrees, but
/// [`BinauraliserNF`] multiplies both of them by a radians-to-degrees factor
/// before handing them to SAF's `*_deg` setters, i.e. it treats them as
/// radians. The field docs below describe what that implementation does;
/// confirm the intended unit with the callers.
#[derive(Clone, Copy, Debug)]
pub struct BufferMetadata {
    /// The azimuth of the sound from the listener, with 0.0 being directly in
    /// front of the listener. Interpreted as radians by [`BinauraliserNF`].
    pub azimuth: f32,
    /// The elevation of the sound from the listener, with 0.0 indicating that
    /// the sound is level with the listener's head. Interpreted as radians by
    /// [`BinauraliserNF`].
    pub elevation: f32,
    /// The distance of the sound from the listener. [`BinauraliserNF`] passes
    /// it to `binauraliserNF_setSourceDist_m`, so presumably metres — confirm.
    pub range: f32,
    /// Amount of amplification applied to a signal
    pub gain: f32,
}

/// Implementation of [`Binauraliser`] that uses SAF's BinauraliserNF (Near Field)
///
/// The wrapped handle is obtained from `binauraliserNF_create` in
/// [`BinauraliserNF::new`] and handed to `binauraliserNF_destroy` when the
/// value is dropped.
pub struct BinauraliserNF {
    /// stores C-style BinauraliserNF object, for use in libsaf
    h_bin: *mut c_void,
}

impl BinauraliserNF {
    /// Builds a [`BinauraliserNF`] around a freshly created SAF instance.
    ///
    /// The instance is initialised with the module's `SAMP_RATE` and has its
    /// "use default HRIRs" flag set to 1.
    pub fn new() -> Self {
        let mut handle = null_mut();

        // SAFETY (as far as this file shows): `handle` is a live local, so the
        // out-pointer given to the create call is valid for writes.
        unsafe { saf_raw::binauraliserNF_create(addr_of_mut!(handle)) };

        // SAFETY (as far as this file shows): `handle` now holds whatever the
        // create call stored, and nothing else has touched it since.
        unsafe {
            // initialise the sample rate before selecting the HRIR set
            saf_raw::binauraliserNF_init(handle, SAMP_RATE as i32);
            saf_raw::binauraliser_setUseDefaultHRIRsflag(handle, 1);
        }

        Self { h_bin: handle }
    }
}

impl Binauraliser for BinauraliserNF {
    /// Spatialises one frame of audio with SAF's near-field binauraliser.
    ///
    /// For every source this pushes the metadata into the SAF instance
    /// (distance from `range`, azimuth and elevation converted with
    /// `RAD_TO_DEGREE` before being given to the `*_deg` setters, and gain),
    /// re-initialises the codec, and then runs the SAF process call over all
    /// sources at once.
    ///
    /// Returns two freshly allocated vectors of [`FRAME_SIZE`] samples each;
    /// they are the two output channels given to SAF, in order.
    ///
    /// # Panics
    ///
    /// Panics if any input buffer is not exactly [`FRAME_SIZE`] samples long.
    /// This is checked in release builds as well: the C side is told that
    /// every input holds `FRAME_SIZE` samples, so a shorter slice would be
    /// read out of bounds.
    fn process_frame(&mut self, buffers: &[(BufferMetadata, &[f32])]) -> (Vec<f32>, Vec<f32>) {
        for (_, samples) in buffers {
            assert_eq!(
                samples.len(),
                FRAME_SIZE,
                "every input buffer must hold exactly FRAME_SIZE samples"
            );
        }

        let num_channels: usize = buffers.len();

        // one raw pointer per input channel; filled in while the metadata is
        // being pushed to SAF below
        let mut raw_input_ptrs: Vec<*const f32> = vec![null(); num_channels];

        // output storage, one vector per output channel
        let mut output_vec_1 = vec![0.0_f32; FRAME_SIZE];
        let mut output_vec_2 = vec![0.0_f32; FRAME_SIZE];

        // the array length ties the number of output pointers to
        // NUM_OUT_CHANNELS at compile time
        let raw_output_ptrs: [*mut f32; NUM_OUT_CHANNELS] =
            [output_vec_1.as_mut_ptr(), output_vec_2.as_mut_ptr()];

        // SAFETY (as far as this file shows): `self.h_bin` comes from
        // `binauraliserNF_create` and is only destroyed in `Drop`; every input
        // pointer refers to a slice of exactly FRAME_SIZE samples (asserted
        // above) that outlives the call; both output pointers refer to
        // FRAME_SIZE-element vectors owned by this function.
        unsafe {
            saf_raw::binauraliser_setNumSources(self.h_bin, num_channels as i32);

            for (i, &(metadata, audio_data)) in buffers.iter().enumerate() {
                // store raw pointer for channel in raw_input_ptrs
                raw_input_ptrs[i] = audio_data.as_ptr();

                // set distance, azimuth, and elevation for each channel
                saf_raw::binauraliserNF_setSourceDist_m(self.h_bin, i as i32, metadata.range);
                saf_raw::binauraliser_setSourceAzi_deg(
                    self.h_bin,
                    i as i32,
                    metadata.azimuth * RAD_TO_DEGREE,
                );
                saf_raw::binauraliser_setSourceElev_deg(
                    self.h_bin,
                    i as i32,
                    metadata.elevation * RAD_TO_DEGREE,
                );
                saf_raw::binauraliser_setSourceGain(self.h_bin, i as i32, metadata.gain);
            }

            // note: must initialize codec variables after setting positional
            // data for each of the sound sources
            saf_raw::binauraliserNF_initCodec(self.h_bin);

            // call process() to convert to binaural audio
            saf_raw::binauraliserNF_process(
                self.h_bin,
                raw_input_ptrs.as_ptr(),  // N inputs x K samples
                raw_output_ptrs.as_ptr(), // 2 outputs x K samples
                num_channels as i32,      // N inputs
                NUM_OUT_CHANNELS as i32,  // 2 outputs
                FRAME_SIZE as i32,        // K samples
            );
        }

        (output_vec_1, output_vec_2)
    }
}

impl Default for BinauraliserNF {
    fn default() -> Self {
        Self::new()
    }
}

/// Hands the wrapped SAF handle back to `binauraliserNF_destroy` when a
/// [`BinauraliserNF`] goes out of scope.
impl Drop for BinauraliserNF {
    fn drop(&mut self) {
        // the destroy call takes a pointer to the handle, not the handle itself
        let handle_slot = addr_of_mut!(self.h_bin);

        // SAFETY (as far as this file shows): `h_bin` was filled in by
        // `binauraliserNF_create` in `new`, and `drop` runs at most once.
        unsafe { saf_raw::binauraliserNF_destroy(handle_slot) };
    }
}

/// Stand-in [`Binauraliser`] that performs no spatialisation: it hands the
/// first input buffer back as the left channel and the second as the right.
///
/// It is not constructed anywhere in the visible part of this file, which is
/// presumably why the dead-code lint is silenced.
#[allow(dead_code)]
struct DummyBinauraliser;

impl Binauraliser for DummyBinauraliser {
    /// Copies the first source into the left output and the second source
    /// into the right output, ignoring all metadata.
    ///
    /// Panics unless exactly two sources are supplied; in debug builds it also
    /// checks that every source is `FRAME_SIZE` samples long.
    fn process_frame(&mut self, buffers: &[(BufferMetadata, &[f32])]) -> (Vec<f32>, Vec<f32>) {
        buffers
            .iter()
            .for_each(|(_, samples)| debug_assert_eq!(samples.len(), FRAME_SIZE));
        assert!(buffers.len() == 2);

        let left = buffers[0].1;
        let right = buffers[1].1;
        (left.to_vec(), right.to_vec())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    use hound::{SampleFormat, WavSpec, WavWriter};
    use std::f32::consts::PI;

    // NOTE(review): the angles below look like degrees, but
    // `BinauraliserNF::process_frame` multiplies azimuth and elevation by
    // `RAD_TO_DEGREE`, i.e. treats them as radians. `LEFT_METADATA` and
    // `RIGHT_METADATA` also share the same azimuth. The values are left
    // untouched so the tests keep exercising exactly what they did before;
    // confirm the intended positions.

    /// Source straight ahead of the listener at unit range and gain.
    const MOCK_METADATA: BufferMetadata = BufferMetadata {
        azimuth: 0.0,
        elevation: 0.0,
        range: 1.0,
        gain: 1.0,
    };

    /// Metadata used for the first (C note) source in the stereo tests.
    const LEFT_METADATA: BufferMetadata = BufferMetadata {
        azimuth: 90.0,
        elevation: 10.0,
        range: 1.0,
        gain: 1.0,
    };

    /// Metadata used for the second (G note) source in the stereo tests.
    const RIGHT_METADATA: BufferMetadata = BufferMetadata {
        azimuth: 90.0,
        elevation: 0.0,
        range: 1.0,
        gain: 1.0,
    };

    /// Frequencies of the two test tones.
    const C: f32 = 261.61;
    const G: f32 = 392.00;

    /// Generates a sine tone of frequency `note`, scaled to the `i16`
    /// amplitude range.
    ///
    /// `len` is rounded up to the next multiple of `FRAME_SIZE`. The time
    /// base is `sample_index % 44100` divided by 44100, so it wraps back to
    /// zero every 44100 samples.
    fn create_sine_wave(len: usize, note: f32) -> Vec<f32> {
        let snapped_len = len.div_ceil(FRAME_SIZE) * FRAME_SIZE;

        (0..snapped_len)
            .map(|x| (x % 44100) as f32 / 44100.0)
            .map(|t| (t * note * 2.0 * PI).sin() * (i16::MAX as f32))
            .collect()
    }

    /// Writes the two channels, interleaved, as a 16-bit 44.1 kHz stereo WAV
    /// file at `out_file`. Stops at the end of the shorter channel.
    ///
    /// Not called by any test in this module, hence the `allow`; presumably
    /// kept around for listening to the output by hand.
    #[allow(unused)]
    fn write_stereo_output(left_samps: Vec<f32>, right_samps: Vec<f32>, out_file: &'static str) {
        let spec = WavSpec {
            channels: 2,
            sample_rate: 44100,
            bits_per_sample: 16,
            sample_format: SampleFormat::Int,
        };

        let mut writer = WavWriter::create(out_file, spec).unwrap();

        for (left, right) in std::iter::zip(left_samps, right_samps) {
            writer.write_sample(left as i16).unwrap();
            writer.write_sample(right as i16).unwrap();
        }

        writer.finalize().unwrap();
    }

    /// Validate that running process_frame() doesn't segfault on mono
    /// audio data
    #[test]
    fn test_mono_single_frame() {
        let mut binauraliser_nf = BinauraliserNF::new();

        // 1 frame of audio (128 samples)
        let c_note_vec: Vec<f32> = create_sine_wave(FRAME_SIZE, C);
        let frame_slice = [(MOCK_METADATA, &c_note_vec[0..FRAME_SIZE])];

        // assert no segfault and that every output sample is non-zero
        let (left_samps, right_samps) = binauraliser_nf.process_frame(frame_slice.as_ref());
        assert!(left_samps.iter().all(|&x| x != 0.0));
        assert!(right_samps.iter().all(|&x| x != 0.0));
    }

    /// Validate that running process_frame() doesn't segfault on stereo
    /// audio data
    #[test]
    fn test_stereo_single_frame() {
        let mut binauraliser_nf = BinauraliserNF::new();

        // 1 frame of audio (128 samples)
        let c_note_vec: Vec<f32> = create_sine_wave(FRAME_SIZE, C);
        let g_note_vec: Vec<f32> = create_sine_wave(FRAME_SIZE, G);

        let frame_slice = [
            (LEFT_METADATA, c_note_vec.as_slice()),
            (RIGHT_METADATA, g_note_vec.as_slice()),
        ];

        // assert no segfault and that every output sample is non-zero
        let (left_samps, right_samps) = binauraliser_nf.process_frame(frame_slice.as_ref());
        assert!(left_samps.iter().all(|&x| x != 0.0));
        assert!(right_samps.iter().all(|&x| x != 0.0));
    }

    /// Validate that running process() over many frames of stereo audio data
    /// doesn't segfault
    #[test]
    fn test_stereo_multi_frame() {
        let mut binauraliser_nf = BinauraliserNF::new();

        const THREE_SEC: usize = SAMP_RATE * 3;
        // three seconds of audio, rounded up to a whole number of frames
        let c_note_vec: Vec<f32> = create_sine_wave(THREE_SEC, C);
        let g_note_vec: Vec<f32> = create_sine_wave(THREE_SEC, G);

        assert_eq!(0, c_note_vec.len() % FRAME_SIZE);

        let frame_slice = [
            (LEFT_METADATA, c_note_vec.as_slice()),
            (RIGHT_METADATA, g_note_vec.as_slice()),
        ];

        // assert no segfault and that every output sample is non-zero
        let (left_samps, right_samps) = binauraliser_nf.process(frame_slice.as_ref());
        assert!(left_samps.iter().all(|&x| x != 0.0));
        assert!(right_samps.iter().all(|&x| x != 0.0));
    }
}