/*
* Copyright (C) 2011 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#if ENABLE(WEB_AUDIO)
#include "SincResampler.h"
#include "AudioBus.h"
#include "AudioUtilities.h"
#include <wtf/MathExtras.h>
#if USE(ACCELERATE)
#include <Accelerate/Accelerate.h>
#elif CPU(X86_SSE2)
#include <xmmintrin.h>
#elif HAVE(ARM_NEON_INTRINSICS)
#include <arm_neon.h>
#endif
// Initial input buffer layout, dividing into regions r0 to r4 (note: r0, r3
// and r4 will move after the first load):
//
// |----------------|----------------------------------------------------------------|----------------|
//
//                                            m_requestFrames
//                   <-------------------------------------------------------------------------------->
//                                         r0 (during first load)
//
//  kernelSize / 2     kernelSize / 2                                kernelSize / 2      kernelSize / 2
// <---------------> <--------------->                             <---------------> <--------------->
//        r1                 r2                                            r3                 r4
//
//                        m_blockSize == r4 - r2
//                   <--------------------------------------->
//
//                                                m_requestFrames
//                                     <------------------ ... ----------------->
//                                           r0 (during second load)
//
// On the second request r0 slides to the right by kernelSize / 2 and r3, r4
// and m_blockSize are reinitialized via step (3) in the algorithm below.
//
// These new regions remain constant until a Flush() occurs. While complicated,
// this allows us to reduce jitter by always requesting the same amount from the
// provided callback.
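//
// As a concrete illustration (these numbers are assumptions for the example,
// not values fixed by this file: m_requestFrames = 128, a typical render
// quantum, with kernelSize = 32):
//
//   m_inputBuffer holds 128 + 32 = 160 frames
//   r1 = m_inputBuffer                  (frames [0, 16))
//   r2 = r0 = m_inputBuffer + 16        (r0 spans frames [16, 144) on the first load)
//   r3 = r0 + 128 - 32 = m_inputBuffer + 112
//   r4 = r0 + 128 - 16 = m_inputBuffer + 128
//   m_blockSize = r4 - r2 = 112 frames
//
// After the first wrap, r0 slides to m_inputBuffer + 32, r4 moves to
// m_inputBuffer + 144 (its half-kernel ending at frame 160), and m_blockSize
// grows to 128 frames.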
// The Algorithm:
//
// 1) Allocate m_inputBuffer of size m_requestFrames + kernelSize; this ensures
// there's enough room to read m_requestFrames from the callback into region
// r0 (which will move between the first and subsequent passes).
//
// 2) Let r1, r2 each represent half the kernel centered around r0:
//
// r0 = m_inputBuffer + kernelSize / 2
// r1 = m_inputBuffer
// r2 = r0
//
// r0 is always m_requestFrames in size. r1, r2 are kernelSize / 2 in
// size. r1 must be zero initialized to avoid convolution with garbage (see
// step (5) for why).
//
// 3) Let r3, r4 each represent half the kernel right aligned with the end of
// r0 and choose m_blockSize as the distance in frames between r4 and r2:
//
// r3 = r0 + m_requestFrames - kernelSize
// r4 = r0 + m_requestFrames - kernelSize / 2
// m_blockSize = r4 - r2 = m_requestFrames - kernelSize / 2
//
// 4) Consume m_requestFrames frames into r0.
//
// 5) Position kernel centered at start of r2 and generate output frames until
// the kernel is centered at the start of r4 or we've finished generating
// all the output frames.
//
// 6) Wrap the leftover data from r3 to r1 and from r4 to r2.
//
// 7) If we're on the second load, in order to avoid overwriting the frames we
// just wrapped from r4 we need to slide r0 to the right by the size of
// r4, which is kernelSize / 2:
//
// r0 = r0 + kernelSize / 2 = m_inputBuffer + kernelSize
//
// r3, r4, and m_blockSize then need to be reinitialized, so goto (3).
//
// 8) Else, if we're not on the second load, goto (4).
//
// Note: we're glossing over how the sub-sample handling works with m_virtualSourceIndex, etc.
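//
// With this layout, each refill of r0 with m_requestFrames frames lets roughly
// m_blockSize / m_scaleFactor output frames be produced before the next refill;
// this is the m_chunkSize computed in updateRegions() below. For illustration
// (assumed values, as in the example above), with m_blockSize = 112 and
// m_scaleFactor = 44100.0 / 48000.0, about 121 output frames are produced per
// 128-frame input request.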
namespace WebCore {
constexpr unsigned kernelSize { 32 };
constexpr unsigned numberOfKernelOffsets { 32 };
constexpr unsigned kernelStorageSize { kernelSize * (numberOfKernelOffsets + 1) };
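// Kernels are stored for numberOfKernelOffsets + 1 sub-sample offsets so that
// convolve() can always read the "next" kernel (k2 = k1 + kernelSize) when
// interpolating between adjacent offsets. With the values above the storage is
// 32 * 33 = 1056 floats.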
static size_t calculateChunkSize(unsigned blockSize, double scaleFactor)
{
return blockSize / scaleFactor;
}
SincResampler::SincResampler(double scaleFactor, unsigned requestFrames, Function<void(float* buffer, size_t framesToProcess)>&& provideInput)
: m_scaleFactor(scaleFactor)
, m_kernelStorage(kernelStorageSize)
, m_requestFrames(requestFrames)
, m_provideInput(WTFMove(provideInput))
, m_inputBuffer(m_requestFrames + kernelSize) // See input buffer layout above.
, m_r1(m_inputBuffer.data())
, m_r2(m_inputBuffer.data() + kernelSize / 2)
{
ASSERT(m_provideInput);
ASSERT(m_requestFrames > 0);
updateRegions(false);
ASSERT(m_blockSize > kernelSize);
initializeKernel();
}
void SincResampler::updateRegions(bool isSecondLoad)
{
// Setup various region pointers in the buffer (see diagram above). If we're
// on the second load we need to slide m_r0 to the right by kernelSize / 2.
m_r0 = m_inputBuffer.data() + (isSecondLoad ? kernelSize : kernelSize / 2);
m_r3 = m_r0 + m_requestFrames - kernelSize;
m_r4 = m_r0 + m_requestFrames - kernelSize / 2;
m_blockSize = m_r4 - m_r2;
m_chunkSize = calculateChunkSize(m_blockSize, m_scaleFactor);
// m_r1 at the beginning of the buffer.
ASSERT(m_r1 == m_inputBuffer.data());
// m_r1 left of m_r2, m_r3 left of m_r4, and the two regions have equal size.
ASSERT((m_r2 - m_r1) == (m_r4 - m_r3));
// m_r2 left of m_r3.
ASSERT(m_r2 <= m_r3);
}
void SincResampler::initializeKernel()
{
// Blackman window parameters.
double alpha = 0.16;
double a0 = 0.5 * (1.0 - alpha);
double a1 = 0.5;
double a2 = 0.5 * alpha;
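    // With alpha = 0.16 these are the standard Blackman coefficients
    // a0 = 0.42, a1 = 0.5, a2 = 0.08, giving the window
    // w(x) = 0.42 - 0.5 * cos(2 * pi * x) + 0.08 * cos(4 * pi * x).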
// sincScaleFactor is basically the normalized cutoff frequency of the low-pass filter.
double sincScaleFactor = m_scaleFactor > 1.0 ? 1.0 / m_scaleFactor : 1.0;
// The sinc function is an idealized brick-wall filter, but since we're windowing it the
// transition from pass to stop does not happen right away. So we should adjust the
// lowpass filter cutoff slightly downward to avoid some aliasing at the very high-end.
// FIXME: this value is empirical and to be more exact should vary depending on kernelSize.
sincScaleFactor *= 0.9;
int n = kernelSize;
int halfSize = n / 2;
// Generates a set of windowed sinc() kernels.
// We generate a range of sub-sample offsets from 0.0 to 1.0.
for (unsigned offsetIndex = 0; offsetIndex <= numberOfKernelOffsets; ++offsetIndex) {
double subsampleOffset = static_cast<double>(offsetIndex) / numberOfKernelOffsets;
for (int i = 0; i < n; ++i) {
// Compute the sinc() with offset.
double s = sincScaleFactor * piDouble * (i - halfSize - subsampleOffset);
double sinc = !s ? 1.0 : sin(s) / s;
sinc *= sincScaleFactor;
// Compute Blackman window, matching the offset of the sinc().
double x = (i - subsampleOffset) / n;
double window = a0 - a1 * cos(2.0 * piDouble * x) + a2 * cos(4.0 * piDouble * x);
// Window the sinc() function and store at the correct offset.
m_kernelStorage[i + offsetIndex * kernelSize] = sinc * window;
}
}
}
void SincResampler::processBuffer(const float* source, float* destination, unsigned numberOfSourceFrames, double scaleFactor)
{
SincResampler resampler(scaleFactor, AudioUtilities::renderQuantumSize, [source, numberOfSourceFrames](float* buffer, size_t framesToProcess) mutable {
// Clamp to number of frames available and zero-pad.
size_t framesToCopy = std::min<size_t>(numberOfSourceFrames, framesToProcess);
memcpy(buffer, source, sizeof(float) * framesToCopy);
// Zero-pad if necessary.
if (framesToCopy < framesToProcess)
memset(buffer + framesToCopy, 0, sizeof(float) * (framesToProcess - framesToCopy));
numberOfSourceFrames -= framesToCopy;
source += framesToCopy;
});
unsigned numberOfDestinationFrames = static_cast<unsigned>(numberOfSourceFrames / scaleFactor);
unsigned remaining = numberOfDestinationFrames;
while (remaining) {
unsigned framesThisTime = std::min<unsigned>(remaining, AudioUtilities::renderQuantumSize);
resampler.process(destination, framesThisTime);
destination += framesThisTime;
remaining -= framesThisTime;
}
}
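// Illustrative one-shot usage of the helper above (a sketch, assuming the
// declaration in SincResampler.h; |source| and |sourceFrames| are placeholders
// and buffer sizing is the caller's responsibility):
//
//     // Resample a mono 44.1 kHz buffer to 48 kHz.
//     double scaleFactor = 44100.0 / 48000.0;
//     Vector<float> destination(static_cast<size_t>(sourceFrames / scaleFactor));
//     SincResampler::processBuffer(source, destination.data(), sourceFrames, scaleFactor);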
void SincResampler::process(float* destination, size_t framesToProcess)
{
unsigned numberOfDestinationFrames = framesToProcess;
// Step (1)
// Prime the input buffer at the start of the input stream.
if (!m_isBufferPrimed) {
m_provideInput(m_r0, m_requestFrames);
m_isBufferPrimed = true;
}
// Step (2)
while (numberOfDestinationFrames) {
while (m_virtualSourceIndex < m_blockSize) {
// m_virtualSourceIndex lies in between two kernel offsets so figure out what they are.
int sourceIndexI = static_cast<int>(m_virtualSourceIndex);
double subsampleRemainder = m_virtualSourceIndex - sourceIndexI;
double virtualOffsetIndex = subsampleRemainder * numberOfKernelOffsets;
int offsetIndex = static_cast<int>(virtualOffsetIndex);
float* k1 = m_kernelStorage.data() + offsetIndex * kernelSize;
float* k2 = k1 + kernelSize;
// Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be true so long as kernelSize is a multiple of 16.
ASSERT(!(reinterpret_cast<uintptr_t>(k1) & 0x0F));
ASSERT(!(reinterpret_cast<uintptr_t>(k2) & 0x0F));
// Initialize input pointer based on quantized m_virtualSourceIndex.
float* inputP = m_r1 + sourceIndexI;
// Figure out how much to weight each kernel's "convolution".
double kernelInterpolationFactor = virtualOffsetIndex - offsetIndex;
*destination++ = convolve(inputP, k1, k2, kernelInterpolationFactor);
// Advance the virtual index.
m_virtualSourceIndex += m_scaleFactor;
--numberOfDestinationFrames;
if (!numberOfDestinationFrames)
return;
}
// Wrap back around to the start.
ASSERT(m_virtualSourceIndex >= m_blockSize);
m_virtualSourceIndex -= m_blockSize;
// Step (3) Copy r3, r4 to r1, r2.
// This wraps the last input frames back to the start of the buffer.
memcpy(m_r1, m_r3, sizeof(float) * kernelSize);
// Step (4) -- Reinitialize regions if necessary.
if (m_r0 == m_r2)
updateRegions(true);
// Step (5)
// Refresh the buffer with more input.
m_provideInput(m_r0, m_requestFrames);
}
}
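// Illustrative streaming usage (a sketch; the callback body and frame counts
// are placeholders): construct the resampler with a callback that supplies
// input on demand, then pull output one render quantum at a time.
//
//     SincResampler resampler(44100.0 / 48000.0, 128, [](float* buffer, size_t framesToProcess) {
//         // Fill |buffer| with the next |framesToProcess| source frames (zero-pad at end of stream).
//     });
//     float output[128];
//     resampler.process(output, 128); // Call once per 128-frame output quantum.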
float SincResampler::convolve(const float* inputP, const float* k1, const float* k2, float kernelInterpolationFactor)
{
#if USE(ACCELERATE)
float sum1;
float sum2;
vDSP_dotpr(inputP, 1, k1, 1, &sum1, kernelSize);
vDSP_dotpr(inputP, 1, k2, 1, &sum2, kernelSize);
// Linearly interpolate the two "convolutions".
return (1.0f - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2;
#elif CPU(X86_SSE2)
__m128 m_input;
__m128 m_sums1 = _mm_setzero_ps();
__m128 m_sums2 = _mm_setzero_ps();
// Based on |inputP| alignment, we need to use loadu or load.
if (reinterpret_cast<uintptr_t>(inputP) & 0x0F) {
for (unsigned i = 0; i < kernelSize; i += 4) {
m_input = _mm_loadu_ps(inputP + i);
m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
}
} else {
for (unsigned i = 0; i < kernelSize; i += 4) {
m_input = _mm_load_ps(inputP + i);
m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
}
}
// Linearly interpolate the two "convolutions".
m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(1.0f - kernelInterpolationFactor));
m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(kernelInterpolationFactor));
m_sums1 = _mm_add_ps(m_sums1, m_sums2);
// Sum components together.
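    // _mm_movehl_ps duplicates the upper two lanes into the lower two; after the add the low
    // lanes hold two partial sums, and the final shuffle + add_ss collapses them into lane 0.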
float result;
m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
_mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(m_sums2, m_sums2, 1)));
return result;
#elif HAVE(ARM_NEON_INTRINSICS)
float32x4_t m_input;
float32x4_t m_sums1 = vmovq_n_f32(0);
float32x4_t m_sums2 = vmovq_n_f32(0);
const float* upper = inputP + kernelSize;
for (; inputP < upper; ) {
m_input = vld1q_f32(inputP);
inputP += 4;
m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));
k1 += 4;
m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));
k2 += 4;
}
// Linearly interpolate the two "convolutions".
m_sums1 = vmlaq_f32(vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernelInterpolationFactor)), m_sums2, vmovq_n_f32(kernelInterpolationFactor));
// Sum components together.
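    // vget_high/vget_low split the vector into two 2-lane halves; vadd_f32 adds them
    // pairwise and vpadd_f32 folds the remaining two lanes so lane 0 holds the total.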
float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
#else
float sum1 = 0;
float sum2 = 0;
// Generate a single output sample.
int n = kernelSize;
while (n--) {
sum1 += *inputP * *k1++;
sum2 += *inputP++ * *k2++;
}
// Linearly interpolate the two "convolutions".
return (1.0f - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2;
#endif
}
} // namespace WebCore
#endif // ENABLE(WEB_AUDIO)