| /* |
| * Copyright (C) 2004, 2005, 2006, 2007 Nikolas Zimmermann <zimmermann@kde.org> |
| * Copyright (C) 2004, 2005 Rob Buis <buis@kde.org> |
| * Copyright (C) 2005 Eric Seidel <eric@webkit.org> |
| * Copyright (C) 2009 Dirk Schulze <krit@webkit.org> |
| * Copyright (C) 2010 Igalia, S.L. |
| * Copyright (C) Research In Motion Limited 2010. All rights reserved. |
| * Copyright (C) 2015-2021 Apple, Inc. All rights reserved. |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Library General Public |
| * License as published by the Free Software Foundation; either |
| * version 2 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| * |
| * You should have received a copy of the GNU Library General Public License |
| * along with this library; see the file COPYING.LIB. If not, write to |
| * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| * Boston, MA 02110-1301, USA. |
| */ |
| |
| #include "config.h" |
| #include "FEGaussianBlurSoftwareApplier.h" |
| |
| #include "FEGaussianBlur.h" |
| #include "GraphicsContext.h" |
| #include "PixelBuffer.h" |
| #include <wtf/MathExtras.h> |
| |
| #if USE(ACCELERATE) |
| #include <Accelerate/Accelerate.h> |
| #else |
| #include <JavaScriptCore/TypedArrayInlines.h> |
| #include <wtf/ParallelJobs.h> |
| #endif |
| |
| namespace WebCore { |
| |
| inline void FEGaussianBlurSoftwareApplier::kernelPosition(int blurIteration, unsigned& radius, int& deltaLeft, int& deltaRight) |
| { |
| // Check http://www.w3.org/TR/SVG/filters.html#feGaussianBlurElement for details. |
| switch (blurIteration) { |
| case 0: |
| if (!(radius % 2)) { |
| deltaLeft = radius / 2 - 1; |
| deltaRight = radius - deltaLeft; |
| } else { |
| deltaLeft = radius / 2; |
| deltaRight = radius - deltaLeft; |
| } |
| break; |
| case 1: |
| if (!(radius % 2)) { |
| deltaLeft++; |
| deltaRight--; |
| } |
| break; |
| case 2: |
| if (!(radius % 2)) { |
| deltaRight++; |
| radius++; |
| } |
| break; |
| } |
| } |
| |
| // This function only operates on Alpha channel. |
| inline void FEGaussianBlurSoftwareApplier::boxBlurAlphaOnly(const Uint8ClampedArray& srcPixelArray, Uint8ClampedArray& dstPixelArray, unsigned dx, int& dxLeft, int& dxRight, int& stride, int& strideLine, int& effectWidth, int& effectHeight, const int& maxKernelSize) |
| { |
| const uint8_t* srcData = srcPixelArray.data(); |
| uint8_t* dstData = dstPixelArray.data(); |
| // Memory alignment is: RGBA, zero-index based. |
| const int channel = 3; |
| |
| for (int y = 0; y < effectHeight; ++y) { |
| int line = y * strideLine; |
| int sum = 0; |
| |
| // Fill the kernel. |
| for (int i = 0; i < maxKernelSize; ++i) { |
| unsigned offset = line + i * stride; |
| const uint8_t* srcPtr = srcData + offset; |
| sum += srcPtr[channel]; |
| } |
| |
| // Blurring. |
| for (int x = 0; x < effectWidth; ++x) { |
| unsigned pixelByteOffset = line + x * stride + channel; |
| uint8_t* dstPtr = dstData + pixelByteOffset; |
| *dstPtr = static_cast<uint8_t>(sum / dx); |
| |
| // Shift kernel. |
| if (x >= dxLeft) { |
| unsigned leftOffset = pixelByteOffset - dxLeft * stride; |
| const uint8_t* srcPtr = srcData + leftOffset; |
| sum -= *srcPtr; |
| } |
| |
| if (x + dxRight < effectWidth) { |
| unsigned rightOffset = pixelByteOffset + dxRight * stride; |
| const uint8_t* srcPtr = srcData + rightOffset; |
| sum += *srcPtr; |
| } |
| } |
| } |
| } |
| |
| inline void FEGaussianBlurSoftwareApplier::boxBlur(const Uint8ClampedArray& srcPixelArray, Uint8ClampedArray& dstPixelArray, unsigned dx, int dxLeft, int dxRight, int stride, int strideLine, int effectWidth, int effectHeight, bool alphaImage, EdgeModeType edgeMode) |
| { |
| const int maxKernelSize = std::min(dxRight, effectWidth); |
| if (alphaImage) |
| return boxBlurAlphaOnly(srcPixelArray, dstPixelArray, dx, dxLeft, dxRight, stride, strideLine, effectWidth, effectHeight, maxKernelSize); |
| |
| const uint8_t* srcData = srcPixelArray.data(); |
| uint8_t* dstData = dstPixelArray.data(); |
| |
| // Concerning the array width/length: it is Element size + Margin + Border. The number of pixels will be |
| // P = width * height * channels. |
| for (int y = 0; y < effectHeight; ++y) { |
| int line = y * strideLine; |
| int sumR = 0, sumG = 0, sumB = 0, sumA = 0; |
| |
| if (edgeMode == EdgeModeType::None) { |
| // Fill the kernel. |
| for (int i = 0; i < maxKernelSize; ++i) { |
| unsigned offset = line + i * stride; |
| const uint8_t* srcPtr = srcData + offset; |
| sumR += *srcPtr++; |
| sumG += *srcPtr++; |
| sumB += *srcPtr++; |
| sumA += *srcPtr; |
| } |
| |
| // Blurring. |
| for (int x = 0; x < effectWidth; ++x) { |
| unsigned pixelByteOffset = line + x * stride; |
| uint8_t* dstPtr = dstData + pixelByteOffset; |
| |
| *dstPtr++ = static_cast<uint8_t>(sumR / dx); |
| *dstPtr++ = static_cast<uint8_t>(sumG / dx); |
| *dstPtr++ = static_cast<uint8_t>(sumB / dx); |
| *dstPtr = static_cast<uint8_t>(sumA / dx); |
| |
| // Shift kernel. |
| if (x >= dxLeft) { |
| unsigned leftOffset = pixelByteOffset - dxLeft * stride; |
| const uint8_t* srcPtr = srcData + leftOffset; |
| sumR -= srcPtr[0]; |
| sumG -= srcPtr[1]; |
| sumB -= srcPtr[2]; |
| sumA -= srcPtr[3]; |
| } |
| |
| if (x + dxRight < effectWidth) { |
| unsigned rightOffset = pixelByteOffset + dxRight * stride; |
| const uint8_t* srcPtr = srcData + rightOffset; |
| sumR += srcPtr[0]; |
| sumG += srcPtr[1]; |
| sumB += srcPtr[2]; |
| sumA += srcPtr[3]; |
| } |
| } |
| |
| } else { |
| // FIXME: Add support for 'wrap' here. |
| // Get edge values for edgeMode 'duplicate'. |
| const uint8_t* edgeValueLeft = srcData + line; |
| const uint8_t* edgeValueRight = srcData + (line + (effectWidth - 1) * stride); |
| |
| // Fill the kernel. |
| for (int i = dxLeft * -1; i < dxRight; ++i) { |
| // Is this right for negative values of 'i'? |
| unsigned offset = line + i * stride; |
| const uint8_t* srcPtr = srcData + offset; |
| |
| if (i < 0) { |
| sumR += edgeValueLeft[0]; |
| sumG += edgeValueLeft[1]; |
| sumB += edgeValueLeft[2]; |
| sumA += edgeValueLeft[3]; |
| } else if (i >= effectWidth) { |
| sumR += edgeValueRight[0]; |
| sumG += edgeValueRight[1]; |
| sumB += edgeValueRight[2]; |
| sumA += edgeValueRight[3]; |
| } else { |
| sumR += *srcPtr++; |
| sumG += *srcPtr++; |
| sumB += *srcPtr++; |
| sumA += *srcPtr; |
| } |
| } |
| |
| // Blurring. |
| for (int x = 0; x < effectWidth; ++x) { |
| unsigned pixelByteOffset = line + x * stride; |
| uint8_t* dstPtr = dstData + pixelByteOffset; |
| |
| *dstPtr++ = static_cast<uint8_t>(sumR / dx); |
| *dstPtr++ = static_cast<uint8_t>(sumG / dx); |
| *dstPtr++ = static_cast<uint8_t>(sumB / dx); |
| *dstPtr = static_cast<uint8_t>(sumA / dx); |
| |
| // Shift kernel. |
| if (x < dxLeft) { |
| sumR -= edgeValueLeft[0]; |
| sumG -= edgeValueLeft[1]; |
| sumB -= edgeValueLeft[2]; |
| sumA -= edgeValueLeft[3]; |
| } else { |
| unsigned leftOffset = pixelByteOffset - dxLeft * stride; |
| const uint8_t* srcPtr = srcData + leftOffset; |
| sumR -= srcPtr[0]; |
| sumG -= srcPtr[1]; |
| sumB -= srcPtr[2]; |
| sumA -= srcPtr[3]; |
| } |
| |
| if (x + dxRight >= effectWidth) { |
| sumR += edgeValueRight[0]; |
| sumG += edgeValueRight[1]; |
| sumB += edgeValueRight[2]; |
| sumA += edgeValueRight[3]; |
| } else { |
| unsigned rightOffset = pixelByteOffset + dxRight * stride; |
| const uint8_t* srcPtr = srcData + rightOffset; |
| sumR += srcPtr[0]; |
| sumG += srcPtr[1]; |
| sumB += srcPtr[2]; |
| sumA += srcPtr[3]; |
| } |
| } |
| } |
| } |
| } |
| |
| #if USE(ACCELERATE) |
| inline void FEGaussianBlurSoftwareApplier::boxBlurAccelerated(Uint8ClampedArray& ioBuffer, Uint8ClampedArray& tempBuffer, unsigned kernelSize, int stride, int effectWidth, int effectHeight) |
| { |
| if (!ioBuffer.data() || !tempBuffer.data()) { |
| ASSERT_NOT_REACHED(); |
| return; |
| } |
| |
| if (effectWidth <= 0 || effectHeight <= 0 || stride <= 0) { |
| ASSERT_NOT_REACHED(); |
| return; |
| } |
| |
| // We must always use an odd radius. |
| if (kernelSize % 2 != 1) |
| kernelSize += 1; |
| |
| vImage_Buffer effectInBuffer; |
| effectInBuffer.data = static_cast<void*>(ioBuffer.data()); |
| effectInBuffer.width = effectWidth; |
| effectInBuffer.height = effectHeight; |
| effectInBuffer.rowBytes = stride; |
| |
| vImage_Buffer effectOutBuffer; |
| effectOutBuffer.data = tempBuffer.data(); |
| effectOutBuffer.width = effectWidth; |
| effectOutBuffer.height = effectHeight; |
| effectOutBuffer.rowBytes = stride; |
| |
| // Determine the size of a temporary buffer by calling the function first with a special flag. vImage will return |
| // the size needed, or an error (which are all negative). |
| size_t tmpBufferSize = vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, 0, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend | kvImageGetTempBufferSize); |
| if (tmpBufferSize <= 0) |
| return; |
| |
| void* tmpBuffer = fastMalloc(tmpBufferSize); |
| vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend); |
| vImageBoxConvolve_ARGB8888(&effectOutBuffer, &effectInBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend); |
| vImageBoxConvolve_ARGB8888(&effectInBuffer, &effectOutBuffer, tmpBuffer, 0, 0, kernelSize, kernelSize, 0, kvImageEdgeExtend); |
| fastFree(tmpBuffer); |
| |
| // The final result should be stored in ioBuffer. |
| ASSERT(ioBuffer.length() == tempBuffer.length()); |
| memcpy(ioBuffer.data(), tempBuffer.data(), ioBuffer.length()); |
| } |
| #endif |
| |
| inline void FEGaussianBlurSoftwareApplier::boxBlurUnaccelerated(Uint8ClampedArray& ioBuffer, Uint8ClampedArray& tempBuffer, unsigned kernelSizeX, unsigned kernelSizeY, int stride, IntSize& paintSize, bool isAlphaImage, EdgeModeType edgeMode) |
| { |
| int dxLeft = 0; |
| int dxRight = 0; |
| int dyLeft = 0; |
| int dyRight = 0; |
| |
| Uint8ClampedArray* fromBuffer = &ioBuffer; |
| Uint8ClampedArray* toBuffer = &tempBuffer; |
| |
| for (int i = 0; i < 3; ++i) { |
| if (kernelSizeX) { |
| kernelPosition(i, kernelSizeX, dxLeft, dxRight); |
| #if HAVE(ARM_NEON_INTRINSICS) |
| if (!isAlphaImage) |
| boxBlurNEON(*fromBuffer, *toBuffer, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height()); |
| else |
| boxBlur(*fromBuffer, *toBuffer, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), true, edgeMode); |
| #else |
| boxBlur(*fromBuffer, *toBuffer, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), isAlphaImage, edgeMode); |
| #endif |
| std::swap(fromBuffer, toBuffer); |
| } |
| |
| if (kernelSizeY) { |
| kernelPosition(i, kernelSizeY, dyLeft, dyRight); |
| #if HAVE(ARM_NEON_INTRINSICS) |
| if (!isAlphaImage) |
| boxBlurNEON(*fromBuffer, *toBuffer, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width()); |
| else |
| boxBlur(*fromBuffer, *toBuffer, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), true, edgeMode); |
| #else |
| boxBlur(*fromBuffer, *toBuffer, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), isAlphaImage, edgeMode); |
| #endif |
| std::swap(fromBuffer, toBuffer); |
| } |
| } |
| |
| // The final result should be stored in ioBuffer. |
| if (&ioBuffer != fromBuffer) { |
| ASSERT(ioBuffer.length() == fromBuffer->length()); |
| memcpy(ioBuffer.data(), fromBuffer->data(), ioBuffer.length()); |
| } |
| } |
| |
| inline void FEGaussianBlurSoftwareApplier::boxBlurGeneric(Uint8ClampedArray& ioBuffer, Uint8ClampedArray& tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize, bool isAlphaImage, EdgeModeType edgeMode) |
| { |
| int stride = 4 * paintSize.width(); |
| |
| #if USE(ACCELERATE) |
| if (kernelSizeX == kernelSizeY && (edgeMode == EdgeModeType::None || edgeMode == EdgeModeType::Duplicate)) { |
| boxBlurAccelerated(ioBuffer, tmpPixelArray, kernelSizeX, stride, paintSize.width(), paintSize.height()); |
| return; |
| } |
| #endif |
| |
| boxBlurUnaccelerated(ioBuffer, tmpPixelArray, kernelSizeX, kernelSizeY, stride, paintSize, isAlphaImage, edgeMode); |
| } |
| |
| #if !USE(ACCELERATE) |
| inline void FEGaussianBlurSoftwareApplier::boxBlurWorker(ApplyParameters* parameters) |
| { |
| IntSize paintSize(parameters->width, parameters->height); |
| boxBlurGeneric(*parameters->ioPixelArray, *parameters->tmpPixelArray, parameters->kernelSizeX, parameters->kernelSizeY, paintSize, parameters->isAlphaImage, parameters->edgeMode); |
| } |
| #endif |
| |
| inline void FEGaussianBlurSoftwareApplier::applyPlatform(Uint8ClampedArray& ioBuffer, Uint8ClampedArray& tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize, bool isAlphaImage, EdgeModeType edgeMode) |
| { |
| #if !USE(ACCELERATE) |
| int scanline = 4 * paintSize.width(); |
| int extraHeight = 3 * kernelSizeY * 0.5f; |
| |
| static constexpr int minimalRectDimension = 100 * 100; // Empirical data limit for parallel jobs |
| int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (minimalRectDimension + extraHeight * paintSize.width()); |
| |
| if (optimalThreadNumber > 1) { |
| ParallelJobs<ApplyParameters> parallelJobs(&boxBlurWorker, optimalThreadNumber); |
| |
| int jobs = parallelJobs.numberOfJobs(); |
| if (jobs > 1) { |
| // Split the job into "blockHeight"-sized jobs but there a few jobs that need to be slightly larger since |
| // blockHeight * jobs < total size. These extras are handled by the remainder "jobsWithExtra". |
| const int blockHeight = paintSize.height() / jobs; |
| const int jobsWithExtra = paintSize.height() % jobs; |
| |
| int currentY = 0; |
| for (int job = 0; job < jobs; job++) { |
| ApplyParameters& params = parallelJobs.parameter(job); |
| |
| int startY = !job ? 0 : currentY - extraHeight; |
| currentY += job < jobsWithExtra ? blockHeight + 1 : blockHeight; |
| int endY = job == jobs - 1 ? currentY : currentY + extraHeight; |
| |
| int blockSize = (endY - startY) * scanline; |
| if (!job) { |
| params.ioPixelArray = &ioBuffer; |
| params.tmpPixelArray = &tmpPixelArray; |
| } else { |
| params.ioPixelArray = Uint8ClampedArray::createUninitialized(blockSize); |
| params.tmpPixelArray = Uint8ClampedArray::createUninitialized(blockSize); |
| memcpy(params.ioPixelArray->data(), ioBuffer.data() + startY * scanline, blockSize); |
| } |
| |
| params.width = paintSize.width(); |
| params.height = endY - startY; |
| params.kernelSizeX = kernelSizeX; |
| params.kernelSizeY = kernelSizeY; |
| params.isAlphaImage = isAlphaImage; |
| params.edgeMode = edgeMode; |
| } |
| |
| parallelJobs.execute(); |
| |
| // Copy together the parts of the image. |
| currentY = 0; |
| for (int job = 1; job < jobs; job++) { |
| ApplyParameters& params = parallelJobs.parameter(job); |
| int sourceOffset; |
| int destinationOffset; |
| int size; |
| int adjustedBlockHeight = job < jobsWithExtra ? blockHeight + 1 : blockHeight; |
| |
| currentY += adjustedBlockHeight; |
| sourceOffset = extraHeight * scanline; |
| destinationOffset = currentY * scanline; |
| size = adjustedBlockHeight * scanline; |
| |
| memcpy(ioBuffer.data() + destinationOffset, params.ioPixelArray->data() + sourceOffset, size); |
| } |
| return; |
| } |
| // Fallback to single threaded mode. |
| } |
| #endif |
| |
| // The selection here eventually should happen dynamically on some platforms. |
| boxBlurGeneric(ioBuffer, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize, isAlphaImage, edgeMode); |
| } |
| |
| bool FEGaussianBlurSoftwareApplier::apply(const Filter& filter, const FilterImageVector& inputs, FilterImage& result) const |
| { |
| auto& input = inputs[0].get(); |
| |
| auto destinationPixelBuffer = result.pixelBuffer(AlphaPremultiplication::Premultiplied); |
| if (!destinationPixelBuffer) |
| return false; |
| |
| auto effectDrawingRect = result.absoluteImageRectRelativeTo(input); |
| input.copyPixelBuffer(*destinationPixelBuffer, effectDrawingRect); |
| if (!m_effect.stdDeviationX() && !m_effect.stdDeviationY()) |
| return true; |
| |
| auto kernelSize = m_effect.calculateKernelSize(filter, { m_effect.stdDeviationX(), m_effect.stdDeviationY() }); |
| |
| IntSize paintSize = result.absoluteImageRect().size(); |
| auto tmpImageData = Uint8ClampedArray::tryCreateUninitialized(paintSize.area() * 4); |
| if (!tmpImageData) |
| return false; |
| |
| auto& destinationPixelArray = destinationPixelBuffer->data(); |
| applyPlatform(destinationPixelArray, *tmpImageData, kernelSize.width(), kernelSize.height(), paintSize, result.isAlphaImage(), m_effect.edgeMode()); |
| return true; |
| } |
| |
| } // namespace WebCore |