| // GENERATED FILE - DO NOT EDIT. |
| // Generated by gen_mtl_internal_shaders.py |
| // |
| // Copyright 2020 The ANGLE Project Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| |
| // Metal version of combined Metal default shaders. |
| |
| # 1 "temp_master_source.metal" |
| # 1 "<built-in>" 1 |
| # 1 "<built-in>" 3 |
| # 396 "<built-in>" 3 |
| # 1 "<command line>" 1 |
| # 1 "<built-in>" 2 |
| # 1 "temp_master_source.metal" 2 |
| # 1 "./blit.metal" 1 |
| |
| |
| |
| |
| |
| |
| |
| # 1 "./common.h" 1 |
| # 13 "./common.h" |
| # include <simd/simd.h> |
| # include <metal_stdlib> |
| |
| |
| # 1 "./constants.h" 1 |
| # 11 "./constants.h" |
| namespace rx |
| { |
| namespace mtl_shader |
| { |
| |
| enum |
| { |
| kTextureType2D = 0, |
| kTextureType2DMultisample = 1, |
| kTextureType2DArray = 2, |
| kTextureTypeCube = 3, |
| kTextureType3D = 4, |
| kTextureTypeCount = 5, |
| }; |
| |
| |
| |
| |
| } |
| } |
| # 18 "./common.h" 2 |
| |
| |
| |
| |
| |
| |
| |
| using namespace metal; |
| |
| |
| |
| constant uint32_t kNumColorOutputs [[function_constant(0)]]; |
| constant bool kColorOutputAvailable0 = kNumColorOutputs > 0; |
| constant bool kColorOutputAvailable1 = kNumColorOutputs > 1; |
| constant bool kColorOutputAvailable2 = kNumColorOutputs > 2; |
| constant bool kColorOutputAvailable3 = kNumColorOutputs > 3; |
| |
| namespace rx |
| { |
| namespace mtl_shader |
| { |
| |
| |
| constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)}; |
| |
| template <typename T> |
| struct MultipleColorOutputs |
| { |
| vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]]; |
| vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]]; |
| vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]]; |
| vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]]; |
| }; |
| # 61 "./common.h" |
| template <typename T> |
| static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color) |
| { |
| MultipleColorOutputs<T> re; |
| |
| do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0); |
| do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0); |
| do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0); |
| do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0); |
| |
| return re; |
| } |
| |
| static inline float3 cubeTexcoords(float2 texcoords, int face) |
| { |
| texcoords = 2.0 * texcoords - 1.0; |
| switch (face) |
| { |
| case 0: |
| return float3(1.0, -texcoords.y, -texcoords.x); |
| case 1: |
| return float3(-1.0, -texcoords.y, texcoords.x); |
| case 2: |
| return float3(texcoords.x, 1.0, texcoords.y); |
| case 3: |
| return float3(texcoords.x, -1.0, -texcoords.y); |
| case 4: |
| return float3(texcoords.x, -texcoords.y, 1.0); |
| case 5: |
| return float3(-texcoords.x, -texcoords.y, -1.0); |
| } |
| return float3(texcoords, 0); |
| } |
| |
| template <typename T> |
| static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords) |
| { |
| uint samples = srcTexture.get_num_samples(); |
| |
| vec<T, 4> output(0); |
| |
| for (uint sample = 0; sample < samples; ++sample) |
| { |
| output += srcTexture.read(coords, sample); |
| } |
| |
| output = output / samples; |
| |
| return output; |
| } |
| |
| static inline float4 sRGBtoLinear(float4 color) |
| { |
| float3 linear1 = color.rgb / 12.92; |
| float3 linear2 = pow((color.rgb + float3(0.055)) / 1.055, 2.4); |
| float3 factor = float3(color.rgb <= float3(0.04045)); |
| float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a); |
| |
| return linear; |
| } |
| |
| static inline float linearToSRGB(float color) |
| { |
| if (color <= 0.0f) |
| return 0.0f; |
| else if (color < 0.0031308f) |
| return 12.92f * color; |
| else if (color < 1.0f) |
| return 1.055f * pow(color, 0.41666f) - 0.055f; |
| else |
| return 1.0f; |
| } |
| |
| static inline float4 linearToSRGB(float4 color) |
| { |
| return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a); |
| } |
| |
| template <typename Short> |
| static inline Short bytesToShort(constant uchar *input, uint offset) |
| { |
| Short inputLo = input[offset]; |
| Short inputHi = input[offset + 1]; |
| |
| return inputLo | (inputHi << 8); |
| } |
| |
| template <typename Int> |
| static inline Int bytesToInt(constant uchar *input, uint offset) |
| { |
| Int input0 = input[offset]; |
| Int input1 = input[offset + 1]; |
| Int input2 = input[offset + 2]; |
| Int input3 = input[offset + 3]; |
| |
| return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); |
| } |
| |
| template <typename Short> |
| static inline void shortToBytes(Short val, uint offset, device uchar *output) |
| { |
| ushort valUnsigned = as_type<ushort>(val); |
| output[offset] = valUnsigned & 0xff; |
| output[offset + 1] = (valUnsigned >> 8) & 0xff; |
| } |
| |
| template <typename Int> |
| static inline void intToBytes(Int val, uint offset, device uchar *output) |
| { |
| uint valUnsigned = as_type<uint>(val); |
| output[offset] = valUnsigned & 0xff; |
| output[offset + 1] = (valUnsigned >> 8) & 0xff; |
| output[offset + 2] = (valUnsigned >> 16) & 0xff; |
| output[offset + 3] = (valUnsigned >> 24) & 0xff; |
| } |
| |
| static inline void floatToBytes(float val, uint offset, device uchar *output) |
| { |
| intToBytes(as_type<uint>(val), offset, output); |
| } |
| |
| static inline void int24bitToBytes(uint val, uint offset, device uchar *output) |
| { |
| output[offset] = val & 0xff; |
| output[offset + 1] = (val >> 8) & 0xff; |
| output[offset + 2] = (val >> 16) & 0xff; |
| } |
| |
| template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> |
| static inline T getShiftedData(T input) |
| { |
| static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), |
| "T must have at least as many bits as inputBitCount + inputBitStart."); |
| const T mask = (1 << inputBitCount) - 1; |
| return (input >> inputBitStart) & mask; |
| } |
| |
| template <unsigned int inputBitCount, unsigned int inputBitStart, typename T> |
| static inline T shiftData(T input) |
| { |
| static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8), |
| "T must have at least as many bits as inputBitCount + inputBitStart."); |
| const T mask = (1 << inputBitCount) - 1; |
| return (input & mask) << inputBitStart; |
| } |
| |
| template <unsigned int inputBitCount, typename T> |
| static inline float normalizedToFloat(T input) |
| { |
| static_assert(inputBitCount <= (sizeof(T) * 8), |
| "T must have more bits than or same bits as inputBitCount."); |
| static_assert(inputBitCount <= 23, "Only single precision is supported"); |
| |
| constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1); |
| return input * inverseMax; |
| } |
| |
| template <typename T> |
| static inline float normalizedToFloat(T input) |
| { |
| return normalizedToFloat<sizeof(T) * 8, T>(input); |
| } |
| |
| template <> |
| inline float normalizedToFloat(short input) |
| { |
| constexpr float inverseMax = 1.0f / 0x7fff; |
| return static_cast<float>(input) * inverseMax; |
| } |
| |
| template <> |
| inline float normalizedToFloat(int input) |
| { |
| constexpr float inverseMax = 1.0f / 0x7fffffff; |
| return static_cast<float>(input) * inverseMax; |
| } |
| |
| template <> |
| inline float normalizedToFloat(uint input) |
| { |
| constexpr float inverseMax = 1.0f / 0xffffffff; |
| return static_cast<float>(input) * inverseMax; |
| } |
| |
| template <unsigned int outputBitCount, typename T> |
| static inline T floatToNormalized(float input) |
| { |
| static_assert(outputBitCount <= (sizeof(T) * 8), |
| "T must have more bits than or same bits as inputBitCount."); |
| static_assert(outputBitCount <= 23, "Only single precision is supported"); |
| |
| return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f); |
| } |
| |
| template <typename T> |
| static inline T floatToNormalized(float input) |
| { |
| return floatToNormalized<sizeof(T) * 8, T>(input); |
| } |
| |
| } |
| } |
| # 9 "./blit.metal" 2 |
| |
| using namespace rx::mtl_shader; |
| |
| |
| constant bool kPremultiplyAlpha [[function_constant(1)]]; |
| constant bool kUnmultiplyAlpha [[function_constant(2)]]; |
| constant int kSourceTextureType [[function_constant(3)]]; |
| constant int kSourceTexture2Type [[function_constant(4)]]; |
| |
| constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D; |
| constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray; |
| constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample; |
| constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube; |
| constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D; |
| |
| constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D; |
| constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray; |
| constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample; |
| constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube; |
| |
| struct BlitParams |
| { |
| |
| float2 srcTexCoords[3]; |
| int srcLevel; |
| int srcLayer; |
| bool dstFlipViewportX; |
| bool dstFlipViewportY; |
| bool dstLuminance; |
| uint8_t padding[13]; |
| }; |
| |
| struct BlitVSOut |
| { |
| float4 position [[position]]; |
| float2 texCoords [[user(locn1)]]; |
| }; |
| |
| vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]]) |
| { |
| BlitVSOut output; |
| output.position = float4(gCorners[vid], 0.0, 1.0); |
| output.texCoords = options.srcTexCoords[vid]; |
| |
| if (options.dstFlipViewportX) |
| { |
| output.position.x = -output.position.x; |
| } |
| if (!options.dstFlipViewportY) |
| { |
| |
| |
| output.position.y = -output.position.y; |
| } |
| |
| return output; |
| } |
| |
| template <typename SrcTexture2d> |
| static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords) |
| { |
| uint2 dimens(srcTexture.get_width(), srcTexture.get_height()); |
| uint2 coords = uint2(texCoords * float2(dimens)); |
| |
| return coords; |
| } |
| |
| template <typename T> |
| static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords) |
| { |
| uint2 coords = getImageCoords(srcTexture, texCoords); |
| return resolveTextureMS(srcTexture, coords); |
| } |
| |
| template <typename T> |
| static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture, |
| sampler textureSampler, |
| float2 texCoords, |
| constant BlitParams &options) |
| { |
| uint depth = srcTexture.get_depth(options.srcLevel); |
| float zCoord = (float(options.srcLayer) + 0.5) / float(depth); |
| |
| return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel)); |
| } |
| # 112 "./blit.metal" |
| template <typename T> |
| static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) |
| { |
| vec<T, 4> output; |
| |
| switch (kSourceTextureType) |
| { |
| case kTextureType2D: |
| output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel)); |
| break; |
| case kTextureType2DArray: |
| output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer, |
| level(options.srcLevel)); |
| break; |
| case kTextureType2DMultisample: |
| output = blitSampleTextureMS(srcTexture2dMS, input.texCoords); |
| break; |
| case kTextureTypeCube: |
| output = srcTextureCube.sample(textureSampler, |
| cubeTexcoords(input.texCoords, options.srcLayer), |
| level(options.srcLevel)); |
| break; |
| case kTextureType3D: |
| output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options); |
| break; |
| } |
| |
| if (kPremultiplyAlpha) |
| { |
| output.xyz *= output.a; |
| } |
| else if (kUnmultiplyAlpha) |
| { |
| if (output.a != 0.0) |
| { |
| output.xyz /= output.a; |
| } |
| } |
| |
| if (options.dstLuminance) |
| { |
| output.g = output.b = output.r; |
| } |
| |
| return output; |
| } |
| |
| template <typename T> |
| static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) |
| { |
| vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); |
| |
| return toMultipleColorOutputs(output); |
| } |
| |
| fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) |
| { |
| return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); |
| } |
| fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) |
| { |
| return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); |
| } |
| fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) |
| { |
| return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); |
| } |
| |
| fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]]) |
| { |
| float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options); |
| uint4 output = uint4(inputColor * float4(255.0)); |
| |
| return toMultipleColorOutputs(output); |
| } |
| |
| |
| struct FragmentDepthOut |
| { |
| float depth [[depth(any)]]; |
| }; |
| |
| static inline float sampleDepth( |
| texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]], |
| texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]], |
| texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]], |
| texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]], |
| float2 texCoords, |
| constant BlitParams &options) |
| { |
| float4 output; |
| |
| constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); |
| |
| switch (kSourceTextureType) |
| { |
| case kTextureType2D: |
| output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel)); |
| break; |
| case kTextureType2DArray: |
| output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer, |
| level(options.srcLevel)); |
| break; |
| case kTextureType2DMultisample: |
| |
| output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); |
| break; |
| case kTextureTypeCube: |
| output = |
| srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer), |
| level(options.srcLevel)); |
| break; |
| } |
| |
| return output.r; |
| } |
| |
| fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]], |
| texture2d<float> srcTexture2d |
| [[texture(0), function_constant(kSourceTextureType2D)]], |
| texture2d_array<float> srcTexture2dArray |
| [[texture(0), function_constant(kSourceTextureType2DArray)]], |
| texture2d_ms<float> srcTexture2dMS |
| [[texture(0), function_constant(kSourceTextureType2DMS)]], |
| texturecube<float> srcTextureCube |
| [[texture(0), function_constant(kSourceTextureTypeCube)]], |
| constant BlitParams &options [[buffer(0)]]) |
| { |
| FragmentDepthOut re; |
| |
| re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, |
| input.texCoords, options); |
| |
| return re; |
| } |
| |
| static inline uint32_t sampleStencil( |
| texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]], |
| texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]], |
| texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]], |
| texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]], |
| float2 texCoords, |
| int srcLevel, |
| int srcLayer) |
| { |
| uint4 output; |
| constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest); |
| |
| switch (kSourceTexture2Type) |
| { |
| case kTextureType2D: |
| output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel)); |
| break; |
| case kTextureType2DArray: |
| output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel)); |
| break; |
| case kTextureType2DMultisample: |
| |
| output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0); |
| break; |
| case kTextureTypeCube: |
| output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer), |
| level(srcLevel)); |
| break; |
| } |
| |
| return output.r; |
| } |
| |
| |
| struct BlitStencilToBufferParams |
| { |
| float2 srcStartTexCoords; |
| float2 srcTexCoordSteps; |
| int srcLevel; |
| int srcLayer; |
| |
| uint2 dstSize; |
| uint dstBufferRowPitch; |
| |
| bool resolveMS; |
| }; |
| |
| kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]], |
| texture2d<uint32_t> srcTexture2d |
| [[texture(1), function_constant(kSourceTexture2Type2D)]], |
| texture2d_array<uint32_t> srcTexture2dArray |
| [[texture(1), function_constant(kSourceTexture2Type2DArray)]], |
| texture2d_ms<uint32_t> srcTexture2dMS |
| [[texture(1), function_constant(kSourceTexture2Type2DMS)]], |
| texturecube<uint32_t> srcTextureCube |
| [[texture(1), function_constant(kSourceTexture2TypeCube)]], |
| constant BlitStencilToBufferParams &options [[buffer(0)]], |
| device uchar *buffer [[buffer(1)]]) |
| { |
| if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y) |
| { |
| return; |
| } |
| |
| float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps; |
| |
| if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS) |
| { |
| uint samples = srcTexture2dMS.get_num_samples(); |
| uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords); |
| uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x; |
| |
| for (uint sample = 0; sample < samples; ++sample) |
| { |
| uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r; |
| buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample); |
| } |
| } |
| else |
| { |
| uint32_t stencil = |
| sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, |
| srcTexCoords, options.srcLevel, options.srcLayer); |
| |
| buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil); |
| } |
| } |
| |
| |
| #if __METAL_VERSION__ >= 210 |
| |
| struct FragmentStencilOut |
| { |
| uint32_t stencil [[stencil]]; |
| }; |
| |
| struct FragmentDepthStencilOut |
| { |
| float depth [[depth(any)]]; |
| uint32_t stencil [[stencil]]; |
| }; |
| |
| fragment FragmentStencilOut blitStencilFS( |
| BlitVSOut input [[stage_in]], |
| texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]], |
| texture2d_array<uint32_t> srcTexture2dArray |
| [[texture(1), function_constant(kSourceTexture2Type2DArray)]], |
| texture2d_ms<uint32_t> srcTexture2dMS |
| [[texture(1), function_constant(kSourceTexture2Type2DMS)]], |
| texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]], |
| constant BlitParams &options [[buffer(0)]]) |
| { |
| FragmentStencilOut re; |
| |
| re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, |
| input.texCoords, options.srcLevel, options.srcLayer); |
| |
| return re; |
| } |
| |
| fragment FragmentDepthStencilOut blitDepthStencilFS( |
| BlitVSOut input [[stage_in]], |
| |
| texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], |
| texture2d_array<float> srcDepthTexture2dArray |
| [[texture(0), function_constant(kSourceTextureType2DArray)]], |
| texture2d_ms<float> srcDepthTexture2dMS |
| [[texture(0), function_constant(kSourceTextureType2DMS)]], |
| texturecube<float> srcDepthTextureCube |
| [[texture(0), function_constant(kSourceTextureTypeCube)]], |
| |
| |
| texture2d<uint32_t> srcStencilTexture2d |
| [[texture(1), function_constant(kSourceTexture2Type2D)]], |
| texture2d_array<uint32_t> srcStencilTexture2dArray |
| [[texture(1), function_constant(kSourceTexture2Type2DArray)]], |
| texture2d_ms<uint32_t> srcStencilTexture2dMS |
| [[texture(1), function_constant(kSourceTexture2Type2DMS)]], |
| texturecube<uint32_t> srcStencilTextureCube |
| [[texture(1), function_constant(kSourceTexture2TypeCube)]], |
| |
| constant BlitParams &options [[buffer(0)]]) |
| { |
| FragmentDepthStencilOut re; |
| |
| re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS, |
| srcDepthTextureCube, input.texCoords, options); |
| re.stencil = |
| sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS, |
| srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer); |
| return re; |
| } |
| #endif |
| # 2 "temp_master_source.metal" 2 |
| # 1 "./clear.metal" 1 |
| # 10 "./clear.metal" |
| using namespace rx::mtl_shader; |
| |
| struct ClearParams |
| { |
| float4 clearColor; |
| float clearDepth; |
| }; |
| |
| vertex float4 clearVS(unsigned int vid [[ vertex_id ]], |
| constant ClearParams &clearParams [[buffer(0)]]) |
| { |
| return float4(gCorners[vid], clearParams.clearDepth, 1.0); |
| } |
| |
| fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]]) |
| { |
| return toMultipleColorOutputs(clearParams.clearColor); |
| } |
| |
| fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]]) |
| { |
| return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor)); |
| } |
| |
| fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]]) |
| { |
| return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor)); |
| } |
| # 3 "temp_master_source.metal" 2 |
| # 1 "./gen_indices.metal" 1 |
| |
| |
| |
| |
| |
| |
| |
| |
| using namespace rx::mtl_shader; |
| |
| |
| constant bool kSourceBufferAligned[[function_constant(100)]]; |
| constant bool kSourceIndexIsU8[[function_constant(200)]]; |
| constant bool kSourceIndexIsU16[[function_constant(300)]]; |
| constant bool kSourceIndexIsU32[[function_constant(400)]]; |
| constant bool kSourceBufferUnaligned = !kSourceBufferAligned; |
| constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned; |
| constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned; |
| constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned; |
| |
| struct IndexConversionParams |
| { |
| uint32_t srcOffset; |
| uint32_t indexCount; |
| bool primitiveRestartEnabled; |
| }; |
| |
| |
| |
| inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx) |
| { |
| return inputAligned[offset / 2 + idx]; |
| } |
| inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx) |
| { |
| return inputAligned[offset / 4 + idx]; |
| } |
| inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx) |
| { |
| return input[offset + idx]; |
| } |
| inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx) |
| { |
| ushort inputLo = input[offset + 2 * idx]; |
| ushort inputHi = input[offset + 2 * idx + 1]; |
| |
| return inputLo | (inputHi << 8); |
| } |
| inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx) |
| { |
| uint input0 = input[offset + 4 * idx]; |
| uint input1 = input[offset + 4 * idx + 1]; |
| uint input2 = input[offset + 4 * idx + 2]; |
| uint input3 = input[offset + 4 * idx + 3]; |
| |
| return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24); |
| } |
| |
| kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]], |
| constant IndexConversionParams &options [[buffer(0)]], |
| constant uchar *input [[buffer(1)]], |
| device ushort *output [[buffer(2)]]) |
| { |
| if (idx >= options.indexCount) { return; }; |
| |
| uchar value = getIndexAligned(input, options.srcOffset, idx); |
| |
| if (options.primitiveRestartEnabled && value == 0xff) |
| { |
| output[idx] = 0xffff; |
| } |
| else |
| { |
| output[idx] = value; |
| } |
| } |
| |
| kernel void convertIndexU16(uint idx [[thread_position_in_grid]], |
| constant IndexConversionParams &options [[buffer(0)]], |
| constant uchar *input |
| [[buffer(1), function_constant(kSourceBufferUnaligned)]], |
| constant ushort *inputAligned |
| [[buffer(1), function_constant(kSourceBufferAligned)]], |
| device ushort *output [[buffer(2)]]) |
| { |
| if (idx >= options.indexCount) { return; }; |
| |
| ushort value; |
| if (kSourceBufferAligned) |
| { |
| value = getIndexAligned(inputAligned, options.srcOffset, idx); |
| } |
| else |
| { |
| value = getIndexUnalignedU16(input, options.srcOffset, idx); |
| } |
| output[idx] = value; |
| } |
| |
| kernel void convertIndexU32(uint idx [[thread_position_in_grid]], |
| constant IndexConversionParams &options [[buffer(0)]], |
| constant uchar *input |
| [[buffer(1), function_constant(kSourceBufferUnaligned)]], |
| constant uint *inputAligned |
| [[buffer(1), function_constant(kSourceBufferAligned)]], |
| device uint *output [[buffer(2)]]) |
| { |
| if (idx >= options.indexCount) { return; }; |
| |
| uint value; |
| if (kSourceBufferAligned) |
| { |
| value = getIndexAligned(inputAligned, options.srcOffset, idx); |
| } |
| else |
| { |
| value = getIndexUnalignedU32(input, options.srcOffset, idx); |
| } |
| output[idx] = value; |
| } |
| |
| struct IndexFromArrayParams |
| { |
| uint firstVertex; |
| |
| uint vertexCount; |
| }; |
| |
| |
| kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]], |
| constant IndexFromArrayParams &options [[buffer(0)]], |
| device uint *output [[buffer(2)]]) |
| { |
| if (idx >= options.vertexCount) { return; }; |
| |
| uint vertexIdx = options.firstVertex + 2 + idx; |
| |
| output[3 * idx] = options.firstVertex; |
| output[3 * idx + 1] = vertexIdx - 1; |
| output[3 * idx + 2] = vertexIdx; |
| } |
| |
| inline uint getIndexU32(uint offset, |
| uint idx, |
| constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]], |
| constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]], |
| constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]]) |
| { |
| if (kUseSourceBufferU8) |
| { |
| if (kSourceIndexIsU16) |
| { |
| return getIndexUnalignedU16(inputU8, offset, idx); |
| } |
| else if (kSourceIndexIsU32) |
| { |
| return getIndexUnalignedU32(inputU8, offset, idx); |
| } |
| return getIndexAligned(inputU8, offset, idx); |
| } |
| else if (kUseSourceBufferU16) |
| { |
| return getIndexAligned(inputU16, offset, idx); |
| } |
| else if (kUseSourceBufferU32) |
| { |
| return getIndexAligned(inputU32, offset, idx); |
| } |
| return 0; |
| } |
| |
| |
| |
| |
| kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]], |
| constant IndexConversionParams &options [[buffer(0)]], |
| constant uchar *inputU8 |
| [[buffer(1), function_constant(kUseSourceBufferU8)]], |
| constant ushort *inputU16 |
| [[buffer(1), function_constant(kUseSourceBufferU16)]], |
| constant uint *inputU32 |
| [[buffer(1), function_constant(kUseSourceBufferU32)]], |
| device uint *output [[buffer(2)]]) |
| { |
| if (idx >= options.indexCount) { return; }; |
| |
| uint elemIdx = 2 + idx; |
| |
| output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32); |
| output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32); |
| output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32); |
| } |
| |
| |
| kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]], |
| constant IndexFromArrayParams &options [[buffer(0)]], |
| device uint *output [[buffer(2)]]) |
| { |
| uint totalIndices = options.vertexCount + 1; |
| if (idx >= totalIndices) { return; }; |
| |
| output[idx] = options.firstVertex + idx % options.vertexCount; |
| } |
| |
| |
| |
| kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]], |
| constant IndexConversionParams &options [[buffer(0)]], |
| constant uchar *inputU8 |
| [[buffer(1), function_constant(kUseSourceBufferU8)]], |
| constant ushort *inputU16 |
| [[buffer(1), function_constant(kUseSourceBufferU16)]], |
| constant uint *inputU32 |
| [[buffer(1), function_constant(kUseSourceBufferU32)]], |
| device uint *output [[buffer(2)]]) |
| { |
| uint totalTargetIndices = options.indexCount + 1; |
| if (idx >= totalTargetIndices) { return; }; |
| |
| output[idx] = |
| getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32); |
| } |
| # 4 "temp_master_source.metal" 2 |
| # 1 "./gen_mipmap.metal" 1 |
| |
| |
| |
| |
| |
| |
| |
| |
| using namespace rx::mtl_shader; |
| # 31 "./gen_mipmap.metal" |
| struct GenMipParams |
| { |
| uint srcLevel; |
| uint numMipLevelsToGen; |
| bool sRGB; |
| }; |
| |
| |
| |
| kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort3 gIndices [[thread_position_in_grid]], |
| texture3d<float> srcTexture [[texture(0)]], |
| texture3d<float, access::write> dstMip1 [[texture(1)]], |
| texture3d<float, access::write> dstMip2 [[texture(2)]], |
| texture3d<float, access::write> dstMip3 [[texture(3)]], |
| texture3d<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); |
| bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| |
| |
| |
| threadgroup float sR[(8 * 8 * 8)]; |
| threadgroup float sG[(8 * 8 * 8)]; |
| threadgroup float sB[(8 * 8 * 8)]; |
| threadgroup float sA[(8 * 8 * 8)]; |
| |
| |
| float4 texel1; |
| if (validThread) |
| { |
| float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); |
| texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); |
| |
| |
| dstMip1.write(texel1, gIndices); |
| } |
| else |
| { |
| |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| |
| |
| |
| if (options.sRGB) |
| { |
| texel1 = linearToSRGB(texel1); |
| } |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x49) == 0) |
| { |
| bool3 atEdge = gIndices == (mipSize - ushort3(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); |
| |
| float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); |
| |
| float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)])); |
| |
| float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); |
| |
| |
| float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)])); |
| |
| |
| float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)])); |
| |
| |
| float4 texel8 = |
| (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; |
| |
| dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| } |
| |
| if (options.numMipLevelsToGen == 2) |
| { |
| return; |
| } |
| |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0xdb) == 0) |
| { |
| mipSize = max(mipSize >> 1, ushort3(1)); |
| bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); |
| |
| float4 texel3 = |
| (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)])); |
| |
| float4 texel4 = |
| (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))])); |
| |
| float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); |
| |
| |
| float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)])); |
| |
| |
| float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)])); |
| |
| |
| |
| float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)])); |
| |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; |
| |
| dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| } |
| |
| if (options.numMipLevelsToGen == 3) |
| { |
| return; |
| } |
| |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x1ff) == 0) |
| { |
| mipSize = max(mipSize >> 1, ushort3(1)); |
| bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); |
| |
| float4 texel3 = |
| (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)])); |
| |
| float4 texel4 = |
| (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))])); |
| |
| float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); |
| |
| |
| float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)])); |
| |
| |
| float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)])); |
| |
| |
| |
| float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)])); |
| |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; |
| |
| dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); |
| } |
| } |
| |
| kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort2 gIndices [[thread_position_in_grid]], |
| texture2d<float> srcTexture [[texture(0)]], |
| texture2d<float, access::write> dstMip1 [[texture(1)]], |
| texture2d<float, access::write> dstMip2 [[texture(2)]], |
| texture2d<float, access::write> dstMip3 [[texture(3)]], |
| texture2d<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| uint firstMipLevel = options.srcLevel + 1; |
| ushort2 mipSize = |
| ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); |
| bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| |
| |
| |
| threadgroup float sR[(8 * 8)]; |
| threadgroup float sG[(8 * 8)]; |
| threadgroup float sB[(8 * 8)]; |
| threadgroup float sA[(8 * 8)]; |
| |
| |
| float4 texel1; |
| if (validThread) |
| { |
| float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); |
| texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); |
| |
| |
| dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices); |
| } |
| else |
| { |
| |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x09) == 0) |
| { |
| bool2 atEdge = gIndices == (mipSize - ushort2(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); |
| |
| float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); |
| |
| float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1); |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| } |
| |
| if (options.numMipLevelsToGen == 2) |
| { |
| return; |
| } |
| |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x1b) == 0) |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); |
| |
| float4 texel3 = |
| (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); |
| |
| float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2); |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| } |
| |
| if (options.numMipLevelsToGen == 3) |
| { |
| return; |
| } |
| |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x3f) == 0) |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); |
| |
| float4 texel3 = |
| (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); |
| |
| float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3); |
| } |
| } |
| |
| template <typename TextureTypeR, typename TextureTypeW> |
| static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( |
| uint lIndex, |
| ushort3 gIndices, |
| TextureTypeR srcTexture, |
| TextureTypeW dstMip2, |
| TextureTypeW dstMip3, |
| TextureTypeW dstMip4, |
| ushort2 mip1Size, |
| float4 mip1Texel, |
| threadgroup float *sR, |
| threadgroup float *sG, |
| threadgroup float *sB, |
| threadgroup float *sA, |
| constant GenMipParams &options) |
| { |
| ushort2 mipSize = mip1Size; |
| float4 texel1 = mip1Texel; |
| |
| |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x09) == 0) |
| { |
| bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1])); |
| |
| float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8])); |
| |
| float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z); |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| } |
| |
| if (options.numMipLevelsToGen == 2) |
| { |
| return; |
| } |
| |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x1b) == 0) |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2])); |
| |
| float4 texel3 = |
| (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8])); |
| |
| float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z); |
| |
| |
| sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;; |
| } |
| |
| if (options.numMipLevelsToGen == 3) |
| { |
| return; |
| } |
| |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| |
| if ((lIndex & 0x3f) == 0) |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); |
| |
| |
| |
| float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4])); |
| |
| float4 texel3 = |
| (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8])); |
| |
| float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)])); |
| |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z); |
| } |
| } |
| |
| kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort3 gIndices [[thread_position_in_grid]], |
| texturecube<float> srcTexture [[texture(0)]], |
| texturecube<float, access::write> dstMip1 [[texture(1)]], |
| texturecube<float, access::write> dstMip2 [[texture(2)]], |
| texturecube<float, access::write> dstMip3 [[texture(3)]], |
| texturecube<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| uint firstMipLevel = options.srcLevel + 1; |
| ushort2 mip1Size = |
| ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); |
| bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| |
| float4 mip1Texel; |
| if (validThread) |
| { |
| float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); |
| mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), |
| level(options.srcLevel)); |
| |
| |
| dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); |
| } |
| else |
| { |
| |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| |
| threadgroup float sR[(8 * 8)]; |
| threadgroup float sG[(8 * 8)]; |
| threadgroup float sB[(8 * 8)]; |
| threadgroup float sA[(8 * 8)]; |
| |
| generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, |
| mip1Size, mip1Texel, sR, sG, sB, sA, options); |
| } |
| |
| kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort3 gIndices [[thread_position_in_grid]], |
| texture2d_array<float> srcTexture [[texture(0)]], |
| texture2d_array<float, access::write> dstMip1 [[texture(1)]], |
| texture2d_array<float, access::write> dstMip2 [[texture(2)]], |
| texture2d_array<float, access::write> dstMip3 [[texture(3)]], |
| texture2d_array<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| uint firstMipLevel = options.srcLevel + 1; |
| ushort2 mip1Size = |
| ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); |
| bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| |
| float4 mip1Texel; |
| if (validThread) |
| { |
| float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); |
| mip1Texel = |
| srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); |
| |
| |
| dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z); |
| } |
| else |
| { |
| |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| |
| threadgroup float sR[(8 * 8)]; |
| threadgroup float sG[(8 * 8)]; |
| threadgroup float sB[(8 * 8)]; |
| threadgroup float sA[(8 * 8)]; |
| |
| generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, |
| mip1Size, mip1Texel, sR, sG, sB, sA, options); |
| } |
| # 5 "temp_master_source.metal" 2 |
| # 1 "./copy_buffer.metal" 1 |
| # 12 "./copy_buffer.metal" |
| #include <metal_pack> |
| |
| |
| # 1 "./format_autogen.h" 1 |
| # 11 "./format_autogen.h" |
| namespace rx |
| { |
| namespace mtl_shader |
| { |
| |
| namespace FormatID |
| { |
| enum |
| { |
| NONE, |
| D16_UNORM, |
| D24_UNORM_S8_UINT, |
| D24_UNORM_X8_UINT, |
| D32_FLOAT, |
| D32_FLOAT_S8X24_UINT, |
| D32_UNORM, |
| S8_UINT, |
| A16_FLOAT, |
| A1R5G5B5_UNORM, |
| A2R10G10B10_SINT_VERTEX, |
| A2R10G10B10_SNORM_VERTEX, |
| A2R10G10B10_SSCALED_VERTEX, |
| A2R10G10B10_UINT_VERTEX, |
| A2R10G10B10_UNORM_VERTEX, |
| A2R10G10B10_USCALED_VERTEX, |
| A32_FLOAT, |
| A8_UNORM, |
| ASTC_10x10_SRGB_BLOCK, |
| ASTC_10x10_UNORM_BLOCK, |
| ASTC_10x5_SRGB_BLOCK, |
| ASTC_10x5_UNORM_BLOCK, |
| ASTC_10x6_SRGB_BLOCK, |
| ASTC_10x6_UNORM_BLOCK, |
| ASTC_10x8_SRGB_BLOCK, |
| ASTC_10x8_UNORM_BLOCK, |
| ASTC_12x10_SRGB_BLOCK, |
| ASTC_12x10_UNORM_BLOCK, |
| ASTC_12x12_SRGB_BLOCK, |
| ASTC_12x12_UNORM_BLOCK, |
| ASTC_3x3x3_UNORM_BLOCK, |
| ASTC_3x3x3_UNORM_SRGB_BLOCK, |
| ASTC_4x3x3_UNORM_BLOCK, |
| ASTC_4x3x3_UNORM_SRGB_BLOCK, |
| ASTC_4x4_SRGB_BLOCK, |
| ASTC_4x4_UNORM_BLOCK, |
| ASTC_4x4x3_UNORM_BLOCK, |
| ASTC_4x4x3_UNORM_SRGB_BLOCK, |
| ASTC_4x4x4_UNORM_BLOCK, |
| ASTC_4x4x4_UNORM_SRGB_BLOCK, |
| ASTC_5x4_SRGB_BLOCK, |
| ASTC_5x4_UNORM_BLOCK, |
| ASTC_5x4x4_UNORM_BLOCK, |
| ASTC_5x4x4_UNORM_SRGB_BLOCK, |
| ASTC_5x5_SRGB_BLOCK, |
| ASTC_5x5_UNORM_BLOCK, |
| ASTC_5x5x4_UNORM_BLOCK, |
| ASTC_5x5x4_UNORM_SRGB_BLOCK, |
| ASTC_5x5x5_UNORM_BLOCK, |
| ASTC_5x5x5_UNORM_SRGB_BLOCK, |
| ASTC_6x5_SRGB_BLOCK, |
| ASTC_6x5_UNORM_BLOCK, |
| ASTC_6x5x5_UNORM_BLOCK, |
| ASTC_6x5x5_UNORM_SRGB_BLOCK, |
| ASTC_6x6_SRGB_BLOCK, |
| ASTC_6x6_UNORM_BLOCK, |
| ASTC_6x6x5_UNORM_BLOCK, |
| ASTC_6x6x5_UNORM_SRGB_BLOCK, |
| ASTC_6x6x6_UNORM_BLOCK, |
| ASTC_6x6x6_UNORM_SRGB_BLOCK, |
| ASTC_8x5_SRGB_BLOCK, |
| ASTC_8x5_UNORM_BLOCK, |
| ASTC_8x6_SRGB_BLOCK, |
| ASTC_8x6_UNORM_BLOCK, |
| ASTC_8x8_SRGB_BLOCK, |
| ASTC_8x8_UNORM_BLOCK, |
| B10G10R10A2_UNORM, |
| B4G4R4A4_UNORM, |
| B5G5R5A1_UNORM, |
| B5G6R5_UNORM, |
| B8G8R8A8_TYPELESS, |
| B8G8R8A8_TYPELESS_SRGB, |
| B8G8R8A8_UNORM, |
| B8G8R8A8_UNORM_SRGB, |
| B8G8R8X8_UNORM, |
| BC1_RGBA_UNORM_BLOCK, |
| BC1_RGBA_UNORM_SRGB_BLOCK, |
| BC1_RGB_UNORM_BLOCK, |
| BC1_RGB_UNORM_SRGB_BLOCK, |
| BC2_RGBA_UNORM_BLOCK, |
| BC2_RGBA_UNORM_SRGB_BLOCK, |
| BC3_RGBA_UNORM_BLOCK, |
| BC3_RGBA_UNORM_SRGB_BLOCK, |
| BC4_RED_SNORM_BLOCK, |
| BC4_RED_UNORM_BLOCK, |
| BC5_RG_SNORM_BLOCK, |
| BC5_RG_UNORM_BLOCK, |
| BC6H_RGB_SFLOAT_BLOCK, |
| BC6H_RGB_UFLOAT_BLOCK, |
| BC7_RGBA_UNORM_BLOCK, |
| BC7_RGBA_UNORM_SRGB_BLOCK, |
| EAC_R11G11_SNORM_BLOCK, |
| EAC_R11G11_UNORM_BLOCK, |
| EAC_R11_SNORM_BLOCK, |
| EAC_R11_UNORM_BLOCK, |
| ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK, |
| ETC1_R8G8B8_UNORM_BLOCK, |
| ETC2_R8G8B8A1_SRGB_BLOCK, |
| ETC2_R8G8B8A1_UNORM_BLOCK, |
| ETC2_R8G8B8A8_SRGB_BLOCK, |
| ETC2_R8G8B8A8_UNORM_BLOCK, |
| ETC2_R8G8B8_SRGB_BLOCK, |
| ETC2_R8G8B8_UNORM_BLOCK, |
| G8_B8R8_2PLANE_420_UNORM, |
| G8_B8_R8_3PLANE_420_UNORM, |
| L16A16_FLOAT, |
| L16_FLOAT, |
| L32A32_FLOAT, |
| L32_FLOAT, |
| L8A8_UNORM, |
| L8_UNORM, |
| PVRTC1_RGBA_2BPP_UNORM_BLOCK, |
| PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK, |
| PVRTC1_RGBA_4BPP_UNORM_BLOCK, |
| PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK, |
| PVRTC1_RGB_2BPP_UNORM_BLOCK, |
| PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK, |
| PVRTC1_RGB_4BPP_UNORM_BLOCK, |
| PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK, |
| R10G10B10A2_SINT, |
| R10G10B10A2_SNORM, |
| R10G10B10A2_SSCALED, |
| R10G10B10A2_UINT, |
| R10G10B10A2_UNORM, |
| R10G10B10A2_USCALED, |
| R10G10B10X2_UNORM, |
| R11G11B10_FLOAT, |
| R16G16B16A16_FLOAT, |
| R16G16B16A16_SINT, |
| R16G16B16A16_SNORM, |
| R16G16B16A16_SSCALED, |
| R16G16B16A16_UINT, |
| R16G16B16A16_UNORM, |
| R16G16B16A16_USCALED, |
| R16G16B16_FLOAT, |
| R16G16B16_SINT, |
| R16G16B16_SNORM, |
| R16G16B16_SSCALED, |
| R16G16B16_UINT, |
| R16G16B16_UNORM, |
| R16G16B16_USCALED, |
| R16G16_FLOAT, |
| R16G16_SINT, |
| R16G16_SNORM, |
| R16G16_SSCALED, |
| R16G16_UINT, |
| R16G16_UNORM, |
| R16G16_USCALED, |
| R16_FLOAT, |
| R16_SINT, |
| R16_SNORM, |
| R16_SSCALED, |
| R16_UINT, |
| R16_UNORM, |
| R16_USCALED, |
| R32G32B32A32_FIXED, |
| R32G32B32A32_FLOAT, |
| R32G32B32A32_SINT, |
| R32G32B32A32_SNORM, |
| R32G32B32A32_SSCALED, |
| R32G32B32A32_UINT, |
| R32G32B32A32_UNORM, |
| R32G32B32A32_USCALED, |
| R32G32B32_FIXED, |
| R32G32B32_FLOAT, |
| R32G32B32_SINT, |
| R32G32B32_SNORM, |
| R32G32B32_SSCALED, |
| R32G32B32_UINT, |
| R32G32B32_UNORM, |
| R32G32B32_USCALED, |
| R32G32_FIXED, |
| R32G32_FLOAT, |
| R32G32_SINT, |
| R32G32_SNORM, |
| R32G32_SSCALED, |
| R32G32_UINT, |
| R32G32_UNORM, |
| R32G32_USCALED, |
| R32_FIXED, |
| R32_FLOAT, |
| R32_SINT, |
| R32_SNORM, |
| R32_SSCALED, |
| R32_UINT, |
| R32_UNORM, |
| R32_USCALED, |
| R4G4B4A4_UNORM, |
| R5G5B5A1_UNORM, |
| R5G6B5_UNORM, |
| R8G8B8A8_SINT, |
| R8G8B8A8_SNORM, |
| R8G8B8A8_SSCALED, |
| R8G8B8A8_TYPELESS, |
| R8G8B8A8_TYPELESS_SRGB, |
| R8G8B8A8_UINT, |
| R8G8B8A8_UNORM, |
| R8G8B8A8_UNORM_SRGB, |
| R8G8B8A8_USCALED, |
| R8G8B8X8_UNORM, |
| R8G8B8_SINT, |
| R8G8B8_SNORM, |
| R8G8B8_SSCALED, |
| R8G8B8_UINT, |
| R8G8B8_UNORM, |
| R8G8B8_UNORM_SRGB, |
| R8G8B8_USCALED, |
| R8G8_SINT, |
| R8G8_SNORM, |
| R8G8_SSCALED, |
| R8G8_UINT, |
| R8G8_UNORM, |
| R8G8_UNORM_SRGB, |
| R8G8_USCALED, |
| R8_SINT, |
| R8_SNORM, |
| R8_SSCALED, |
| R8_UINT, |
| R8_UNORM, |
| R8_UNORM_SRGB, |
| R8_USCALED, |
| R9G9B9E5_SHAREDEXP, |
| X2R10G10B10_SINT_VERTEX, |
| X2R10G10B10_SNORM_VERTEX, |
| X2R10G10B10_SSCALED_VERTEX, |
| X2R10G10B10_UINT_VERTEX, |
| X2R10G10B10_UNORM_VERTEX, |
| X2R10G10B10_USCALED_VERTEX |
| }; |
| |
| } |
| |
| } |
| } |
| # 16 "./copy_buffer.metal" 2 |
| |
| using namespace rx::mtl_shader; |
| |
| constant int kCopyFormatType [[function_constant(10)]]; |
| |
| |
| constant int kCopyTextureType [[function_constant(20)]]; |
| constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D; |
| constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray; |
| constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample; |
| constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube; |
| constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D; |
| |
| struct CopyPixelParams |
| { |
| uint3 copySize; |
| uint3 textureOffset; |
| |
| uint bufferStartOffset; |
| uint pixelSize; |
| uint bufferRowPitch; |
| uint bufferDepthPitch; |
| }; |
| |
| struct WritePixelParams |
| { |
| uint2 copySize; |
| uint2 textureOffset; |
| |
| uint bufferStartOffset; |
| |
| uint pixelSize; |
| uint bufferRowPitch; |
| |
| uint textureLevel; |
| uint textureLayer; |
| |
| bool reverseTextureRowOrder; |
| }; |
| # 120 "./copy_buffer.metal" |
| template <typename T> |
| static inline void textureWrite(ushort3 gIndices, |
| constant CopyPixelParams &options, |
| vec<T, 4> color, |
| texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) |
| { |
| uint3 writeIndices = options.textureOffset + uint3(gIndices); |
| switch (kCopyTextureType) |
| { |
| case kTextureType2D: |
| dstTexture2d.write(color, writeIndices.xy); |
| break; |
| case kTextureType2DArray: |
| dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z); |
| break; |
| case kTextureType3D: |
| dstTexture3d.write(color, writeIndices); |
| break; |
| case kTextureTypeCube: |
| dstTextureCube.write(color, writeIndices.xy, writeIndices.z); |
| break; |
| } |
| } |
| |
| |
| template <typename T> |
| static inline vec<T, 4> textureRead(ushort2 gIndices, |
| constant WritePixelParams &options, |
| texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]]) |
| { |
| vec<T, 4> color; |
| uint2 coords = uint2(gIndices); |
| if (options.reverseTextureRowOrder) |
| { |
| coords.y = options.copySize.y - 1 - gIndices.y; |
| } |
| coords += options.textureOffset; |
| switch (kCopyTextureType) |
| { |
| case kTextureType2D: |
| color = srcTexture2d.read(coords.xy, options.textureLevel); |
| break; |
| case kTextureType2DArray: |
| color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel); |
| break; |
| case kTextureType2DMultisample: |
| color = resolveTextureMS(srcTexture2dMS, coords.xy); |
| break; |
| case kTextureType3D: |
| color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel); |
| break; |
| case kTextureTypeCube: |
| color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel); |
| break; |
| } |
| return color; |
| } |
| # 215 "./copy_buffer.metal" |
| static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| ushort src = bytesToShort<ushort>(buffer, bufferOffset); |
| |
| color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); |
| color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src)); |
| color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src)); |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | |
| shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) | |
| shiftData<5, 0>(floatToNormalized<5, ushort>(color.b)); |
| |
| shortToBytes(dst, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| ushort src = bytesToShort<ushort>(buffer, bufferOffset); |
| |
| color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src)); |
| color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src)); |
| color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src)); |
| color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src)); |
| return color; |
| } |
| static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) | |
| shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) | |
| shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) | |
| shiftData<4, 0>(floatToNormalized<4, ushort>(color.a)); |
| ; |
| |
| shortToBytes(dst, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| ushort src = bytesToShort<ushort>(buffer, bufferOffset); |
| |
| color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src)); |
| color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src)); |
| color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src)); |
| color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src)); |
| return color; |
| } |
| static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) | |
| shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) | |
| shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) | |
| shiftData<1, 0>(floatToNormalized<1, ushort>(color.a)); |
| ; |
| |
| shortToBytes(dst, bufferOffset, buffer); |
| } |
| |
| |
| static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| int src = bytesToInt<int>(buffer, bufferOffset); |
| |
| constexpr int3 rgbSignMask(0x200); |
| constexpr int3 negativeMask(0xfffffc00); |
| constexpr int alphaSignMask = 0x2; |
| constexpr int alphaNegMask = 0xfffffffc; |
| |
| color.r = getShiftedData<10, 0>(src); |
| color.g = getShiftedData<10, 10>(src); |
| color.b = getShiftedData<10, 20>(src); |
| |
| int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9; |
| color.rgb = (isRgbNegative * negativeMask) | color.rgb; |
| |
| color.a = getShiftedData<2, 30>(src); |
| int isAlphaNegative = color.a & alphaSignMask >> 1; |
| color.a = (isAlphaNegative * alphaNegMask) | color.a; |
| return color; |
| } |
| |
| static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| uint src = bytesToInt<uint>(buffer, bufferOffset); |
| |
| color.r = getShiftedData<10, 0>(src); |
| color.g = getShiftedData<10, 10>(src); |
| color.b = getShiftedData<10, 20>(src); |
| color.a = getShiftedData<2, 30>(src); |
| return color; |
| } |
| |
| |
| static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB) |
| { |
| float4 color; |
| uint src = bytesToInt<uint>(buffer, bufferOffset); |
| |
| if (isSRGB) |
| { |
| color = unpack_unorm4x8_srgb_to_float(src); |
| } |
| else |
| { |
| color = unpack_unorm4x8_to_float(src); |
| } |
| return color; |
| } |
| static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) |
| { |
| uint dst; |
| |
| if (isSRGB) |
| { |
| dst = pack_float_to_srgb_unorm4x8(color); |
| } |
| else |
| { |
| dst = pack_float_to_unorm4x8(color); |
| } |
| |
| intToBytes(dst, bufferOffset, buffer); |
| } |
| |
| static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB) |
| { |
| float4 color; |
| color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); |
| color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); |
| color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]); |
| color.a = 1.0; |
| |
| if (isSRGB) |
| { |
| color = sRGBtoLinear(color); |
| } |
| return color; |
| } |
| static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB) |
| { |
| color.a = 1.0; |
| uint dst; |
| |
| if (isSRGB) |
| { |
| dst = pack_float_to_srgb_unorm4x8(color); |
| } |
| else |
| { |
| dst = pack_float_to_unorm4x8(color); |
| } |
| int24bitToBytes(dst, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| uint src = bytesToInt<uint>(buffer, bufferOffset); |
| |
| color = unpack_snorm4x8_to_float(src); |
| |
| return color; |
| } |
| static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| uint dst = pack_float_to_snorm4x8(color); |
| |
| intToBytes(dst, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); |
| color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); |
| color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]); |
| color.a = 1.0; |
| |
| return color; |
| } |
| static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| uint dst = pack_float_to_snorm4x8(color); |
| |
| int24bitToBytes(dst, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| return readR8G8B8A8(bufferOffset, buffer, false); |
| } |
| static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); |
| } |
| |
| static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) |
| { |
| return readR8G8B8A8(bufferOffset, buffer, true); |
| } |
| static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); |
| } |
| |
| |
| static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| return readR8G8B8A8(bufferOffset, buffer, false).bgra; |
| } |
| static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| color.rgba = color.bgra; |
| return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false); |
| } |
| |
| static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) |
| { |
| return readR8G8B8A8(bufferOffset, buffer, true).bgra; |
| } |
| static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| color.rgba = color.bgra; |
| return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true); |
| } |
| |
| |
| static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| return readR8G8B8(bufferOffset, buffer, false); |
| } |
| static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false); |
| } |
| |
| static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer) |
| { |
| return readR8G8B8(bufferOffset, buffer, true); |
| } |
| static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true); |
| } |
| |
| |
| static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = floatToNormalized<uchar>(color.r); |
| } |
| |
| |
| static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = floatToNormalized<uchar>(color.a); |
| } |
| |
| |
| static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset])); |
| color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); |
| return color; |
| } |
| static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = floatToNormalized<uchar>(color.r); |
| buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a); |
| } |
| |
| |
| static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = floatToNormalized<uchar>(color.r); |
| } |
| |
| static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); |
| } |
| |
| |
| static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = as_type<char>(buffer[bufferOffset]); |
| color.g = color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = static_cast<uchar>(color.r); |
| } |
| |
| |
| static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = as_type<uchar>(buffer[bufferOffset]); |
| color.g = color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = static_cast<uchar>(color.r); |
| } |
| |
| |
| static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<uchar>(buffer[bufferOffset]); |
| color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]); |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = floatToNormalized<uchar>(color.r); |
| buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g); |
| } |
| |
| static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<7, char>(buffer[bufferOffset]); |
| color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]); |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r)); |
| buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g)); |
| } |
| |
| |
| static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = as_type<char>(buffer[bufferOffset]); |
| color.g = as_type<char>(buffer[bufferOffset + 1]); |
| color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = static_cast<uchar>(color.r); |
| buffer[bufferOffset + 1] = static_cast<uchar>(color.g); |
| } |
| |
| |
| static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = as_type<uchar>(buffer[bufferOffset]); |
| color.g = as_type<uchar>(buffer[bufferOffset + 1]); |
| color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = static_cast<uchar>(color.r); |
| buffer[bufferOffset + 1] = static_cast<uchar>(color.g); |
| } |
| |
| |
| static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = as_type<char>(buffer[bufferOffset]); |
| color.g = as_type<char>(buffer[bufferOffset + 1]); |
| color.b = as_type<char>(buffer[bufferOffset + 2]); |
| color.a = 1; |
| return color; |
| } |
| |
| |
| static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = as_type<uchar>(buffer[bufferOffset]); |
| color.g = as_type<uchar>(buffer[bufferOffset + 1]); |
| color.b = as_type<uchar>(buffer[bufferOffset + 2]); |
| color.a = 1; |
| return color; |
| } |
| |
| |
| static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = as_type<char>(buffer[bufferOffset]); |
| color.g = as_type<char>(buffer[bufferOffset + 1]); |
| color.b = as_type<char>(buffer[bufferOffset + 2]); |
| color.a = as_type<char>(buffer[bufferOffset + 3]); |
| return color; |
| } |
| static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = static_cast<uchar>(color.r); |
| buffer[bufferOffset + 1] = static_cast<uchar>(color.g); |
| buffer[bufferOffset + 2] = static_cast<uchar>(color.b); |
| buffer[bufferOffset + 3] = static_cast<uchar>(color.a); |
| } |
| |
| |
| static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = as_type<uchar>(buffer[bufferOffset]); |
| color.g = as_type<uchar>(buffer[bufferOffset + 1]); |
| color.b = as_type<uchar>(buffer[bufferOffset + 2]); |
| color.a = as_type<uchar>(buffer[bufferOffset + 3]); |
| return color; |
| } |
| static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| buffer[bufferOffset] = static_cast<uchar>(color.r); |
| buffer[bufferOffset + 1] = static_cast<uchar>(color.g); |
| buffer[bufferOffset + 2] = static_cast<uchar>(color.b); |
| buffer[bufferOffset + 3] = static_cast<uchar>(color.a); |
| } |
| |
| |
| static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); |
| } |
| |
| template <typename ShortType> |
| static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| |
| |
| static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToShort<short>(buffer, bufferOffset); |
| color.g = color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); |
| } |
| |
| |
| static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToShort<ushort>(buffer, bufferOffset); |
| color.g = color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.rgb = 0.0; |
| return color; |
| } |
| static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); |
| return color; |
| } |
| static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); |
| shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer); |
| } |
| |
| |
| static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); |
| shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); |
| } |
| |
| |
| template <typename ShortType> |
| static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); |
| color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| |
| |
| static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToShort<short>(buffer, bufferOffset); |
| color.g = bytesToShort<short>(buffer, bufferOffset + 2); |
| color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); |
| shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); |
| } |
| |
| |
| static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToShort<ushort>(buffer, bufferOffset); |
| color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); |
| color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); |
| shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); |
| } |
| |
| |
| static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); |
| color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| template <typename ShortType> |
| static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); |
| color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); |
| color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| |
| static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToShort<short>(buffer, bufferOffset); |
| color.g = bytesToShort<short>(buffer, bufferOffset + 2); |
| color.b = bytesToShort<short>(buffer, bufferOffset + 4); |
| color.a = 1; |
| return color; |
| } |
| |
| |
| static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToShort<ushort>(buffer, bufferOffset); |
| color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); |
| color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); |
| color.a = 1; |
| return color; |
| } |
| |
| |
| static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset)); |
| color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2)); |
| color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4)); |
| color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6)); |
| return color; |
| } |
| static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer); |
| shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer); |
| shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer); |
| shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer); |
| } |
| |
| |
| template <typename ShortType> |
| static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset)); |
| color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2)); |
| color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4)); |
| color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6)); |
| return color; |
| } |
| |
| |
| |
| |
| static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToShort<short>(buffer, bufferOffset); |
| color.g = bytesToShort<short>(buffer, bufferOffset + 2); |
| color.b = bytesToShort<short>(buffer, bufferOffset + 4); |
| color.a = bytesToShort<short>(buffer, bufferOffset + 6); |
| return color; |
| } |
| static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| shortToBytes(static_cast<short>(color.r), bufferOffset, buffer); |
| shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer); |
| shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer); |
| shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer); |
| } |
| |
| |
| static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToShort<ushort>(buffer, bufferOffset); |
| color.g = bytesToShort<ushort>(buffer, bufferOffset + 2); |
| color.b = bytesToShort<ushort>(buffer, bufferOffset + 4); |
| color.a = bytesToShort<ushort>(buffer, bufferOffset + 6); |
| return color; |
| } |
| static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer); |
| shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer); |
| shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer); |
| shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer); |
| } |
| |
| |
| static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| intToBytes(as_type<uint>(color.r), bufferOffset, buffer); |
| } |
| |
| |
| template <typename IntType> |
| static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| |
| |
| static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.rgb = 0.0; |
| return color; |
| } |
| static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| intToBytes(as_type<uint>(color.a), bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| intToBytes(as_type<uint>(color.r), bufferOffset, buffer); |
| } |
| |
| |
| static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToInt<int>(buffer, bufferOffset); |
| color.g = color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| intToBytes(color.r, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| constexpr float kDivisor = 1.0f / (1 << 16); |
| color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; |
| color.g = color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToInt<uint>(buffer, bufferOffset); |
| color.g = color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| intToBytes(color.r, bufferOffset, buffer); |
| } |
| |
| |
| static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); |
| return color; |
| } |
| static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| intToBytes(as_type<uint>(color.r), bufferOffset, buffer); |
| intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer); |
| } |
| |
| |
| static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| intToBytes(as_type<uint>(color.r), bufferOffset, buffer); |
| intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); |
| } |
| |
| |
| template <typename IntType> |
| static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); |
| color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| |
| |
| static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToInt<int>(buffer, bufferOffset); |
| color.g = bytesToInt<int>(buffer, bufferOffset + 4); |
| color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| intToBytes(color.r, bufferOffset, buffer); |
| intToBytes(color.g, bufferOffset + 4, buffer); |
| } |
| |
| |
| static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| constexpr float kDivisor = 1.0f / (1 << 16); |
| color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; |
| color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; |
| color.b = 0.0; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToInt<uint>(buffer, bufferOffset); |
| color.g = bytesToInt<uint>(buffer, bufferOffset + 4); |
| color.b = 0; |
| color.a = 1; |
| return color; |
| } |
| static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| intToBytes(color.r, bufferOffset, buffer); |
| intToBytes(color.g, bufferOffset + 4, buffer); |
| } |
| |
| |
| static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); |
| color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| template <typename IntType> |
| static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); |
| color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); |
| color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| |
| |
| static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToInt<int>(buffer, bufferOffset); |
| color.g = bytesToInt<int>(buffer, bufferOffset + 4); |
| color.b = bytesToInt<int>(buffer, bufferOffset + 8); |
| color.a = 1; |
| return color; |
| } |
| |
| |
| static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| constexpr float kDivisor = 1.0f / (1 << 16); |
| color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; |
| color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; |
| color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; |
| color.a = 1.0; |
| return color; |
| } |
| |
| |
| static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToInt<uint>(buffer, bufferOffset); |
| color.g = bytesToInt<uint>(buffer, bufferOffset + 4); |
| color.b = bytesToInt<uint>(buffer, bufferOffset + 8); |
| color.a = 1; |
| return color; |
| } |
| |
| |
| static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset)); |
| color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4)); |
| color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8)); |
| color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12)); |
| return color; |
| } |
| static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer) |
| { |
| intToBytes(as_type<uint>(color.r), bufferOffset, buffer); |
| intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer); |
| intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer); |
| intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer); |
| } |
| |
| |
| template <typename IntType> |
| static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset)); |
| color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4)); |
| color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8)); |
| color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12)); |
| return color; |
| } |
| |
| |
| |
| |
| static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer) |
| { |
| int4 color; |
| color.r = bytesToInt<int>(buffer, bufferOffset); |
| color.g = bytesToInt<int>(buffer, bufferOffset + 4); |
| color.b = bytesToInt<int>(buffer, bufferOffset + 8); |
| color.a = bytesToInt<int>(buffer, bufferOffset + 12); |
| return color; |
| } |
| static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer) |
| { |
| intToBytes(color.r, bufferOffset, buffer); |
| intToBytes(color.g, bufferOffset + 4, buffer); |
| intToBytes(color.b, bufferOffset + 8, buffer); |
| intToBytes(color.a, bufferOffset + 12, buffer); |
| } |
| |
| static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer) |
| { |
| float4 color; |
| constexpr float kDivisor = 1.0f / (1 << 16); |
| color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor; |
| color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor; |
| color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor; |
| color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor; |
| return color; |
| } |
| |
| |
| static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer) |
| { |
| uint4 color; |
| color.r = bytesToInt<uint>(buffer, bufferOffset); |
| color.g = bytesToInt<uint>(buffer, bufferOffset + 4); |
| color.b = bytesToInt<uint>(buffer, bufferOffset + 8); |
| color.a = bytesToInt<uint>(buffer, bufferOffset + 12); |
| return color; |
| } |
| static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer) |
| { |
| intToBytes(color.r, bufferOffset, buffer); |
| intToBytes(color.g, bufferOffset + 4, buffer); |
| intToBytes(color.b, bufferOffset + 8, buffer); |
| intToBytes(color.a, bufferOffset + 12, buffer); |
| } |
| # 1292 "./copy_buffer.metal" |
| static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); } |
| static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); } |
| static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); } |
| |
| static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); } |
| |
| |
| kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) |
| { |
| if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } |
| # 1336 "./copy_buffer.metal" |
| uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); |
| |
| switch (kCopyFormatType) |
| { |
| case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; |
| } |
| |
| |
| } |
| |
| kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) |
| { |
| if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } |
| # 1364 "./copy_buffer.metal" |
| uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); |
| |
| switch (kCopyFormatType) |
| { |
| case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; |
| } |
| |
| |
| } |
| |
| kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]]) |
| { |
| if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; } |
| # 1392 "./copy_buffer.metal" |
| uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); |
| |
| switch (kCopyFormatType) |
| { |
| case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; |
| } |
| |
| |
| } |
| |
| |
| kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) |
| { |
| if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } |
| # 1439 "./copy_buffer.metal" |
| uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); |
| |
| switch (kCopyFormatType) |
| { |
| case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; |
| } |
| |
| |
| } |
| |
| kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) |
| { |
| if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } |
| # 1464 "./copy_buffer.metal" |
| uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); |
| |
| switch (kCopyFormatType) |
| { |
| case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break; |
| } |
| |
| |
| } |
| |
| kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]]) |
| { |
| if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; } |
| # 1489 "./copy_buffer.metal" |
| uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize); |
| |
| switch (kCopyFormatType) |
| { |
| case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break; |
| } |
| |
| |
| } |
| |
| |
| struct CopyVertexParams |
| { |
| uint srcBufferStartOffset; |
| uint srcStride; |
| uint srcComponentBytes; |
| uint srcComponents; |
| |
| |
| |
| uchar4 srcDefaultAlphaData; |
| |
| uint dstBufferStartOffset; |
| uint dstStride; |
| uint dstComponents; |
| |
| uint vertexCount; |
| }; |
| # 1539 "./copy_buffer.metal" |
| template <typename IntType> |
| static inline void writeFloatVertex(constant CopyVertexParams &options, |
| uint idx, |
| vec<IntType, 4> data, |
| device uchar *dst) |
| { |
| uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; |
| |
| for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) |
| { |
| floatToBytes(static_cast<float>(data[component]), dstOffset, dst); |
| } |
| } |
| |
| template <> |
| inline void writeFloatVertex(constant CopyVertexParams &options, |
| uint idx, |
| vec<float, 4> data, |
| device uchar *dst) |
| { |
| uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset; |
| |
| for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4) |
| { |
| floatToBytes(data[component], dstOffset, dst); |
| } |
| } |
| |
| |
| static inline void convertToFloatVertexFormat(uint index, |
| constant CopyVertexParams &options, |
| constant uchar *srcBuffer, |
| device uchar *dstBuffer) |
| { |
| # 1585 "./copy_buffer.metal" |
| uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index; |
| # 1594 "./copy_buffer.metal" |
| switch (kCopyFormatType) |
| { |
| case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; |
| } |
| |
| |
| } |
| |
| |
| kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]], |
| constant CopyVertexParams &options [[buffer(0)]], |
| constant uchar *srcBuffer [[buffer(1)]], |
| device uchar *dstBuffer [[buffer(2)]]) |
| { |
| if (index >= options.vertexCount) { return; }; |
| convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); |
| } |
| |
| |
| vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]], |
| constant CopyVertexParams &options [[buffer(0)]], |
| constant uchar *srcBuffer [[buffer(1)]], |
| device uchar *dstBuffer [[buffer(2)]]) |
| { |
| convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer); |
| } |
| |
| |
| static inline void expandVertexFormatComponents(uint index, |
| constant CopyVertexParams &options, |
| constant uchar *srcBuffer, |
| device uchar *dstBuffer) |
| { |
| uint srcOffset = options.srcBufferStartOffset + options.srcStride * index; |
| uint dstOffset = options.dstBufferStartOffset + options.dstStride * index; |
| |
| uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u); |
| uint component; |
| for (component = 0; component < options.srcComponents; ++component, |
| srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes) |
| { |
| for (uint byte = 0; byte < options.srcComponentBytes; ++byte) |
| { |
| dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte]; |
| } |
| } |
| |
| for (; component < dstComponentsBeforeAlpha; |
| ++component, dstOffset += options.srcComponentBytes) |
| { |
| for (uint byte = 0; byte < options.srcComponentBytes; ++byte) |
| { |
| dstBuffer[dstOffset + byte] = 0; |
| } |
| } |
| |
| if (component < options.dstComponents) |
| { |
| |
| for (uint byte = 0; byte < options.srcComponentBytes; ++byte) |
| { |
| dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte]; |
| } |
| } |
| } |
| |
| |
| kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]], |
| constant CopyVertexParams &options [[buffer(0)]], |
| constant uchar *srcBuffer [[buffer(1)]], |
| device uchar *dstBuffer [[buffer(2)]]) |
| { |
| if (index >= options.vertexCount) { return; }; |
| |
| expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); |
| } |
| |
| |
| vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]], |
| constant CopyVertexParams &options [[buffer(0)]], |
| constant uchar *srcBuffer [[buffer(1)]], |
| device uchar *dstBuffer [[buffer(2)]]) |
| { |
| expandVertexFormatComponents(index, options, srcBuffer, dstBuffer); |
| } |
| # 6 "temp_master_source.metal" 2 |
| # 1 "./visibility.metal" 1 |
| |
| |
| |
| |
| |
| |
| |
| |
| constant bool kCombineWithExistingResult [[function_constant(1000)]]; |
| |
| |
| |
| struct CombineVisibilityResultOptions |
| { |
| |
| uint startOffset; |
| |
| uint numOffsets; |
| }; |
| |
| kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]], |
| constant CombineVisibilityResultOptions &options [[buffer(0)]], |
| constant ushort4 *renderpassVisibilityResult [[buffer(1)]], |
| device ushort4 *finalResults [[buffer(2)]]) |
| { |
| if (idx > 0) |
| { |
| |
| |
| |
| return; |
| } |
| ushort4 finalResult16x4; |
| |
| if (kCombineWithExistingResult) |
| { |
| finalResult16x4 = finalResults[0]; |
| } |
| else |
| { |
| finalResult16x4 = ushort4(0, 0, 0, 0); |
| } |
| |
| for (uint i = 0; i < options.numOffsets; ++i) |
| { |
| uint offset = options.startOffset + i; |
| ushort4 renderpassResult = renderpassVisibilityResult[offset]; |
| |
| |
| finalResult16x4 = finalResult16x4 | renderpassResult; |
| } |
| finalResults[0] = finalResult16x4; |
| } |
| # 7 "temp_master_source.metal" 2 |
| # 1 "./rewrite_indices.metal" 1 |
| # 11 "./rewrite_indices.metal" |
| # 1 "./rewrite_indices_shared.h" 1 |
| # 12 "./rewrite_indices.metal" 2 |
| using namespace metal; |
| |
| constant uint fixIndexBufferKey [[ function_constant(2000) ]]; |
| constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U); |
| constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U); |
| constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U); |
| constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U); |
| constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U); |
| constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU; |
| |
| |
| static inline uint readIdx( |
| const device ushort *indexBufferUint16, |
| const device uint *indexBufferUint32, |
| const uint restartIndex, |
| const uint indexCount, |
| uint idx, |
| thread bool &foundRestart, |
| thread uint &indexThatRestartedFirst |
| ) |
| { |
| uint inIndex = idx; |
| if(inIndex < indexCount) |
| { |
| if(indexBufferIsUint16) |
| { |
| inIndex = indexBufferUint16[inIndex]; |
| } |
| else if(indexBufferIsUint32) |
| { |
| inIndex = indexBufferUint32[inIndex]; |
| } |
| } |
| else |
| { |
| foundRestart = true; |
| indexThatRestartedFirst = idx; |
| } |
| if(doPrimRestart && !foundRestart && inIndex == restartIndex) |
| { |
| foundRestart = true; |
| indexThatRestartedFirst = idx; |
| } |
| return inIndex; |
| } |
| |
| static inline void outputPrimitive( |
| const device ushort *indexBufferUint16, |
| const device uint *indexBufferUint32, |
| device ushort *outIndexBufferUint16, |
| device uint *outIndexBufferUint32, |
| const uint restartIndex, |
| const uint indexCount, |
| thread uint &baseIndex, |
| uint onIndex, |
| thread uint &onOutIndex |
| ) |
| { |
| if(baseIndex > onIndex) return; |
| bool foundRestart = false; |
| uint indexThatRestartedFirst = 0; |
| # 86 "./rewrite_indices.metal" |
| switch(fixIndexBufferMode) |
| { |
| case 0x00U: |
| { |
| auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst); |
| if(foundRestart) |
| { |
| baseIndex = indexThatRestartedFirst + 1; |
| return; |
| } |
| |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; }); |
| } |
| break; |
| case 0x01U: |
| { |
| auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); |
| auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); |
| if(foundRestart) |
| { |
| baseIndex = indexThatRestartedFirst + 1; |
| return; |
| } |
| if((onIndex - baseIndex) & 1) return; |
| |
| if(fixIndexBufferKey & 0x00200U) |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| } |
| else |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| } |
| } |
| break; |
| case 0x03U: |
| { |
| auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); |
| auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); |
| if(foundRestart) |
| { |
| baseIndex = indexThatRestartedFirst + 1; |
| return; |
| } |
| |
| if(fixIndexBufferKey & 0x00200U) |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| } |
| else |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| } |
| } |
| break; |
| case 0x04U: |
| { |
| auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst); |
| auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst); |
| auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); |
| if(foundRestart) |
| { |
| baseIndex = indexThatRestartedFirst + 1; |
| return; |
| } |
| if(((onIndex - baseIndex) % 3) != 0) return; |
| |
| if(fixIndexBufferKey & 0x00200U) |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| } |
| else |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); |
| } |
| } |
| break; |
| case 0x05U: |
| { |
| uint isOdd = ((onIndex - baseIndex) & 1); |
| auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst); |
| auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst); |
| auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst); |
| if(foundRestart) |
| { |
| baseIndex = indexThatRestartedFirst + 1; |
| return; |
| } |
| |
| if(fixIndexBufferKey & 0x00200U) |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| } |
| else |
| { |
| |
| if(isOdd) |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| } |
| else |
| { |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; }); |
| ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; }); |
| } |
| } |
| |
| assert(onOutIndex <= (onIndex + 1) * 3); |
| assert(onOutIndex <= (indexCount - 2) * 3); |
| } |
| break; |
| |
| } |
| |
| |
| } |
| |
| kernel void fixIndexBuffer( |
| const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]], |
| const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]], |
| device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]], |
| device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]], |
| constant uint &indexCount [[ buffer(2) ]], |
| constant uint &primCount [[ buffer(3) ]], |
| uint prim [[thread_position_in_grid]]) |
| { |
| constexpr uint restartIndex = 0xFFFFFFFF; |
| uint baseIndex = 0; |
| uint onIndex = onIndex; |
| uint onOutIndex = onOutIndex; |
| if(prim < primCount) |
| { |
| switch(fixIndexBufferMode) |
| { |
| case 0x00U: |
| onIndex = prim; |
| onOutIndex = prim; |
| break; |
| case 0x01U: |
| onIndex = prim * 2 + 0; |
| onOutIndex = prim * 2 + 0; |
| break; |
| case 0x03U: |
| onIndex = prim; |
| onOutIndex = prim * 2 + 0; |
| break; |
| case 0x04U: |
| onIndex = prim * 3 + 0; |
| onOutIndex = prim * 3 + 0; |
| break; |
| case 0x05U: |
| onIndex = prim; |
| onOutIndex = prim * 3 + 0; |
| break; |
| } |
| outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex); |
| } |
| } |
| # 8 "temp_master_source.metal" 2 |
| |