Source/ThirdParty/ANGLE/src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.metal - WebKit - Git at Google

 // GENERATED FILE - DO NOT EDIT.
 // Generated by gen_mtl_internal_shaders.py
 //
 // Copyright 2020 The ANGLE Project Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //

 // Metal version of combined Metal default shaders.

 # 1 "temp_master_source.metal"
 # 1 "<built-in>" 1
 # 1 "<built-in>" 3
 # 396 "<built-in>" 3
 # 1 "<command line>" 1
 # 1 "<built-in>" 2
 # 1 "temp_master_source.metal" 2
 # 1 "./blit.metal" 1


 # 1 "./common.h" 1
 # 13 "./common.h"
 # include <simd/simd.h>
 # include <metal_stdlib>


 # 1 "./constants.h" 1
 # 11 "./constants.h"
 namespace rx
 {
 namespace mtl_shader
 {

 enum
 {
     kTextureType2D = 0,
     kTextureType2DMultisample = 1,
     kTextureType2DArray = 2,
     kTextureTypeCube = 3,
     kTextureType3D = 4,
     kTextureTypeCount = 5,
 };


 }
 }
 # 18 "./common.h" 2


 using namespace metal;


 constant uint32_t kNumColorOutputs [[function_constant(0)]];
 constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
 constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
 constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
 constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;

 namespace rx
 {
 namespace mtl_shader
 {


 constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};

 template <typename T>
 struct MultipleColorOutputs
 {
     vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
     vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
     vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
     vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
 };
 # 61 "./common.h"
 template <typename T>
 static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
 {
     MultipleColorOutputs<T> re;

     do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
     do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
     do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
     do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);

     return re;
 }

 static inline float3 cubeTexcoords(float2 texcoords, int face)
 {
     texcoords = 2.0 * texcoords - 1.0;
     switch (face)
     {
         case 0:
             return float3(1.0, -texcoords.y, -texcoords.x);
         case 1:
             return float3(-1.0, -texcoords.y, texcoords.x);
         case 2:
             return float3(texcoords.x, 1.0, texcoords.y);
         case 3:
             return float3(texcoords.x, -1.0, -texcoords.y);
         case 4:
             return float3(texcoords.x, -texcoords.y, 1.0);
         case 5:
             return float3(-texcoords.x, -texcoords.y, -1.0);
     }
     return float3(texcoords, 0);
 }

 template <typename T>
 static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
 {
     uint samples = srcTexture.get_num_samples();

     vec<T, 4> output(0);

     for (uint sample = 0; sample < samples; ++sample)
     {
         output += srcTexture.read(coords, sample);
     }

     output = output / samples;

     return output;
 }

 static inline float4 sRGBtoLinear(float4 color)
 {
     float3 linear1 = color.rgb / 12.92;
     float3 linear2 = pow((color.rgb + float3(0.055)) / 1.055, 2.4);
     float3 factor = float3(color.rgb <= float3(0.04045));
     float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);

     return linear;
 }

 static inline float linearToSRGB(float color)
 {
     if (color <= 0.0f)
         return 0.0f;
     else if (color < 0.0031308f)
         return 12.92f * color;
     else if (color < 1.0f)
         return 1.055f * pow(color, 0.41666f) - 0.055f;
     else
         return 1.0f;
 }

 static inline float4 linearToSRGB(float4 color)
 {
     return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
 }

 template <typename Short>
 static inline Short bytesToShort(constant uchar *input, uint offset)
 {
     Short inputLo = input[offset];
     Short inputHi = input[offset + 1];

     return inputLo | (inputHi << 8);
 }

 template <typename Int>
 static inline Int bytesToInt(constant uchar *input, uint offset)
 {
     Int input0 = input[offset];
     Int input1 = input[offset + 1];
     Int input2 = input[offset + 2];
     Int input3 = input[offset + 3];

     return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
 }

 template <typename Short>
 static inline void shortToBytes(Short val, uint offset, device uchar *output)
 {
     ushort valUnsigned = as_type<ushort>(val);
     output[offset] = valUnsigned & 0xff;
     output[offset + 1] = (valUnsigned >> 8) & 0xff;
 }

 template <typename Int>
 static inline void intToBytes(Int val, uint offset, device uchar *output)
 {
     uint valUnsigned = as_type<uint>(val);
     output[offset] = valUnsigned & 0xff;
     output[offset + 1] = (valUnsigned >> 8) & 0xff;
     output[offset + 2] = (valUnsigned >> 16) & 0xff;
     output[offset + 3] = (valUnsigned >> 24) & 0xff;
 }

 static inline void floatToBytes(float val, uint offset, device uchar *output)
 {
     intToBytes(as_type<uint>(val), offset, output);
 }

 static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
 {
     output[offset] = val & 0xff;
     output[offset + 1] = (val >> 8) & 0xff;
     output[offset + 2] = (val >> 16) & 0xff;
 }

 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
 static inline T getShiftedData(T input)
 {
     static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                   "T must have at least as many bits as inputBitCount + inputBitStart.");
     const T mask = (1 << inputBitCount) - 1;
     return (input >> inputBitStart) & mask;
 }

 template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
 static inline T shiftData(T input)
 {
     static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                   "T must have at least as many bits as inputBitCount + inputBitStart.");
     const T mask = (1 << inputBitCount) - 1;
     return (input & mask) << inputBitStart;
 }

 template <unsigned int inputBitCount, typename T>
 static inline float normalizedToFloat(T input)
 {
     static_assert(inputBitCount <= (sizeof(T) * 8),
                   "T must have more bits than or same bits as inputBitCount.");
     static_assert(inputBitCount <= 23, "Only single precision is supported");

     constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
     return input * inverseMax;
 }

 template <typename T>
 static inline float normalizedToFloat(T input)
 {
     return normalizedToFloat<sizeof(T) * 8, T>(input);
 }

 template <>
 inline float normalizedToFloat(short input)
 {
     constexpr float inverseMax = 1.0f / 0x7fff;
     return static_cast<float>(input) * inverseMax;
 }

 template <>
 inline float normalizedToFloat(int input)
 {
     constexpr float inverseMax = 1.0f / 0x7fffffff;
     return static_cast<float>(input) * inverseMax;
 }

 template <>
 inline float normalizedToFloat(uint input)
 {
     constexpr float inverseMax = 1.0f / 0xffffffff;
     return static_cast<float>(input) * inverseMax;
 }

 template <unsigned int outputBitCount, typename T>
 static inline T floatToNormalized(float input)
 {
     static_assert(outputBitCount <= (sizeof(T) * 8),
                   "T must have more bits than or same bits as inputBitCount.");
     static_assert(outputBitCount <= 23, "Only single precision is supported");

     return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f);
 }

 template <typename T>
 static inline T floatToNormalized(float input)
 {
     return floatToNormalized<sizeof(T) * 8, T>(input);
 }

 }
 }
 # 9 "./blit.metal" 2

 using namespace rx::mtl_shader;


 constant bool kPremultiplyAlpha [[function_constant(1)]];
 constant bool kUnmultiplyAlpha [[function_constant(2)]];
 constant int kSourceTextureType [[function_constant(3)]];
 constant int kSourceTexture2Type [[function_constant(4)]];

 constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
 constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
 constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
 constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
 constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;

 constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
 constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
 constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
 constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;

 struct BlitParams
 {

     float2 srcTexCoords[3];
     int srcLevel;
     int srcLayer;
     bool dstFlipViewportX;
     bool dstFlipViewportY;
     bool dstLuminance;
     uint8_t padding[13];
 };

 struct BlitVSOut
 {
     float4 position [[position]];
     float2 texCoords [[user(locn1)]];
 };

 vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
 {
     BlitVSOut output;
     output.position = float4(gCorners[vid], 0.0, 1.0);
     output.texCoords = options.srcTexCoords[vid];

     if (options.dstFlipViewportX)
     {
         output.position.x = -output.position.x;
     }
     if (!options.dstFlipViewportY)
     {


         output.position.y = -output.position.y;
     }

     return output;
 }

 template <typename SrcTexture2d>
 static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
 {
     uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
     uint2 coords = uint2(texCoords * float2(dimens));

     return coords;
 }

 template <typename T>
 static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
 {
     uint2 coords = getImageCoords(srcTexture, texCoords);
     return resolveTextureMS(srcTexture, coords);
 }

 template <typename T>
 static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
                                             sampler textureSampler,
                                             float2 texCoords,
                                             constant BlitParams &options)
 {
     uint depth = srcTexture.get_depth(options.srcLevel);
     float zCoord = (float(options.srcLayer) + 0.5) / float(depth);

     return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
 }
 # 112 "./blit.metal"
 template <typename T>
 static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
 {
     vec<T, 4> output;

     switch (kSourceTextureType)
     {
         case kTextureType2D:
             output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
             break;
         case kTextureType2DArray:
             output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
                                               level(options.srcLevel));
             break;
         case kTextureType2DMultisample:
             output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
             break;
         case kTextureTypeCube:
             output = srcTextureCube.sample(textureSampler,
                                            cubeTexcoords(input.texCoords, options.srcLayer),
                                            level(options.srcLevel));
             break;
         case kTextureType3D:
             output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
             break;
     }

     if (kPremultiplyAlpha)
     {
         output.xyz *= output.a;
     }
     else if (kUnmultiplyAlpha)
     {
         if (output.a != 0.0)
         {
             output.xyz /= output.a;
         }
     }

     if (options.dstLuminance)
     {
         output.g = output.b = output.r;
     }

     return output;
 }

 template <typename T>
 static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
 {
     vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);

     return toMultipleColorOutputs(output);
 }

 fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
 {
     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
 }
 fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
 {
     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
 }
 fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
 {
     return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
 }

 fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
 {
     float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
     uint4 output = uint4(inputColor * float4(255.0));

     return toMultipleColorOutputs(output);
 }


 struct FragmentDepthOut
 {
     float depth [[depth(any)]];
 };

 static inline float sampleDepth(
     texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
     texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
     texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
     texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
     float2 texCoords,
     constant BlitParams &options)
 {
     float4 output;

     constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);

     switch (kSourceTextureType)
     {
         case kTextureType2D:
             output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
             break;
         case kTextureType2DArray:
             output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
                                               level(options.srcLevel));
             break;
         case kTextureType2DMultisample:

             output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
             break;
         case kTextureTypeCube:
             output =
                 srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
                                       level(options.srcLevel));
             break;
     }

     return output.r;
 }

 fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
                                       texture2d<float> srcTexture2d
                                       [[texture(0), function_constant(kSourceTextureType2D)]],
                                       texture2d_array<float> srcTexture2dArray
                                       [[texture(0), function_constant(kSourceTextureType2DArray)]],
                                       texture2d_ms<float> srcTexture2dMS
                                       [[texture(0), function_constant(kSourceTextureType2DMS)]],
                                       texturecube<float> srcTextureCube
                                       [[texture(0), function_constant(kSourceTextureTypeCube)]],
                                       constant BlitParams &options [[buffer(0)]])
 {
     FragmentDepthOut re;

     re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                            input.texCoords, options);

     return re;
 }

 static inline uint32_t sampleStencil(
     texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
     texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
     texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
     texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
     float2 texCoords,
     int srcLevel,
     int srcLayer)
 {
     uint4 output;
     constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);

     switch (kSourceTexture2Type)
     {
         case kTextureType2D:
             output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
             break;
         case kTextureType2DArray:
             output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
             break;
         case kTextureType2DMultisample:

             output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
             break;
         case kTextureTypeCube:
             output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
                                            level(srcLevel));
             break;
     }

     return output.r;
 }


 struct BlitStencilToBufferParams
 {
     float2 srcStartTexCoords;
     float2 srcTexCoordSteps;
     int srcLevel;
     int srcLayer;

     uint2 dstSize;
     uint dstBufferRowPitch;

     bool resolveMS;
 };

 kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
                                   texture2d<uint32_t> srcTexture2d
                                   [[texture(1), function_constant(kSourceTexture2Type2D)]],
                                   texture2d_array<uint32_t> srcTexture2dArray
                                   [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
                                   texture2d_ms<uint32_t> srcTexture2dMS
                                   [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
                                   texturecube<uint32_t> srcTextureCube
                                   [[texture(1), function_constant(kSourceTexture2TypeCube)]],
                                   constant BlitStencilToBufferParams &options [[buffer(0)]],
                                   device uchar *buffer [[buffer(1)]])
 {
     if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
     {
         return;
     }

     float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;

     if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
     {
         uint samples = srcTexture2dMS.get_num_samples();
         uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
         uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;

         for (uint sample = 0; sample < samples; ++sample)
         {
             uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
             buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
         }
     }
     else
     {
         uint32_t stencil =
             sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                           srcTexCoords, options.srcLevel, options.srcLayer);

         buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
     }
 }


 #if __METAL_VERSION__ >= 210

 struct FragmentStencilOut
 {
     uint32_t stencil [[stencil]];
 };

 struct FragmentDepthStencilOut
 {
     float depth [[depth(any)]];
     uint32_t stencil [[stencil]];
 };

 fragment FragmentStencilOut blitStencilFS(
     BlitVSOut input [[stage_in]],
     texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
     texture2d_array<uint32_t> srcTexture2dArray
     [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
     texture2d_ms<uint32_t> srcTexture2dMS
     [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
     texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
     constant BlitParams &options [[buffer(0)]])
 {
     FragmentStencilOut re;

     re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
                                input.texCoords, options.srcLevel, options.srcLayer);

     return re;
 }

 fragment FragmentDepthStencilOut blitDepthStencilFS(
     BlitVSOut input [[stage_in]],

     texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
     texture2d_array<float> srcDepthTexture2dArray
     [[texture(0), function_constant(kSourceTextureType2DArray)]],
     texture2d_ms<float> srcDepthTexture2dMS
     [[texture(0), function_constant(kSourceTextureType2DMS)]],
     texturecube<float> srcDepthTextureCube
     [[texture(0), function_constant(kSourceTextureTypeCube)]],


     texture2d<uint32_t> srcStencilTexture2d
     [[texture(1), function_constant(kSourceTexture2Type2D)]],
     texture2d_array<uint32_t> srcStencilTexture2dArray
     [[texture(1), function_constant(kSourceTexture2Type2DArray)]],
     texture2d_ms<uint32_t> srcStencilTexture2dMS
     [[texture(1), function_constant(kSourceTexture2Type2DMS)]],
     texturecube<uint32_t> srcStencilTextureCube
     [[texture(1), function_constant(kSourceTexture2TypeCube)]],

     constant BlitParams &options [[buffer(0)]])
 {
     FragmentDepthStencilOut re;

     re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
                            srcDepthTextureCube, input.texCoords, options);
     re.stencil =
         sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
                       srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
     return re;
 }
 #endif
 # 2 "temp_master_source.metal" 2
 # 1 "./clear.metal" 1
 # 10 "./clear.metal"
 using namespace rx::mtl_shader;

 struct ClearParams
 {
     float4 clearColor;
     float clearDepth;
 };

 vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
                       constant ClearParams &clearParams [[buffer(0)]])
 {
     return float4(gCorners[vid], clearParams.clearDepth, 1.0);
 }

 fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
 {
     return toMultipleColorOutputs(clearParams.clearColor);
 }

 fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
 {
     return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
 }

 fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
 {
     return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
 }
 # 3 "temp_master_source.metal" 2
 # 1 "./gen_indices.metal" 1


 using namespace rx::mtl_shader;


 constant bool kSourceBufferAligned[[function_constant(100)]];
 constant bool kSourceIndexIsU8[[function_constant(200)]];
 constant bool kSourceIndexIsU16[[function_constant(300)]];
 constant bool kSourceIndexIsU32[[function_constant(400)]];
 constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
 constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
 constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
 constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;

 struct IndexConversionParams
 {
     uint32_t srcOffset;
     uint32_t indexCount;
     bool primitiveRestartEnabled;
 };


 inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
 {
     return inputAligned[offset / 2 + idx];
 }
 inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
 {
     return inputAligned[offset / 4 + idx];
 }
 inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
 {
     return input[offset + idx];
 }
 inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
 {
     ushort inputLo = input[offset + 2 * idx];
     ushort inputHi = input[offset + 2 * idx + 1];

     return inputLo | (inputHi << 8);
 }
 inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
 {
     uint input0 = input[offset + 4 * idx];
     uint input1 = input[offset + 4 * idx + 1];
     uint input2 = input[offset + 4 * idx + 2];
     uint input3 = input[offset + 4 * idx + 3];

     return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
 }

 kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
                                 constant IndexConversionParams &options [[buffer(0)]],
                                 constant uchar *input [[buffer(1)]],
                                 device ushort *output [[buffer(2)]])
 {
     if (idx >= options.indexCount) { return; };

     uchar value = getIndexAligned(input, options.srcOffset, idx);

     if (options.primitiveRestartEnabled && value == 0xff)
     {
         output[idx] = 0xffff;
     }
     else
     {
         output[idx] = value;
     }
 }

 kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
                             constant IndexConversionParams &options [[buffer(0)]],
                             constant uchar *input
                             [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                             constant ushort *inputAligned
                             [[buffer(1), function_constant(kSourceBufferAligned)]],
                             device ushort *output [[buffer(2)]])
 {
     if (idx >= options.indexCount) { return; };

     ushort value;
     if (kSourceBufferAligned)
     {
         value = getIndexAligned(inputAligned, options.srcOffset, idx);
     }
     else
     {
         value = getIndexUnalignedU16(input, options.srcOffset, idx);
     }
     output[idx] = value;
 }

 kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
                             constant IndexConversionParams &options [[buffer(0)]],
                             constant uchar *input
                             [[buffer(1), function_constant(kSourceBufferUnaligned)]],
                             constant uint *inputAligned
                             [[buffer(1), function_constant(kSourceBufferAligned)]],
                             device uint *output [[buffer(2)]])
 {
     if (idx >= options.indexCount) { return; };

     uint value;
     if (kSourceBufferAligned)
     {
         value = getIndexAligned(inputAligned, options.srcOffset, idx);
     }
     else
     {
         value = getIndexUnalignedU32(input, options.srcOffset, idx);
     }
     output[idx] = value;
 }

 struct IndexFromArrayParams
 {
     uint firstVertex;

     uint vertexCount;
 };


 kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
                                       constant IndexFromArrayParams &options [[buffer(0)]],
                                       device uint *output [[buffer(2)]])
 {
     if (idx >= options.vertexCount) { return; };

     uint vertexIdx = options.firstVertex + 2 + idx;

     output[3 * idx] = options.firstVertex;
     output[3 * idx + 1] = vertexIdx - 1;
     output[3 * idx + 2] = vertexIdx;
 }

 inline uint getIndexU32(uint offset,
                         uint idx,
                         constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
                         constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
                         constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
 {
     if (kUseSourceBufferU8)
     {
         if (kSourceIndexIsU16)
         {
             return getIndexUnalignedU16(inputU8, offset, idx);
         }
         else if (kSourceIndexIsU32)
         {
             return getIndexUnalignedU32(inputU8, offset, idx);
         }
         return getIndexAligned(inputU8, offset, idx);
     }
     else if (kUseSourceBufferU16)
     {
         return getIndexAligned(inputU16, offset, idx);
     }
     else if (kUseSourceBufferU32)
     {
         return getIndexAligned(inputU32, offset, idx);
     }
     return 0;
 }


 kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
                                          constant IndexConversionParams &options [[buffer(0)]],
                                          constant uchar *inputU8
                                          [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                          constant ushort *inputU16
                                          [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                          constant uint *inputU32
                                          [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                          device uint *output [[buffer(2)]])
 {
     if (idx >= options.indexCount) { return; };

     uint elemIdx = 2 + idx;

     output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
     output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
     output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
 }


 kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
                                         constant IndexFromArrayParams &options [[buffer(0)]],
                                         device uint *output [[buffer(2)]])
 {
     uint totalIndices = options.vertexCount + 1;
     if (idx >= totalIndices) { return; };

     output[idx] = options.firstVertex + idx % options.vertexCount;
 }


 kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
                                            constant IndexConversionParams &options [[buffer(0)]],
                                            constant uchar *inputU8
                                            [[buffer(1), function_constant(kUseSourceBufferU8)]],
                                            constant ushort *inputU16
                                            [[buffer(1), function_constant(kUseSourceBufferU16)]],
                                            constant uint *inputU32
                                            [[buffer(1), function_constant(kUseSourceBufferU32)]],
                                            device uint *output [[buffer(2)]])
 {
     uint totalTargetIndices = options.indexCount + 1;
     if (idx >= totalTargetIndices) { return; };

     output[idx] =
         getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
 }
 # 4 "temp_master_source.metal" 2
 # 1 "./gen_mipmap.metal" 1


 using namespace rx::mtl_shader;
 # 31 "./gen_mipmap.metal"
 struct GenMipParams
 {
     uint srcLevel;
     uint numMipLevelsToGen;
     bool sRGB;
 };


 kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                               ushort3 gIndices [[thread_position_in_grid]],
                               texture3d<float> srcTexture [[texture(0)]],
                               texture3d<float, access::write> dstMip1 [[texture(1)]],
                               texture3d<float, access::write> dstMip2 [[texture(2)]],
                               texture3d<float, access::write> dstMip3 [[texture(3)]],
                               texture3d<float, access::write> dstMip4 [[texture(4)]],
                               constant GenMipParams &options [[buffer(0)]])
 {
     ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
     bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;

     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);


     threadgroup float sR[(8 * 8 * 8)];
     threadgroup float sG[(8 * 8 * 8)];
     threadgroup float sB[(8 * 8 * 8)];
     threadgroup float sA[(8 * 8 * 8)];


     float4 texel1;
     if (validThread)
     {
         float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
         texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));


         dstMip1.write(texel1, gIndices);
     }
     else
     {

         lIndex = 0xffffffff;
     }

     if (options.numMipLevelsToGen == 1)
     {
         return;
     }


     if (options.sRGB)
     {
         texel1 = linearToSRGB(texel1);
     }
     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;

     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x49) == 0)
     {
         bool3 atEdge = gIndices == (mipSize - ushort3(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));

         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));

         float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));

         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));


         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));


         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));


         float4 texel8 =
             (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));


         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;

         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);


         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
     }

     if (options.numMipLevelsToGen == 2)
     {
         return;
     }


     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0xdb) == 0)
     {
         mipSize = max(mipSize >> 1, ushort3(1));
         bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));

         float4 texel3 =
             (atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));

         float4 texel4 =
             (atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));

         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));


         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));


         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));


         float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));


         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;

         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);


         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
     }

     if (options.numMipLevelsToGen == 3)
     {
         return;
     }


     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x1ff) == 0)
     {
         mipSize = max(mipSize >> 1, ushort3(1));
         bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));

         float4 texel3 =
             (atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));

         float4 texel4 =
             (atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));

         float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));


         float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));


         float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));


         float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));


         texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;

         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
     }
 }

 kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                               ushort2 gIndices [[thread_position_in_grid]],
                               texture2d<float> srcTexture [[texture(0)]],
                               texture2d<float, access::write> dstMip1 [[texture(1)]],
                               texture2d<float, access::write> dstMip2 [[texture(2)]],
                               texture2d<float, access::write> dstMip3 [[texture(3)]],
                               texture2d<float, access::write> dstMip4 [[texture(4)]],
                               constant GenMipParams &options [[buffer(0)]])
 {
     uint firstMipLevel = options.srcLevel + 1;
     ushort2 mipSize =
         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
     bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;

     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);


     threadgroup float sR[(8 * 8)];
     threadgroup float sG[(8 * 8)];
     threadgroup float sB[(8 * 8)];
     threadgroup float sA[(8 * 8)];


     float4 texel1;
     if (validThread)
     {
         float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
         texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));


         dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
     }
     else
     {

         lIndex = 0xffffffff;
     }

     if (options.numMipLevelsToGen == 1)
     {
         return;
     }


     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;

     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x09) == 0)
     {
         bool2 atEdge = gIndices == (mipSize - ushort2(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));

         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));

         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));


         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);


         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
     }

     if (options.numMipLevelsToGen == 2)
     {
         return;
     }


     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x1b) == 0)
     {
         mipSize = max(mipSize >> 1, ushort2(1));
         bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));

         float4 texel3 =
             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));

         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));


         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);


         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
     }

     if (options.numMipLevelsToGen == 3)
     {
         return;
     }


     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x3f) == 0)
     {
         mipSize = max(mipSize >> 1, ushort2(1));
         bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));

         float4 texel3 =
             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));

         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));


         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
     }
 }

 template <typename TextureTypeR, typename TextureTypeW>
 static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
     uint lIndex,
     ushort3 gIndices,
     TextureTypeR srcTexture,
     TextureTypeW dstMip2,
     TextureTypeW dstMip3,
     TextureTypeW dstMip4,
     ushort2 mip1Size,
     float4 mip1Texel,
     threadgroup float *sR,
     threadgroup float *sG,
     threadgroup float *sB,
     threadgroup float *sA,
     constant GenMipParams &options)
 {
     ushort2 mipSize = mip1Size;
     float4 texel1 = mip1Texel;


     sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;

     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x09) == 0)
     {
         bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));

         float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));

         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));


         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

         dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);


         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
     }

     if (options.numMipLevelsToGen == 2)
     {
         return;
     }


     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x1b) == 0)
     {
         mipSize = max(mipSize >> 1, ushort2(1));
         bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));

         float4 texel3 =
             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));

         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));


         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

         dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);


         sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
     }

     if (options.numMipLevelsToGen == 3)
     {
         return;
     }


     threadgroup_barrier(mem_flags::mem_threadgroup);


     if ((lIndex & 0x3f) == 0)
     {
         mipSize = max(mipSize >> 1, ushort2(1));
         bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));


         float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));

         float4 texel3 =
             (atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));

         float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));


         texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;

         dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
     }
 }

 kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                                 ushort3 gIndices [[thread_position_in_grid]],
                                 texturecube<float> srcTexture [[texture(0)]],
                                 texturecube<float, access::write> dstMip1 [[texture(1)]],
                                 texturecube<float, access::write> dstMip2 [[texture(2)]],
                                 texturecube<float, access::write> dstMip3 [[texture(3)]],
                                 texturecube<float, access::write> dstMip4 [[texture(4)]],
                                 constant GenMipParams &options [[buffer(0)]])
 {
     uint firstMipLevel = options.srcLevel + 1;
     ushort2 mip1Size =
         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
     bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;

     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);


     float4 mip1Texel;
     if (validThread)
     {
         float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
         mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
                                       level(options.srcLevel));


         dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
     }
     else
     {

         lIndex = 0xffffffff;
     }

     if (options.numMipLevelsToGen == 1)
     {
         return;
     }


     threadgroup float sR[(8 * 8)];
     threadgroup float sG[(8 * 8)];
     threadgroup float sB[(8 * 8)];
     threadgroup float sA[(8 * 8)];

     generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
                                            mip1Size, mip1Texel, sR, sG, sB, sA, options);
 }

 kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
                                    ushort3 gIndices [[thread_position_in_grid]],
                                    texture2d_array<float> srcTexture [[texture(0)]],
                                    texture2d_array<float, access::write> dstMip1 [[texture(1)]],
                                    texture2d_array<float, access::write> dstMip2 [[texture(2)]],
                                    texture2d_array<float, access::write> dstMip3 [[texture(3)]],
                                    texture2d_array<float, access::write> dstMip4 [[texture(4)]],
                                    constant GenMipParams &options [[buffer(0)]])
 {
     uint firstMipLevel = options.srcLevel + 1;
     ushort2 mip1Size =
         ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
     bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;

     constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);


     float4 mip1Texel;
     if (validThread)
     {
         float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
         mip1Texel =
             srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));


         dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
     }
     else
     {

         lIndex = 0xffffffff;
     }

     if (options.numMipLevelsToGen == 1)
     {
         return;
     }


     threadgroup float sR[(8 * 8)];
     threadgroup float sG[(8 * 8)];
     threadgroup float sB[(8 * 8)];
     threadgroup float sA[(8 * 8)];

     generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
                                            mip1Size, mip1Texel, sR, sG, sB, sA, options);
 }
 # 5 "temp_master_source.metal" 2
 # 1 "./copy_buffer.metal" 1
 # 12 "./copy_buffer.metal"
 #include <metal_pack>


 # 1 "./format_autogen.h" 1
 # 11 "./format_autogen.h"
 namespace rx
 {
 namespace mtl_shader
 {

 namespace FormatID
 {
 enum
 {
     NONE,
     D16_UNORM,
     D24_UNORM_S8_UINT,
     D24_UNORM_X8_UINT,
     D32_FLOAT,
     D32_FLOAT_S8X24_UINT,
     D32_UNORM,
     S8_UINT,
     A16_FLOAT,
     A1R5G5B5_UNORM,
     A2R10G10B10_SINT_VERTEX,
     A2R10G10B10_SNORM_VERTEX,
     A2R10G10B10_SSCALED_VERTEX,
     A2R10G10B10_UINT_VERTEX,
     A2R10G10B10_UNORM_VERTEX,
     A2R10G10B10_USCALED_VERTEX,
     A32_FLOAT,
     A8_UNORM,
     ASTC_10x10_SRGB_BLOCK,
     ASTC_10x10_UNORM_BLOCK,
     ASTC_10x5_SRGB_BLOCK,
     ASTC_10x5_UNORM_BLOCK,
     ASTC_10x6_SRGB_BLOCK,
     ASTC_10x6_UNORM_BLOCK,
     ASTC_10x8_SRGB_BLOCK,
     ASTC_10x8_UNORM_BLOCK,
     ASTC_12x10_SRGB_BLOCK,
     ASTC_12x10_UNORM_BLOCK,
     ASTC_12x12_SRGB_BLOCK,
     ASTC_12x12_UNORM_BLOCK,
     ASTC_3x3x3_UNORM_BLOCK,
     ASTC_3x3x3_UNORM_SRGB_BLOCK,
     ASTC_4x3x3_UNORM_BLOCK,
     ASTC_4x3x3_UNORM_SRGB_BLOCK,
     ASTC_4x4_SRGB_BLOCK,
     ASTC_4x4_UNORM_BLOCK,
     ASTC_4x4x3_UNORM_BLOCK,
     ASTC_4x4x3_UNORM_SRGB_BLOCK,
     ASTC_4x4x4_UNORM_BLOCK,
     ASTC_4x4x4_UNORM_SRGB_BLOCK,
     ASTC_5x4_SRGB_BLOCK,
     ASTC_5x4_UNORM_BLOCK,
     ASTC_5x4x4_UNORM_BLOCK,
     ASTC_5x4x4_UNORM_SRGB_BLOCK,
     ASTC_5x5_SRGB_BLOCK,
     ASTC_5x5_UNORM_BLOCK,
     ASTC_5x5x4_UNORM_BLOCK,
     ASTC_5x5x4_UNORM_SRGB_BLOCK,
     ASTC_5x5x5_UNORM_BLOCK,
     ASTC_5x5x5_UNORM_SRGB_BLOCK,
     ASTC_6x5_SRGB_BLOCK,
     ASTC_6x5_UNORM_BLOCK,
     ASTC_6x5x5_UNORM_BLOCK,
     ASTC_6x5x5_UNORM_SRGB_BLOCK,
     ASTC_6x6_SRGB_BLOCK,
     ASTC_6x6_UNORM_BLOCK,
     ASTC_6x6x5_UNORM_BLOCK,
     ASTC_6x6x5_UNORM_SRGB_BLOCK,
     ASTC_6x6x6_UNORM_BLOCK,
     ASTC_6x6x6_UNORM_SRGB_BLOCK,
     ASTC_8x5_SRGB_BLOCK,
     ASTC_8x5_UNORM_BLOCK,
     ASTC_8x6_SRGB_BLOCK,
     ASTC_8x6_UNORM_BLOCK,
     ASTC_8x8_SRGB_BLOCK,
     ASTC_8x8_UNORM_BLOCK,
     B10G10R10A2_UNORM,
     B4G4R4A4_UNORM,
     B5G5R5A1_UNORM,
     B5G6R5_UNORM,
     B8G8R8A8_TYPELESS,
     B8G8R8A8_TYPELESS_SRGB,
     B8G8R8A8_UNORM,
     B8G8R8A8_UNORM_SRGB,
     B8G8R8X8_UNORM,
     BC1_RGBA_UNORM_BLOCK,
     BC1_RGBA_UNORM_SRGB_BLOCK,
     BC1_RGB_UNORM_BLOCK,
     BC1_RGB_UNORM_SRGB_BLOCK,
     BC2_RGBA_UNORM_BLOCK,
     BC2_RGBA_UNORM_SRGB_BLOCK,
     BC3_RGBA_UNORM_BLOCK,
     BC3_RGBA_UNORM_SRGB_BLOCK,
     BC4_RED_SNORM_BLOCK,
     BC4_RED_UNORM_BLOCK,
     BC5_RG_SNORM_BLOCK,
     BC5_RG_UNORM_BLOCK,
     BC6H_RGB_SFLOAT_BLOCK,
     BC6H_RGB_UFLOAT_BLOCK,
     BC7_RGBA_UNORM_BLOCK,
     BC7_RGBA_UNORM_SRGB_BLOCK,
     EAC_R11G11_SNORM_BLOCK,
     EAC_R11G11_UNORM_BLOCK,
     EAC_R11_SNORM_BLOCK,
     EAC_R11_UNORM_BLOCK,
     ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
     ETC1_R8G8B8_UNORM_BLOCK,
     ETC2_R8G8B8A1_SRGB_BLOCK,
     ETC2_R8G8B8A1_UNORM_BLOCK,
     ETC2_R8G8B8A8_SRGB_BLOCK,
     ETC2_R8G8B8A8_UNORM_BLOCK,
     ETC2_R8G8B8_SRGB_BLOCK,
     ETC2_R8G8B8_UNORM_BLOCK,
     G8_B8R8_2PLANE_420_UNORM,
     G8_B8_R8_3PLANE_420_UNORM,
     L16A16_FLOAT,
     L16_FLOAT,
     L32A32_FLOAT,
     L32_FLOAT,
     L8A8_UNORM,
     L8_UNORM,
     PVRTC1_RGBA_2BPP_UNORM_BLOCK,
     PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
     PVRTC1_RGBA_4BPP_UNORM_BLOCK,
     PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
     PVRTC1_RGB_2BPP_UNORM_BLOCK,
     PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
     PVRTC1_RGB_4BPP_UNORM_BLOCK,
     PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
     R10G10B10A2_SINT,
     R10G10B10A2_SNORM,
     R10G10B10A2_SSCALED,
     R10G10B10A2_UINT,
     R10G10B10A2_UNORM,
     R10G10B10A2_USCALED,
     R10G10B10X2_UNORM,
     R11G11B10_FLOAT,
     R16G16B16A16_FLOAT,
     R16G16B16A16_SINT,
     R16G16B16A16_SNORM,
     R16G16B16A16_SSCALED,
     R16G16B16A16_UINT,
     R16G16B16A16_UNORM,
     R16G16B16A16_USCALED,
     R16G16B16_FLOAT,
     R16G16B16_SINT,
     R16G16B16_SNORM,
     R16G16B16_SSCALED,
     R16G16B16_UINT,
     R16G16B16_UNORM,
     R16G16B16_USCALED,
     R16G16_FLOAT,
     R16G16_SINT,
     R16G16_SNORM,
     R16G16_SSCALED,
     R16G16_UINT,
     R16G16_UNORM,
     R16G16_USCALED,
     R16_FLOAT,
     R16_SINT,
     R16_SNORM,
     R16_SSCALED,
     R16_UINT,
     R16_UNORM,
     R16_USCALED,
     R32G32B32A32_FIXED,
     R32G32B32A32_FLOAT,
     R32G32B32A32_SINT,
     R32G32B32A32_SNORM,
     R32G32B32A32_SSCALED,
     R32G32B32A32_UINT,
     R32G32B32A32_UNORM,
     R32G32B32A32_USCALED,
     R32G32B32_FIXED,
     R32G32B32_FLOAT,
     R32G32B32_SINT,
     R32G32B32_SNORM,
     R32G32B32_SSCALED,
     R32G32B32_UINT,
     R32G32B32_UNORM,
     R32G32B32_USCALED,
     R32G32_FIXED,
     R32G32_FLOAT,
     R32G32_SINT,
     R32G32_SNORM,
     R32G32_SSCALED,
     R32G32_UINT,
     R32G32_UNORM,
     R32G32_USCALED,
     R32_FIXED,
     R32_FLOAT,
     R32_SINT,
     R32_SNORM,
     R32_SSCALED,
     R32_UINT,
     R32_UNORM,
     R32_USCALED,
     R4G4B4A4_UNORM,
     R5G5B5A1_UNORM,
     R5G6B5_UNORM,
     R8G8B8A8_SINT,
     R8G8B8A8_SNORM,
     R8G8B8A8_SSCALED,
     R8G8B8A8_TYPELESS,
     R8G8B8A8_TYPELESS_SRGB,
     R8G8B8A8_UINT,
     R8G8B8A8_UNORM,
     R8G8B8A8_UNORM_SRGB,
     R8G8B8A8_USCALED,
     R8G8B8X8_UNORM,
     R8G8B8_SINT,
     R8G8B8_SNORM,
     R8G8B8_SSCALED,
     R8G8B8_UINT,
     R8G8B8_UNORM,
     R8G8B8_UNORM_SRGB,
     R8G8B8_USCALED,
     R8G8_SINT,
     R8G8_SNORM,
     R8G8_SSCALED,
     R8G8_UINT,
     R8G8_UNORM,
     R8G8_UNORM_SRGB,
     R8G8_USCALED,
     R8_SINT,
     R8_SNORM,
     R8_SSCALED,
     R8_UINT,
     R8_UNORM,
     R8_UNORM_SRGB,
     R8_USCALED,
     R9G9B9E5_SHAREDEXP,
     X2R10G10B10_SINT_VERTEX,
     X2R10G10B10_SNORM_VERTEX,
     X2R10G10B10_SSCALED_VERTEX,
     X2R10G10B10_UINT_VERTEX,
     X2R10G10B10_UNORM_VERTEX,
     X2R10G10B10_USCALED_VERTEX
 };

 }

 }
 }
 # 16 "./copy_buffer.metal" 2

 using namespace rx::mtl_shader;

 constant int kCopyFormatType [[function_constant(10)]];


 constant int kCopyTextureType [[function_constant(20)]];
 constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
 constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
 constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
 constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
 constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;

 struct CopyPixelParams
 {
     uint3 copySize;
     uint3 textureOffset;

     uint bufferStartOffset;
     uint pixelSize;
     uint bufferRowPitch;
     uint bufferDepthPitch;
 };

 struct WritePixelParams
 {
     uint2 copySize;
     uint2 textureOffset;

     uint bufferStartOffset;

     uint pixelSize;
     uint bufferRowPitch;

     uint textureLevel;
     uint textureLayer;

     bool reverseTextureRowOrder;
 };
 # 120 "./copy_buffer.metal"
 template <typename T>
 static inline void textureWrite(ushort3 gIndices,
                                 constant CopyPixelParams &options,
                                 vec<T, 4> color,
                                 texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
 {
     uint3 writeIndices = options.textureOffset + uint3(gIndices);
     switch (kCopyTextureType)
     {
         case kTextureType2D:
             dstTexture2d.write(color, writeIndices.xy);
             break;
         case kTextureType2DArray:
             dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
             break;
         case kTextureType3D:
             dstTexture3d.write(color, writeIndices);
             break;
         case kTextureTypeCube:
             dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
             break;
     }
 }


 template <typename T>
 static inline vec<T, 4> textureRead(ushort2 gIndices,
                                     constant WritePixelParams &options,
                                     texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
 {
     vec<T, 4> color;
     uint2 coords = uint2(gIndices);
     if (options.reverseTextureRowOrder)
     {
         coords.y = options.copySize.y - 1 - gIndices.y;
     }
     coords += options.textureOffset;
     switch (kCopyTextureType)
     {
         case kTextureType2D:
             color = srcTexture2d.read(coords.xy, options.textureLevel);
             break;
         case kTextureType2DArray:
             color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
             break;
         case kTextureType2DMultisample:
             color = resolveTextureMS(srcTexture2dMS, coords.xy);
             break;
         case kTextureType3D:
             color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
             break;
         case kTextureTypeCube:
             color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
             break;
     }
     return color;
 }
 # 215 "./copy_buffer.metal"
 static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     ushort src = bytesToShort<ushort>(buffer, bufferOffset);

     color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
     color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
     color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
     color.a = 1.0;
     return color;
 }
 static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
                  shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
                  shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));

     shortToBytes(dst, bufferOffset, buffer);
 }


 static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     ushort src = bytesToShort<ushort>(buffer, bufferOffset);

     color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
     color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
     color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
     color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
     return color;
 }
 static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
                  shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
                  shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
                  shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
     ;

     shortToBytes(dst, bufferOffset, buffer);
 }


 static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     ushort src = bytesToShort<ushort>(buffer, bufferOffset);

     color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
     color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
     color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
     color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
     return color;
 }
 static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
                  shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
                  shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
                  shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
     ;

     shortToBytes(dst, bufferOffset, buffer);
 }


 static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     int src = bytesToInt<int>(buffer, bufferOffset);

     constexpr int3 rgbSignMask(0x200);
     constexpr int3 negativeMask(0xfffffc00);
     constexpr int alphaSignMask = 0x2;
     constexpr int alphaNegMask = 0xfffffffc;

     color.r = getShiftedData<10, 0>(src);
     color.g = getShiftedData<10, 10>(src);
     color.b = getShiftedData<10, 20>(src);

     int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
     color.rgb = (isRgbNegative * negativeMask) | color.rgb;

     color.a = getShiftedData<2, 30>(src);
     int isAlphaNegative = color.a & alphaSignMask >> 1;
     color.a = (isAlphaNegative * alphaNegMask) | color.a;
     return color;
 }

 static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     uint src = bytesToInt<uint>(buffer, bufferOffset);

     color.r = getShiftedData<10, 0>(src);
     color.g = getShiftedData<10, 10>(src);
     color.b = getShiftedData<10, 20>(src);
     color.a = getShiftedData<2, 30>(src);
     return color;
 }


 static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
 {
     float4 color;
     uint src = bytesToInt<uint>(buffer, bufferOffset);

     if (isSRGB)
     {
         color = unpack_unorm4x8_srgb_to_float(src);
     }
     else
     {
         color = unpack_unorm4x8_to_float(src);
     }
     return color;
 }
 static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
 {
     uint dst;

     if (isSRGB)
     {
         dst = pack_float_to_srgb_unorm4x8(color);
     }
     else
     {
         dst = pack_float_to_unorm4x8(color);
     }

     intToBytes(dst, bufferOffset, buffer);
 }

 static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
 {
     float4 color;
     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
     color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
     color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
     color.a = 1.0;

     if (isSRGB)
     {
         color = sRGBtoLinear(color);
     }
     return color;
 }
 static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
 {
     color.a = 1.0;
     uint dst;

     if (isSRGB)
     {
         dst = pack_float_to_srgb_unorm4x8(color);
     }
     else
     {
         dst = pack_float_to_unorm4x8(color);
     }
     int24bitToBytes(dst, bufferOffset, buffer);
 }


 static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     uint src = bytesToInt<uint>(buffer, bufferOffset);

     color = unpack_snorm4x8_to_float(src);

     return color;
 }
 static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     uint dst = pack_float_to_snorm4x8(color);

     intToBytes(dst, bufferOffset, buffer);
 }


 static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
     color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
     color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
     color.a = 1.0;

     return color;
 }
 static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     uint dst = pack_float_to_snorm4x8(color);

     int24bitToBytes(dst, bufferOffset, buffer);
 }


 static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     return readR8G8B8A8(bufferOffset, buffer, false);
 }
 static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
 }

 static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
 {
     return readR8G8B8A8(bufferOffset, buffer, true);
 }
 static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
 }


 static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     return readR8G8B8A8(bufferOffset, buffer, false).bgra;
 }
 static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     color.rgba = color.bgra;
     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
 }

 static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
 {
     return readR8G8B8A8(bufferOffset, buffer, true).bgra;
 }
 static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     color.rgba = color.bgra;
     return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
 }


 static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     return readR8G8B8(bufferOffset, buffer, false);
 }
 static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
 }

 static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
 {
     return readR8G8B8(bufferOffset, buffer, true);
 }
 static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
 }


 static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
     color.a = 1.0;
     return color;
 }
 static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
 }


 static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
 }


 static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
     color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
     return color;
 }
 static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
     buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
 }


 static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
 }

 static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
 }


 static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = as_type<char>(buffer[bufferOffset]);
     color.g = color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = static_cast<uchar>(color.r);
 }


 static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = as_type<uchar>(buffer[bufferOffset]);
     color.g = color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = static_cast<uchar>(color.r);
 }


 static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
     color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
     buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
 }

 static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
     color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
     buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g));
 }


 static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = as_type<char>(buffer[bufferOffset]);
     color.g = as_type<char>(buffer[bufferOffset + 1]);
     color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = static_cast<uchar>(color.r);
     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
 }


 static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = as_type<uchar>(buffer[bufferOffset]);
     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
     color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = static_cast<uchar>(color.r);
     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
 }


 static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = as_type<char>(buffer[bufferOffset]);
     color.g = as_type<char>(buffer[bufferOffset + 1]);
     color.b = as_type<char>(buffer[bufferOffset + 2]);
     color.a = 1;
     return color;
 }


 static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = as_type<uchar>(buffer[bufferOffset]);
     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
     color.b = as_type<uchar>(buffer[bufferOffset + 2]);
     color.a = 1;
     return color;
 }


 static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = as_type<char>(buffer[bufferOffset]);
     color.g = as_type<char>(buffer[bufferOffset + 1]);
     color.b = as_type<char>(buffer[bufferOffset + 2]);
     color.a = as_type<char>(buffer[bufferOffset + 3]);
     return color;
 }
 static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = static_cast<uchar>(color.r);
     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
     buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
     buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
 }


 static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = as_type<uchar>(buffer[bufferOffset]);
     color.g = as_type<uchar>(buffer[bufferOffset + 1]);
     color.b = as_type<uchar>(buffer[bufferOffset + 2]);
     color.a = as_type<uchar>(buffer[bufferOffset + 3]);
     return color;
 }
 static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     buffer[bufferOffset] = static_cast<uchar>(color.r);
     buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
     buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
     buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
 }


 static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
 }

 template <typename ShortType>
 static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }


 static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToShort<short>(buffer, bufferOffset);
     color.g = color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
 }


 static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToShort<ushort>(buffer, bufferOffset);
     color.g = color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
 }


 static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.rgb = 0.0;
     return color;
 }
 static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
 }


 static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.a = 1.0;
     return color;
 }
 static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
 }


 static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
     return color;
 }
 static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
 }


 static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
     shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
 }


 template <typename ShortType>
 static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }


 static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToShort<short>(buffer, bufferOffset);
     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
     color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
     shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
 }


 static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToShort<ushort>(buffer, bufferOffset);
     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
     color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
     shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
 }


 static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
     color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
     color.a = 1.0;
     return color;
 }


 template <typename ShortType>
 static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
     color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
     color.a = 1.0;
     return color;
 }


 static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToShort<short>(buffer, bufferOffset);
     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
     color.b = bytesToShort<short>(buffer, bufferOffset + 4);
     color.a = 1;
     return color;
 }


 static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToShort<ushort>(buffer, bufferOffset);
     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
     color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
     color.a = 1;
     return color;
 }


 static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
     color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
     color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
     color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
     return color;
 }
 static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
     shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
     shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
     shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
 }


 template <typename ShortType>
 static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
     color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
     color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
     color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
     return color;
 }


 static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToShort<short>(buffer, bufferOffset);
     color.g = bytesToShort<short>(buffer, bufferOffset + 2);
     color.b = bytesToShort<short>(buffer, bufferOffset + 4);
     color.a = bytesToShort<short>(buffer, bufferOffset + 6);
     return color;
 }
 static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
     shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
     shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
     shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
 }


 static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToShort<ushort>(buffer, bufferOffset);
     color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
     color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
     color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
     return color;
 }
 static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
     shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
     shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
     shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
 }


 static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
 }


 template <typename IntType>
 static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }


 static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.rgb = 0.0;
     return color;
 }
 static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
 }


 static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.a = 1.0;
     return color;
 }
 static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
 }


 static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToInt<int>(buffer, bufferOffset);
     color.g = color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     intToBytes(color.r, bufferOffset, buffer);
 }


 static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     constexpr float kDivisor = 1.0f / (1 << 16);
     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
     color.g = color.b = 0.0;
     color.a = 1.0;
     return color;
 }


 static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToInt<uint>(buffer, bufferOffset);
     color.g = color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     intToBytes(color.r, bufferOffset, buffer);
 }


 static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
     return color;
 }
 static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
     intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
 }


 static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }
 static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
     intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
 }


 template <typename IntType>
 static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }


 static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToInt<int>(buffer, bufferOffset);
     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
     color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     intToBytes(color.r, bufferOffset, buffer);
     intToBytes(color.g, bufferOffset + 4, buffer);
 }


 static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     constexpr float kDivisor = 1.0f / (1 << 16);
     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
     color.b = 0.0;
     color.a = 1.0;
     return color;
 }


 static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToInt<uint>(buffer, bufferOffset);
     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
     color.b = 0;
     color.a = 1;
     return color;
 }
 static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     intToBytes(color.r, bufferOffset, buffer);
     intToBytes(color.g, bufferOffset + 4, buffer);
 }


 static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
     color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
     color.a = 1.0;
     return color;
 }


 template <typename IntType>
 static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
     color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
     color.a = 1.0;
     return color;
 }


 static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToInt<int>(buffer, bufferOffset);
     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
     color.b = bytesToInt<int>(buffer, bufferOffset + 8);
     color.a = 1;
     return color;
 }


 static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     constexpr float kDivisor = 1.0f / (1 << 16);
     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
     color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
     color.a = 1.0;
     return color;
 }


 static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToInt<uint>(buffer, bufferOffset);
     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
     color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
     color.a = 1;
     return color;
 }


 static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
     color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
     color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
     color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
     return color;
 }
 static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
 {
     intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
     intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
     intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
     intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
 }


 template <typename IntType>
 static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
     color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
     color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
     color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
     return color;
 }


 static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
 {
     int4 color;
     color.r = bytesToInt<int>(buffer, bufferOffset);
     color.g = bytesToInt<int>(buffer, bufferOffset + 4);
     color.b = bytesToInt<int>(buffer, bufferOffset + 8);
     color.a = bytesToInt<int>(buffer, bufferOffset + 12);
     return color;
 }
 static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
 {
     intToBytes(color.r, bufferOffset, buffer);
     intToBytes(color.g, bufferOffset + 4, buffer);
     intToBytes(color.b, bufferOffset + 8, buffer);
     intToBytes(color.a, bufferOffset + 12, buffer);
 }

 static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
 {
     float4 color;
     constexpr float kDivisor = 1.0f / (1 << 16);
     color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
     color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
     color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
     color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
     return color;
 }


 static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
 {
     uint4 color;
     color.r = bytesToInt<uint>(buffer, bufferOffset);
     color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
     color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
     color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
     return color;
 }
 static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
 {
     intToBytes(color.r, bufferOffset, buffer);
     intToBytes(color.g, bufferOffset + 4, buffer);
     intToBytes(color.b, bufferOffset + 8, buffer);
     intToBytes(color.a, bufferOffset + 12, buffer);
 }
 # 1292 "./copy_buffer.metal"
 static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
 static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
 static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }

 static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }


 kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
 {
     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
 # 1336 "./copy_buffer.metal"
     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

     switch (kCopyFormatType)
     {
         case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
     }


 }

 kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
 {
     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
 # 1364 "./copy_buffer.metal"
     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

     switch (kCopyFormatType)
     {
         case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
     }


 }

 kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
 {
     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
 # 1392 "./copy_buffer.metal"
     uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

     switch (kCopyFormatType)
     {
         case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
     }


 }


 kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
 {
     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
 # 1439 "./copy_buffer.metal"
     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

     switch (kCopyFormatType)
     {
         case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
     }


 }

 kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
 {
     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
 # 1464 "./copy_buffer.metal"
     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

     switch (kCopyFormatType)
     {
         case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
     }


 }

 kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
 {
     if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
 # 1489 "./copy_buffer.metal"
     uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);

     switch (kCopyFormatType)
     {
         case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
     }


 }


 struct CopyVertexParams
 {
     uint srcBufferStartOffset;
     uint srcStride;
     uint srcComponentBytes;
     uint srcComponents;


     uchar4 srcDefaultAlphaData;

     uint dstBufferStartOffset;
     uint dstStride;
     uint dstComponents;

     uint vertexCount;
 };
 # 1539 "./copy_buffer.metal"
 template <typename IntType>
 static inline void writeFloatVertex(constant CopyVertexParams &options,
                                     uint idx,
                                     vec<IntType, 4> data,
                                     device uchar *dst)
 {
     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;

     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
     {
         floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
     }
 }

 template <>
 inline void writeFloatVertex(constant CopyVertexParams &options,
                              uint idx,
                              vec<float, 4> data,
                              device uchar *dst)
 {
     uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;

     for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
     {
         floatToBytes(data[component], dstOffset, dst);
     }
 }


 static inline void convertToFloatVertexFormat(uint index,
                                               constant CopyVertexParams &options,
                                               constant uchar *srcBuffer,
                                               device uchar *dstBuffer)
 {
 # 1585 "./copy_buffer.metal"
     uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
 # 1594 "./copy_buffer.metal"
     switch (kCopyFormatType)
     {
         case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
     }


 }


 kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
                                          constant CopyVertexParams &options [[buffer(0)]],
                                          constant uchar *srcBuffer [[buffer(1)]],
                                          device uchar *dstBuffer [[buffer(2)]])
 {
     if (index >= options.vertexCount) { return; };
     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
 }


 vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
                                          constant CopyVertexParams &options [[buffer(0)]],
                                          constant uchar *srcBuffer [[buffer(1)]],
                                          device uchar *dstBuffer [[buffer(2)]])
 {
     convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
 }


 static inline void expandVertexFormatComponents(uint index,
                                                 constant CopyVertexParams &options,
                                                 constant uchar *srcBuffer,
                                                 device uchar *dstBuffer)
 {
     uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
     uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;

     uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
     uint component;
     for (component = 0; component < options.srcComponents; ++component,
         srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
     {
         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
         {
             dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
         }
     }

     for (; component < dstComponentsBeforeAlpha;
          ++component, dstOffset += options.srcComponentBytes)
     {
         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
         {
             dstBuffer[dstOffset + byte] = 0;
         }
     }

     if (component < options.dstComponents)
     {

         for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
         {
             dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
         }
     }
 }


 kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
                                            constant CopyVertexParams &options [[buffer(0)]],
                                            constant uchar *srcBuffer [[buffer(1)]],
                                            device uchar *dstBuffer [[buffer(2)]])
 {
     if (index >= options.vertexCount) { return; };

     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
 }


 vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
                                            constant CopyVertexParams &options [[buffer(0)]],
                                            constant uchar *srcBuffer [[buffer(1)]],
                                            device uchar *dstBuffer [[buffer(2)]])
 {
     expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
 }
 # 6 "temp_master_source.metal" 2
 # 1 "./visibility.metal" 1


 constant bool kCombineWithExistingResult [[function_constant(1000)]];


 struct CombineVisibilityResultOptions
 {

     uint startOffset;

     uint numOffsets;
 };

 kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
                                     constant CombineVisibilityResultOptions &options [[buffer(0)]],
                                     constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
                                     device ushort4 *finalResults [[buffer(2)]])
 {
     if (idx > 0)
     {


         return;
     }
     ushort4 finalResult16x4;

     if (kCombineWithExistingResult)
     {
         finalResult16x4 = finalResults[0];
     }
     else
     {
         finalResult16x4 = ushort4(0, 0, 0, 0);
     }

     for (uint i = 0; i < options.numOffsets; ++i)
     {
         uint offset = options.startOffset + i;
         ushort4 renderpassResult = renderpassVisibilityResult[offset];


         finalResult16x4 = finalResult16x4 | renderpassResult;
     }
     finalResults[0] = finalResult16x4;
 }
 # 7 "temp_master_source.metal" 2
 # 1 "./rewrite_indices.metal" 1
 # 11 "./rewrite_indices.metal"
 # 1 "./rewrite_indices_shared.h" 1
 # 12 "./rewrite_indices.metal" 2
 using namespace metal;

 constant uint fixIndexBufferKey [[ function_constant(2000) ]];
 constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
 constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
 constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
 constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
 constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
 constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;


 static inline uint readIdx(
                            const device ushort *indexBufferUint16,
                            const device uint *indexBufferUint32,
                            const uint restartIndex,
                            const uint indexCount,
                            uint idx,
                            thread bool &foundRestart,
                            thread uint &indexThatRestartedFirst
                            )
 {
     uint inIndex = idx;
     if(inIndex < indexCount)
     {
         if(indexBufferIsUint16)
         {
             inIndex = indexBufferUint16[inIndex];
         }
         else if(indexBufferIsUint32)
         {
             inIndex = indexBufferUint32[inIndex];
         }
     }
     else
     {
         foundRestart = true;
         indexThatRestartedFirst = idx;
     }
     if(doPrimRestart && !foundRestart && inIndex == restartIndex)
     {
         foundRestart = true;
         indexThatRestartedFirst = idx;
     }
     return inIndex;
 }

 static inline void outputPrimitive(
                                    const device ushort *indexBufferUint16,
                                    const device uint *indexBufferUint32,
                                    device ushort *outIndexBufferUint16,
                                    device uint *outIndexBufferUint32,
                                    const uint restartIndex,
                                    const uint indexCount,
                                    thread uint &baseIndex,
                                    uint onIndex,
                                    thread uint &onOutIndex
                                    )
 {
     if(baseIndex > onIndex) return;
     bool foundRestart = false;
     uint indexThatRestartedFirst = 0;
 # 86 "./rewrite_indices.metal"
     switch(fixIndexBufferMode)
     {
         case 0x00U:
         {
             auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
             if(foundRestart)
             {
                 baseIndex = indexThatRestartedFirst + 1;
                 return;
             }

             ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
         }
         break;
         case 0x01U:
         {
             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
             if(foundRestart)
             {
                 baseIndex = indexThatRestartedFirst + 1;
                 return;
             }
             if((onIndex - baseIndex) & 1) return;

             if(fixIndexBufferKey & 0x00200U)
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
             }
             else
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
             }
         }
         break;
         case 0x03U:
         {
             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
             if(foundRestart)
             {
                 baseIndex = indexThatRestartedFirst + 1;
                 return;
             }

             if(fixIndexBufferKey & 0x00200U)
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
             }
             else
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
             }
         }
         break;
         case 0x04U:
         {
             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
             auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
             if(foundRestart)
             {
                 baseIndex = indexThatRestartedFirst + 1;
                 return;
             }
             if(((onIndex - baseIndex) % 3) != 0) return;

             if(fixIndexBufferKey & 0x00200U)
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
             }
             else
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
             }
         }
         break;
         case 0x05U:
         {
             uint isOdd = ((onIndex - baseIndex) & 1);
             auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
             auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
             auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
             if(foundRestart)
             {
                 baseIndex = indexThatRestartedFirst + 1;
                 return;
             }

             if(fixIndexBufferKey & 0x00200U)
             {
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                 ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
             }
             else
             {

                 if(isOdd)
                 {
                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                 }
                 else
                 {
                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
                     ({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
                 }
             }

             assert(onOutIndex <= (onIndex + 1) * 3);
             assert(onOutIndex <= (indexCount - 2) * 3);
         }
         break;

     }


 }

 kernel void fixIndexBuffer(
                            const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
                            const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
                            device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
                            device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
                            constant uint &indexCount [[ buffer(2) ]],
                            constant uint &primCount [[ buffer(3) ]],
                            uint prim [[thread_position_in_grid]])
 {
     constexpr uint restartIndex = 0xFFFFFFFF;
     uint baseIndex = 0;
     uint onIndex = onIndex;
     uint onOutIndex = onOutIndex;
     if(prim < primCount)
     {
         switch(fixIndexBufferMode)
         {
             case 0x00U:
                 onIndex = prim;
                 onOutIndex = prim;
                 break;
             case 0x01U:
                 onIndex = prim * 2 + 0;
                 onOutIndex = prim * 2 + 0;
                 break;
             case 0x03U:
                 onIndex = prim;
                 onOutIndex = prim * 2 + 0;
                 break;
             case 0x04U:
                 onIndex = prim * 3 + 0;
                 onOutIndex = prim * 3 + 0;
                 break;
             case 0x05U:
                 onIndex = prim;
                 onOutIndex = prim * 3 + 0;
                 break;
         }
         outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
     }
 }
 # 8 "temp_master_source.metal" 2