blob: dfde096c0a9c7905b57a46d621f86c4f4b21974e [file] [log] [blame]
// GENERATED FILE - DO NOT EDIT.
// Generated by gen_mtl_internal_shaders.py
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Metal version of combined Metal default shaders.
# 1 "temp_master_source.metal"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 396 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "temp_master_source.metal" 2
# 1 "./blit.metal" 1
# 1 "./common.h" 1
# 13 "./common.h"
# include <simd/simd.h>
# include <metal_stdlib>
# 1 "./constants.h" 1
# 11 "./constants.h"
namespace rx
{
namespace mtl_shader
{
enum
{
kTextureType2D = 0,
kTextureType2DMultisample = 1,
kTextureType2DArray = 2,
kTextureTypeCube = 3,
kTextureType3D = 4,
kTextureTypeCount = 5,
};
}
}
# 18 "./common.h" 2
using namespace metal;
constant uint32_t kNumColorOutputs [[function_constant(0)]];
constant bool kColorOutputAvailable0 = kNumColorOutputs > 0;
constant bool kColorOutputAvailable1 = kNumColorOutputs > 1;
constant bool kColorOutputAvailable2 = kNumColorOutputs > 2;
constant bool kColorOutputAvailable3 = kNumColorOutputs > 3;
namespace rx
{
namespace mtl_shader
{
constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
template <typename T>
struct MultipleColorOutputs
{
vec<T, 4> color0 [[color(0), function_constant(kColorOutputAvailable0)]];
vec<T, 4> color1 [[color(1), function_constant(kColorOutputAvailable1)]];
vec<T, 4> color2 [[color(2), function_constant(kColorOutputAvailable2)]];
vec<T, 4> color3 [[color(3), function_constant(kColorOutputAvailable3)]];
};
# 61 "./common.h"
template <typename T>
static inline MultipleColorOutputs<T> toMultipleColorOutputs(vec<T, 4> color)
{
MultipleColorOutputs<T> re;
do { if (kColorOutputAvailable0) { re.color0 = color; } } while (0);
do { if (kColorOutputAvailable1) { re.color1 = color; } } while (0);
do { if (kColorOutputAvailable2) { re.color2 = color; } } while (0);
do { if (kColorOutputAvailable3) { re.color3 = color; } } while (0);
return re;
}
static inline float3 cubeTexcoords(float2 texcoords, int face)
{
texcoords = 2.0 * texcoords - 1.0;
switch (face)
{
case 0:
return float3(1.0, -texcoords.y, -texcoords.x);
case 1:
return float3(-1.0, -texcoords.y, texcoords.x);
case 2:
return float3(texcoords.x, 1.0, texcoords.y);
case 3:
return float3(texcoords.x, -1.0, -texcoords.y);
case 4:
return float3(texcoords.x, -texcoords.y, 1.0);
case 5:
return float3(-texcoords.x, -texcoords.y, -1.0);
}
return float3(texcoords, 0);
}
template <typename T>
static inline vec<T, 4> resolveTextureMS(texture2d_ms<T> srcTexture, uint2 coords)
{
uint samples = srcTexture.get_num_samples();
vec<T, 4> output(0);
for (uint sample = 0; sample < samples; ++sample)
{
output += srcTexture.read(coords, sample);
}
output = output / samples;
return output;
}
static inline float4 sRGBtoLinear(float4 color)
{
float3 linear1 = color.rgb / 12.92;
float3 linear2 = pow((color.rgb + float3(0.055)) / 1.055, 2.4);
float3 factor = float3(color.rgb <= float3(0.04045));
float4 linear = float4(factor * linear1 + float3(1.0 - factor) * linear2, color.a);
return linear;
}
static inline float linearToSRGB(float color)
{
if (color <= 0.0f)
return 0.0f;
else if (color < 0.0031308f)
return 12.92f * color;
else if (color < 1.0f)
return 1.055f * pow(color, 0.41666f) - 0.055f;
else
return 1.0f;
}
static inline float4 linearToSRGB(float4 color)
{
return float4(linearToSRGB(color.r), linearToSRGB(color.g), linearToSRGB(color.b), color.a);
}
template <typename Short>
static inline Short bytesToShort(constant uchar *input, uint offset)
{
Short inputLo = input[offset];
Short inputHi = input[offset + 1];
return inputLo | (inputHi << 8);
}
template <typename Int>
static inline Int bytesToInt(constant uchar *input, uint offset)
{
Int input0 = input[offset];
Int input1 = input[offset + 1];
Int input2 = input[offset + 2];
Int input3 = input[offset + 3];
return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
}
template <typename Short>
static inline void shortToBytes(Short val, uint offset, device uchar *output)
{
ushort valUnsigned = as_type<ushort>(val);
output[offset] = valUnsigned & 0xff;
output[offset + 1] = (valUnsigned >> 8) & 0xff;
}
template <typename Int>
static inline void intToBytes(Int val, uint offset, device uchar *output)
{
uint valUnsigned = as_type<uint>(val);
output[offset] = valUnsigned & 0xff;
output[offset + 1] = (valUnsigned >> 8) & 0xff;
output[offset + 2] = (valUnsigned >> 16) & 0xff;
output[offset + 3] = (valUnsigned >> 24) & 0xff;
}
static inline void floatToBytes(float val, uint offset, device uchar *output)
{
intToBytes(as_type<uint>(val), offset, output);
}
static inline void int24bitToBytes(uint val, uint offset, device uchar *output)
{
output[offset] = val & 0xff;
output[offset + 1] = (val >> 8) & 0xff;
output[offset + 2] = (val >> 16) & 0xff;
}
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
static inline T getShiftedData(T input)
{
static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
"T must have at least as many bits as inputBitCount + inputBitStart.");
const T mask = (1 << inputBitCount) - 1;
return (input >> inputBitStart) & mask;
}
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
static inline T shiftData(T input)
{
static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
"T must have at least as many bits as inputBitCount + inputBitStart.");
const T mask = (1 << inputBitCount) - 1;
return (input & mask) << inputBitStart;
}
template <unsigned int inputBitCount, typename T>
static inline float normalizedToFloat(T input)
{
static_assert(inputBitCount <= (sizeof(T) * 8),
"T must have more bits than or same bits as inputBitCount.");
static_assert(inputBitCount <= 23, "Only single precision is supported");
constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
return input * inverseMax;
}
template <typename T>
static inline float normalizedToFloat(T input)
{
return normalizedToFloat<sizeof(T) * 8, T>(input);
}
template <>
inline float normalizedToFloat(short input)
{
constexpr float inverseMax = 1.0f / 0x7fff;
return static_cast<float>(input) * inverseMax;
}
template <>
inline float normalizedToFloat(int input)
{
constexpr float inverseMax = 1.0f / 0x7fffffff;
return static_cast<float>(input) * inverseMax;
}
template <>
inline float normalizedToFloat(uint input)
{
constexpr float inverseMax = 1.0f / 0xffffffff;
return static_cast<float>(input) * inverseMax;
}
template <unsigned int outputBitCount, typename T>
static inline T floatToNormalized(float input)
{
static_assert(outputBitCount <= (sizeof(T) * 8),
"T must have more bits than or same bits as inputBitCount.");
static_assert(outputBitCount <= 23, "Only single precision is supported");
return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f);
}
template <typename T>
static inline T floatToNormalized(float input)
{
return floatToNormalized<sizeof(T) * 8, T>(input);
}
}
}
# 9 "./blit.metal" 2
using namespace rx::mtl_shader;
constant bool kPremultiplyAlpha [[function_constant(1)]];
constant bool kUnmultiplyAlpha [[function_constant(2)]];
constant int kSourceTextureType [[function_constant(3)]];
constant int kSourceTexture2Type [[function_constant(4)]];
constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
constant bool kSourceTexture2Type2D = kSourceTexture2Type == kTextureType2D;
constant bool kSourceTexture2Type2DArray = kSourceTexture2Type == kTextureType2DArray;
constant bool kSourceTexture2Type2DMS = kSourceTexture2Type == kTextureType2DMultisample;
constant bool kSourceTexture2TypeCube = kSourceTexture2Type == kTextureTypeCube;
struct BlitParams
{
float2 srcTexCoords[3];
int srcLevel;
int srcLayer;
bool dstFlipViewportX;
bool dstFlipViewportY;
bool dstLuminance;
uint8_t padding[13];
};
struct BlitVSOut
{
float4 position [[position]];
float2 texCoords [[user(locn1)]];
};
vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
{
BlitVSOut output;
output.position = float4(gCorners[vid], 0.0, 1.0);
output.texCoords = options.srcTexCoords[vid];
if (options.dstFlipViewportX)
{
output.position.x = -output.position.x;
}
if (!options.dstFlipViewportY)
{
output.position.y = -output.position.y;
}
return output;
}
template <typename SrcTexture2d>
static uint2 getImageCoords(SrcTexture2d srcTexture, float2 texCoords)
{
uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
uint2 coords = uint2(texCoords * float2(dimens));
return coords;
}
template <typename T>
static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
{
uint2 coords = getImageCoords(srcTexture, texCoords);
return resolveTextureMS(srcTexture, coords);
}
template <typename T>
static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
sampler textureSampler,
float2 texCoords,
constant BlitParams &options)
{
uint depth = srcTexture.get_depth(options.srcLevel);
float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
}
# 112 "./blit.metal"
template <typename T>
static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
vec<T, 4> output;
switch (kSourceTextureType)
{
case kTextureType2D:
output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
break;
case kTextureType2DArray:
output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
level(options.srcLevel));
break;
case kTextureType2DMultisample:
output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
break;
case kTextureTypeCube:
output = srcTextureCube.sample(textureSampler,
cubeTexcoords(input.texCoords, options.srcLayer),
level(options.srcLevel));
break;
case kTextureType3D:
output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
break;
}
if (kPremultiplyAlpha)
{
output.xyz *= output.a;
}
else if (kUnmultiplyAlpha)
{
if (output.a != 0.0)
{
output.xyz /= output.a;
}
}
if (options.dstLuminance)
{
output.g = output.b = output.r;
}
return output;
}
template <typename T>
static inline MultipleColorOutputs<T> blitFS(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
vec<T, 4> output = blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
return toMultipleColorOutputs(output);
}
fragment MultipleColorOutputs<float> blitFloatFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
}
fragment MultipleColorOutputs<int> blitIntFS(BlitVSOut input [[stage_in]], texture2d<int> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<int> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<int> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<int> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<int> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
}
fragment MultipleColorOutputs<uint> blitUIntFS(BlitVSOut input [[stage_in]], texture2d<uint> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<uint> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<uint> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<uint> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<uint> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
return blitFS(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
}
fragment MultipleColorOutputs<uint> copyTextureFloatToUIntFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
float4 inputColor = blitReadTexture<>(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
uint4 output = uint4(inputColor * float4(255.0));
return toMultipleColorOutputs(output);
}
struct FragmentDepthOut
{
float depth [[depth(any)]];
};
static inline float sampleDepth(
texture2d<float> srcTexture2d [[function_constant(kSourceTextureType2D)]],
texture2d_array<float> srcTexture2dArray [[function_constant(kSourceTextureType2DArray)]],
texture2d_ms<float> srcTexture2dMS [[function_constant(kSourceTextureType2DMS)]],
texturecube<float> srcTextureCube [[function_constant(kSourceTextureTypeCube)]],
float2 texCoords,
constant BlitParams &options)
{
float4 output;
constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
switch (kSourceTextureType)
{
case kTextureType2D:
output = srcTexture2d.sample(textureSampler, texCoords, level(options.srcLevel));
break;
case kTextureType2DArray:
output = srcTexture2dArray.sample(textureSampler, texCoords, options.srcLayer,
level(options.srcLevel));
break;
case kTextureType2DMultisample:
output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
break;
case kTextureTypeCube:
output =
srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, options.srcLayer),
level(options.srcLevel));
break;
}
return output.r;
}
fragment FragmentDepthOut blitDepthFS(BlitVSOut input [[stage_in]],
texture2d<float> srcTexture2d
[[texture(0), function_constant(kSourceTextureType2D)]],
texture2d_array<float> srcTexture2dArray
[[texture(0), function_constant(kSourceTextureType2DArray)]],
texture2d_ms<float> srcTexture2dMS
[[texture(0), function_constant(kSourceTextureType2DMS)]],
texturecube<float> srcTextureCube
[[texture(0), function_constant(kSourceTextureTypeCube)]],
constant BlitParams &options [[buffer(0)]])
{
FragmentDepthOut re;
re.depth = sampleDepth(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
input.texCoords, options);
return re;
}
static inline uint32_t sampleStencil(
texture2d<uint32_t> srcTexture2d [[function_constant(kSourceTexture2Type2D)]],
texture2d_array<uint32_t> srcTexture2dArray [[function_constant(kSourceTexture2Type2DArray)]],
texture2d_ms<uint32_t> srcTexture2dMS [[function_constant(kSourceTexture2Type2DMS)]],
texturecube<uint32_t> srcTextureCube [[function_constant(kSourceTexture2TypeCube)]],
float2 texCoords,
int srcLevel,
int srcLayer)
{
uint4 output;
constexpr sampler textureSampler(mag_filter::nearest, min_filter::nearest);
switch (kSourceTexture2Type)
{
case kTextureType2D:
output = srcTexture2d.sample(textureSampler, texCoords, level(srcLevel));
break;
case kTextureType2DArray:
output = srcTexture2dArray.sample(textureSampler, texCoords, srcLayer, level(srcLevel));
break;
case kTextureType2DMultisample:
output = srcTexture2dMS.read(getImageCoords(srcTexture2dMS, texCoords), 0);
break;
case kTextureTypeCube:
output = srcTextureCube.sample(textureSampler, cubeTexcoords(texCoords, srcLayer),
level(srcLevel));
break;
}
return output.r;
}
struct BlitStencilToBufferParams
{
float2 srcStartTexCoords;
float2 srcTexCoordSteps;
int srcLevel;
int srcLayer;
uint2 dstSize;
uint dstBufferRowPitch;
bool resolveMS;
};
kernel void blitStencilToBufferCS(ushort2 gIndices [[thread_position_in_grid]],
texture2d<uint32_t> srcTexture2d
[[texture(1), function_constant(kSourceTexture2Type2D)]],
texture2d_array<uint32_t> srcTexture2dArray
[[texture(1), function_constant(kSourceTexture2Type2DArray)]],
texture2d_ms<uint32_t> srcTexture2dMS
[[texture(1), function_constant(kSourceTexture2Type2DMS)]],
texturecube<uint32_t> srcTextureCube
[[texture(1), function_constant(kSourceTexture2TypeCube)]],
constant BlitStencilToBufferParams &options [[buffer(0)]],
device uchar *buffer [[buffer(1)]])
{
if (gIndices.x >= options.dstSize.x || gIndices.y >= options.dstSize.y)
{
return;
}
float2 srcTexCoords = options.srcStartTexCoords + float2(gIndices) * options.srcTexCoordSteps;
if (kSourceTexture2Type == kTextureType2DMultisample && !options.resolveMS)
{
uint samples = srcTexture2dMS.get_num_samples();
uint2 imageCoords = getImageCoords(srcTexture2dMS, srcTexCoords);
uint bufferOffset = options.dstBufferRowPitch * gIndices.y + samples * gIndices.x;
for (uint sample = 0; sample < samples; ++sample)
{
uint stencilPerSample = srcTexture2dMS.read(imageCoords, sample).r;
buffer[bufferOffset + sample] = static_cast<uchar>(stencilPerSample);
}
}
else
{
uint32_t stencil =
sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
srcTexCoords, options.srcLevel, options.srcLayer);
buffer[options.dstBufferRowPitch * gIndices.y + gIndices.x] = static_cast<uchar>(stencil);
}
}
#if __METAL_VERSION__ >= 210
struct FragmentStencilOut
{
uint32_t stencil [[stencil]];
};
struct FragmentDepthStencilOut
{
float depth [[depth(any)]];
uint32_t stencil [[stencil]];
};
fragment FragmentStencilOut blitStencilFS(
BlitVSOut input [[stage_in]],
texture2d<uint32_t> srcTexture2d [[texture(1), function_constant(kSourceTexture2Type2D)]],
texture2d_array<uint32_t> srcTexture2dArray
[[texture(1), function_constant(kSourceTexture2Type2DArray)]],
texture2d_ms<uint32_t> srcTexture2dMS
[[texture(1), function_constant(kSourceTexture2Type2DMS)]],
texturecube<uint32_t> srcTextureCube [[texture(1), function_constant(kSourceTexture2TypeCube)]],
constant BlitParams &options [[buffer(0)]])
{
FragmentStencilOut re;
re.stencil = sampleStencil(srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube,
input.texCoords, options.srcLevel, options.srcLayer);
return re;
}
fragment FragmentDepthStencilOut blitDepthStencilFS(
BlitVSOut input [[stage_in]],
texture2d<float> srcDepthTexture2d [[texture(0), function_constant(kSourceTextureType2D)]],
texture2d_array<float> srcDepthTexture2dArray
[[texture(0), function_constant(kSourceTextureType2DArray)]],
texture2d_ms<float> srcDepthTexture2dMS
[[texture(0), function_constant(kSourceTextureType2DMS)]],
texturecube<float> srcDepthTextureCube
[[texture(0), function_constant(kSourceTextureTypeCube)]],
texture2d<uint32_t> srcStencilTexture2d
[[texture(1), function_constant(kSourceTexture2Type2D)]],
texture2d_array<uint32_t> srcStencilTexture2dArray
[[texture(1), function_constant(kSourceTexture2Type2DArray)]],
texture2d_ms<uint32_t> srcStencilTexture2dMS
[[texture(1), function_constant(kSourceTexture2Type2DMS)]],
texturecube<uint32_t> srcStencilTextureCube
[[texture(1), function_constant(kSourceTexture2TypeCube)]],
constant BlitParams &options [[buffer(0)]])
{
FragmentDepthStencilOut re;
re.depth = sampleDepth(srcDepthTexture2d, srcDepthTexture2dArray, srcDepthTexture2dMS,
srcDepthTextureCube, input.texCoords, options);
re.stencil =
sampleStencil(srcStencilTexture2d, srcStencilTexture2dArray, srcStencilTexture2dMS,
srcStencilTextureCube, input.texCoords, options.srcLevel, options.srcLayer);
return re;
}
#endif
# 2 "temp_master_source.metal" 2
# 1 "./clear.metal" 1
# 10 "./clear.metal"
using namespace rx::mtl_shader;
struct ClearParams
{
float4 clearColor;
float clearDepth;
};
vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
constant ClearParams &clearParams [[buffer(0)]])
{
return float4(gCorners[vid], clearParams.clearDepth, 1.0);
}
fragment MultipleColorOutputs<float> clearFloatFS(constant ClearParams &clearParams [[buffer(0)]])
{
return toMultipleColorOutputs(clearParams.clearColor);
}
fragment MultipleColorOutputs<int> clearIntFS(constant ClearParams &clearParams [[buffer(0)]])
{
return toMultipleColorOutputs(as_type<int4>(clearParams.clearColor));
}
fragment MultipleColorOutputs<uint> clearUIntFS(constant ClearParams &clearParams [[buffer(0)]])
{
return toMultipleColorOutputs(as_type<uint4>(clearParams.clearColor));
}
# 3 "temp_master_source.metal" 2
# 1 "./gen_indices.metal" 1
using namespace rx::mtl_shader;
constant bool kSourceBufferAligned[[function_constant(100)]];
constant bool kSourceIndexIsU8[[function_constant(200)]];
constant bool kSourceIndexIsU16[[function_constant(300)]];
constant bool kSourceIndexIsU32[[function_constant(400)]];
constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
struct IndexConversionParams
{
uint32_t srcOffset;
uint32_t indexCount;
bool primitiveRestartEnabled;
};
inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
{
return inputAligned[offset / 2 + idx];
}
inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
{
return inputAligned[offset / 4 + idx];
}
inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
{
return input[offset + idx];
}
inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
{
ushort inputLo = input[offset + 2 * idx];
ushort inputHi = input[offset + 2 * idx + 1];
return inputLo | (inputHi << 8);
}
inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
{
uint input0 = input[offset + 4 * idx];
uint input1 = input[offset + 4 * idx + 1];
uint input2 = input[offset + 4 * idx + 2];
uint input3 = input[offset + 4 * idx + 3];
return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
}
kernel void convertIndexU8ToU16(uint idx [[thread_position_in_grid]],
constant IndexConversionParams &options [[buffer(0)]],
constant uchar *input [[buffer(1)]],
device ushort *output [[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
uchar value = getIndexAligned(input, options.srcOffset, idx);
if (options.primitiveRestartEnabled && value == 0xff)
{
output[idx] = 0xffff;
}
else
{
output[idx] = value;
}
}
kernel void convertIndexU16(uint idx [[thread_position_in_grid]],
constant IndexConversionParams &options [[buffer(0)]],
constant uchar *input
[[buffer(1), function_constant(kSourceBufferUnaligned)]],
constant ushort *inputAligned
[[buffer(1), function_constant(kSourceBufferAligned)]],
device ushort *output [[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
ushort value;
if (kSourceBufferAligned)
{
value = getIndexAligned(inputAligned, options.srcOffset, idx);
}
else
{
value = getIndexUnalignedU16(input, options.srcOffset, idx);
}
output[idx] = value;
}
kernel void convertIndexU32(uint idx [[thread_position_in_grid]],
constant IndexConversionParams &options [[buffer(0)]],
constant uchar *input
[[buffer(1), function_constant(kSourceBufferUnaligned)]],
constant uint *inputAligned
[[buffer(1), function_constant(kSourceBufferAligned)]],
device uint *output [[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
uint value;
if (kSourceBufferAligned)
{
value = getIndexAligned(inputAligned, options.srcOffset, idx);
}
else
{
value = getIndexUnalignedU32(input, options.srcOffset, idx);
}
output[idx] = value;
}
struct IndexFromArrayParams
{
uint firstVertex;
uint vertexCount;
};
kernel void genTriFanIndicesFromArray(uint idx [[thread_position_in_grid]],
constant IndexFromArrayParams &options [[buffer(0)]],
device uint *output [[buffer(2)]])
{
if (idx >= options.vertexCount) { return; };
uint vertexIdx = options.firstVertex + 2 + idx;
output[3 * idx] = options.firstVertex;
output[3 * idx + 1] = vertexIdx - 1;
output[3 * idx + 2] = vertexIdx;
}
inline uint getIndexU32(uint offset,
uint idx,
constant uchar *inputU8 [[function_constant(kUseSourceBufferU8)]],
constant ushort *inputU16 [[function_constant(kUseSourceBufferU16)]],
constant uint *inputU32 [[function_constant(kUseSourceBufferU32)]])
{
if (kUseSourceBufferU8)
{
if (kSourceIndexIsU16)
{
return getIndexUnalignedU16(inputU8, offset, idx);
}
else if (kSourceIndexIsU32)
{
return getIndexUnalignedU32(inputU8, offset, idx);
}
return getIndexAligned(inputU8, offset, idx);
}
else if (kUseSourceBufferU16)
{
return getIndexAligned(inputU16, offset, idx);
}
else if (kUseSourceBufferU32)
{
return getIndexAligned(inputU32, offset, idx);
}
return 0;
}
kernel void genTriFanIndicesFromElements(uint idx [[thread_position_in_grid]],
constant IndexConversionParams &options [[buffer(0)]],
constant uchar *inputU8
[[buffer(1), function_constant(kUseSourceBufferU8)]],
constant ushort *inputU16
[[buffer(1), function_constant(kUseSourceBufferU16)]],
constant uint *inputU32
[[buffer(1), function_constant(kUseSourceBufferU32)]],
device uint *output [[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
uint elemIdx = 2 + idx;
output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
}
kernel void genLineLoopIndicesFromArray(uint idx [[thread_position_in_grid]],
constant IndexFromArrayParams &options [[buffer(0)]],
device uint *output [[buffer(2)]])
{
uint totalIndices = options.vertexCount + 1;
if (idx >= totalIndices) { return; };
output[idx] = options.firstVertex + idx % options.vertexCount;
}
kernel void genLineLoopIndicesFromElements(uint idx [[thread_position_in_grid]],
constant IndexConversionParams &options [[buffer(0)]],
constant uchar *inputU8
[[buffer(1), function_constant(kUseSourceBufferU8)]],
constant ushort *inputU16
[[buffer(1), function_constant(kUseSourceBufferU16)]],
constant uint *inputU32
[[buffer(1), function_constant(kUseSourceBufferU32)]],
device uint *output [[buffer(2)]])
{
uint totalTargetIndices = options.indexCount + 1;
if (idx >= totalTargetIndices) { return; };
output[idx] =
getIndexU32(options.srcOffset, idx % options.indexCount, inputU8, inputU16, inputU32);
}
# 4 "temp_master_source.metal" 2
# 1 "./gen_mipmap.metal" 1
using namespace rx::mtl_shader;
# 31 "./gen_mipmap.metal"
struct GenMipParams
{
uint srcLevel;
uint numMipLevelsToGen;
bool sRGB;
};
kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
ushort3 gIndices [[thread_position_in_grid]],
texture3d<float> srcTexture [[texture(0)]],
texture3d<float, access::write> dstMip1 [[texture(1)]],
texture3d<float, access::write> dstMip2 [[texture(2)]],
texture3d<float, access::write> dstMip3 [[texture(3)]],
texture3d<float, access::write> dstMip4 [[texture(4)]],
constant GenMipParams &options [[buffer(0)]])
{
ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth());
bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z;
constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
threadgroup float sR[(8 * 8 * 8)];
threadgroup float sG[(8 * 8 * 8)];
threadgroup float sB[(8 * 8 * 8)];
threadgroup float sA[(8 * 8 * 8)];
float4 texel1;
if (validThread)
{
float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize);
texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
dstMip1.write(texel1, gIndices);
}
else
{
lIndex = 0xffffffff;
}
if (options.numMipLevelsToGen == 1)
{
return;
}
if (options.sRGB)
{
texel1 = linearToSRGB(texel1);
}
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x49) == 0)
{
bool3 atEdge = gIndices == (mipSize - ushort3(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
float4 texel4 = (atEdge.z) ? (texel1) : (float4(sR[lIndex + (8 * 8)], sG[lIndex + (8 * 8)], sB[lIndex + (8 * 8)], sA[lIndex + (8 * 8)]));
float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + ((8 * 8) + 1)], sG[lIndex + ((8 * 8) + 1)], sB[lIndex + ((8 * 8) + 1)], sA[lIndex + ((8 * 8) + 1)]));
float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + ((8 * 8) + 8)], sG[lIndex + ((8 * 8) + 8)], sB[lIndex + ((8 * 8) + 8)], sA[lIndex + ((8 * 8) + 8)]));
float4 texel8 =
(atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + ((8 * 8) + 8 + 1)], sG[lIndex + ((8 * 8) + 8 + 1)], sB[lIndex + ((8 * 8) + 8 + 1)], sA[lIndex + ((8 * 8) + 8 + 1)]));
texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
}
if (options.numMipLevelsToGen == 2)
{
return;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0xdb) == 0)
{
mipSize = max(mipSize >> 1, ushort3(1));
bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
float4 texel3 =
(atEdge.y) ? (texel1) : (float4(sR[lIndex + (2 * 8)], sG[lIndex + (2 * 8)], sB[lIndex + (2 * 8)], sA[lIndex + (2 * 8)]));
float4 texel4 =
(atEdge.z) ? (texel1) : (float4(sR[lIndex + (2 * (8 * 8))], sG[lIndex + (2 * (8 * 8))], sB[lIndex + (2 * (8 * 8))], sA[lIndex + (2 * (8 * 8))]));
float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (2 * (8 * 8) + 2)], sG[lIndex + (2 * (8 * 8) + 2)], sB[lIndex + (2 * (8 * 8) + 2)], sA[lIndex + (2 * (8 * 8) + 2)]));
float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8)], sG[lIndex + (2 * (8 * 8) + 2 * 8)], sB[lIndex + (2 * (8 * 8) + 2 * 8)], sA[lIndex + (2 * (8 * 8) + 2 * 8)]));
float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sG[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sB[lIndex + (2 * (8 * 8) + 2 * 8 + 2)], sA[lIndex + (2 * (8 * 8) + 2 * 8 + 2)]));
texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
}
if (options.numMipLevelsToGen == 3)
{
return;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x1ff) == 0)
{
mipSize = max(mipSize >> 1, ushort3(1));
bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
float4 texel3 =
(atEdge.y) ? (texel1) : (float4(sR[lIndex + (4 * 8)], sG[lIndex + (4 * 8)], sB[lIndex + (4 * 8)], sA[lIndex + (4 * 8)]));
float4 texel4 =
(atEdge.z) ? (texel1) : (float4(sR[lIndex + (4 * (8 * 8))], sG[lIndex + (4 * (8 * 8))], sB[lIndex + (4 * (8 * 8))], sA[lIndex + (4 * (8 * 8))]));
float4 texel5 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
float4 texel6 = (atEdge.x | atEdge.z) ? (texel2) : (float4(sR[lIndex + (4 * (8 * 8) + 4)], sG[lIndex + (4 * (8 * 8) + 4)], sB[lIndex + (4 * (8 * 8) + 4)], sA[lIndex + (4 * (8 * 8) + 4)]));
float4 texel7 = (atEdge.y | atEdge.z) ? (texel3) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8)], sG[lIndex + (4 * (8 * 8) + 4 * 8)], sB[lIndex + (4 * (8 * 8) + 4 * 8)], sA[lIndex + (4 * (8 * 8) + 4 * 8)]));
float4 texel8 = (atEdge.x | atEdge.y | atEdge.z) ? (texel5) : (float4(sR[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sG[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sB[lIndex + (4 * (8 * 8) + 4 * 8 + 4)], sA[lIndex + (4 * (8 * 8) + 4 * 8 + 4)]));
texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0;
dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
}
}
kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]],
ushort2 gIndices [[thread_position_in_grid]],
texture2d<float> srcTexture [[texture(0)]],
texture2d<float, access::write> dstMip1 [[texture(1)]],
texture2d<float, access::write> dstMip2 [[texture(2)]],
texture2d<float, access::write> dstMip3 [[texture(3)]],
texture2d<float, access::write> dstMip4 [[texture(4)]],
constant GenMipParams &options [[buffer(0)]])
{
uint firstMipLevel = options.srcLevel + 1;
ushort2 mipSize =
ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y;
constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
threadgroup float sR[(8 * 8)];
threadgroup float sG[(8 * 8)];
threadgroup float sB[(8 * 8)];
threadgroup float sA[(8 * 8)];
float4 texel1;
if (validThread)
{
float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize);
texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel));
dstMip1.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices);
}
else
{
lIndex = 0xffffffff;
}
if (options.numMipLevelsToGen == 1)
{
return;
}
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x09) == 0)
{
bool2 atEdge = gIndices == (mipSize - ushort2(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 1);
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
}
if (options.numMipLevelsToGen == 2)
{
return;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x1b) == 0)
{
mipSize = max(mipSize >> 1, ushort2(1));
bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
float4 texel3 =
(atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 2);
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
}
if (options.numMipLevelsToGen == 3)
{
return;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x3f) == 0)
{
mipSize = max(mipSize >> 1, ushort2(1));
bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
float4 texel3 =
(atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices >> 3);
}
}
template <typename TextureTypeR, typename TextureTypeW>
static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps(
uint lIndex,
ushort3 gIndices,
TextureTypeR srcTexture,
TextureTypeW dstMip2,
TextureTypeW dstMip3,
TextureTypeW dstMip4,
ushort2 mip1Size,
float4 mip1Texel,
threadgroup float *sR,
threadgroup float *sG,
threadgroup float *sB,
threadgroup float *sA,
constant GenMipParams &options)
{
ushort2 mipSize = mip1Size;
float4 texel1 = mip1Texel;
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x09) == 0)
{
bool2 atEdge = gIndices.xy == (mipSize - ushort2(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 1], sG[lIndex + 1], sB[lIndex + 1], sA[lIndex + 1]));
float4 texel3 = (atEdge.y) ? (texel1) : (float4(sR[lIndex + 8], sG[lIndex + 8], sB[lIndex + 8], sA[lIndex + 8]));
float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (8 + 1)], sG[lIndex + (8 + 1)], sB[lIndex + (8 + 1)], sA[lIndex + (8 + 1)]));
texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
dstMip2.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 1, gIndices.z);
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
}
if (options.numMipLevelsToGen == 2)
{
return;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x1b) == 0)
{
mipSize = max(mipSize >> 1, ushort2(1));
bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 2], sG[lIndex + 2], sB[lIndex + 2], sA[lIndex + 2]));
float4 texel3 =
(atEdge.y) ? (texel1) : (float4(sR[lIndex + 2 * 8], sG[lIndex + 2 * 8], sB[lIndex + 2 * 8], sA[lIndex + 2 * 8]));
float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (2 * 8 + 2)], sG[lIndex + (2 * 8 + 2)], sB[lIndex + (2 * 8 + 2)], sA[lIndex + (2 * 8 + 2)]));
texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
dstMip3.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 2, gIndices.z);
sR[lIndex] = texel1.r; sG[lIndex] = texel1.g; sB[lIndex] = texel1.b; sA[lIndex] = texel1.a;;
}
if (options.numMipLevelsToGen == 3)
{
return;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((lIndex & 0x3f) == 0)
{
mipSize = max(mipSize >> 1, ushort2(1));
bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1));
float4 texel2 = (atEdge.x) ? (texel1) : (float4(sR[lIndex + 4], sG[lIndex + 4], sB[lIndex + 4], sA[lIndex + 4]));
float4 texel3 =
(atEdge.y) ? (texel1) : (float4(sR[lIndex + 4 * 8], sG[lIndex + 4 * 8], sB[lIndex + 4 * 8], sA[lIndex + 4 * 8]));
float4 texel4 = (atEdge.x | atEdge.y) ? (texel2) : (float4(sR[lIndex + (4 * 8 + 4)], sG[lIndex + (4 * 8 + 4)], sB[lIndex + (4 * 8 + 4)], sA[lIndex + (4 * 8 + 4)]));
texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0;
dstMip4.write((options.sRGB ? sRGBtoLinear(texel1) : texel1), gIndices.xy >> 3, gIndices.z);
}
}
kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]],
ushort3 gIndices [[thread_position_in_grid]],
texturecube<float> srcTexture [[texture(0)]],
texturecube<float, access::write> dstMip1 [[texture(1)]],
texturecube<float, access::write> dstMip2 [[texture(2)]],
texturecube<float, access::write> dstMip3 [[texture(3)]],
texturecube<float, access::write> dstMip4 [[texture(4)]],
constant GenMipParams &options [[buffer(0)]])
{
uint firstMipLevel = options.srcLevel + 1;
ushort2 mip1Size =
ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
float4 mip1Texel;
if (validThread)
{
float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)),
level(options.srcLevel));
dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
}
else
{
lIndex = 0xffffffff;
}
if (options.numMipLevelsToGen == 1)
{
return;
}
threadgroup float sR[(8 * 8)];
threadgroup float sG[(8 * 8)];
threadgroup float sB[(8 * 8)];
threadgroup float sA[(8 * 8)];
generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
mip1Size, mip1Texel, sR, sG, sB, sA, options);
}
kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]],
ushort3 gIndices [[thread_position_in_grid]],
texture2d_array<float> srcTexture [[texture(0)]],
texture2d_array<float, access::write> dstMip1 [[texture(1)]],
texture2d_array<float, access::write> dstMip2 [[texture(2)]],
texture2d_array<float, access::write> dstMip3 [[texture(3)]],
texture2d_array<float, access::write> dstMip4 [[texture(4)]],
constant GenMipParams &options [[buffer(0)]])
{
uint firstMipLevel = options.srcLevel + 1;
ushort2 mip1Size =
ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel));
bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y;
constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear);
float4 mip1Texel;
if (validThread)
{
float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size);
mip1Texel =
srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel));
dstMip1.write((options.sRGB ? sRGBtoLinear(mip1Texel) : mip1Texel), gIndices.xy, gIndices.z);
}
else
{
lIndex = 0xffffffff;
}
if (options.numMipLevelsToGen == 1)
{
return;
}
threadgroup float sR[(8 * 8)];
threadgroup float sG[(8 * 8)];
threadgroup float sB[(8 * 8)];
threadgroup float sA[(8 * 8)];
generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4,
mip1Size, mip1Texel, sR, sG, sB, sA, options);
}
# 5 "temp_master_source.metal" 2
# 1 "./copy_buffer.metal" 1
# 12 "./copy_buffer.metal"
#include <metal_pack>
# 1 "./format_autogen.h" 1
# 11 "./format_autogen.h"
namespace rx
{
namespace mtl_shader
{
namespace FormatID
{
enum
{
NONE,
D16_UNORM,
D24_UNORM_S8_UINT,
D24_UNORM_X8_UINT,
D32_FLOAT,
D32_FLOAT_S8X24_UINT,
D32_UNORM,
S8_UINT,
A16_FLOAT,
A1R5G5B5_UNORM,
A2R10G10B10_SINT_VERTEX,
A2R10G10B10_SNORM_VERTEX,
A2R10G10B10_SSCALED_VERTEX,
A2R10G10B10_UINT_VERTEX,
A2R10G10B10_UNORM_VERTEX,
A2R10G10B10_USCALED_VERTEX,
A32_FLOAT,
A8_UNORM,
ASTC_10x10_SRGB_BLOCK,
ASTC_10x10_UNORM_BLOCK,
ASTC_10x5_SRGB_BLOCK,
ASTC_10x5_UNORM_BLOCK,
ASTC_10x6_SRGB_BLOCK,
ASTC_10x6_UNORM_BLOCK,
ASTC_10x8_SRGB_BLOCK,
ASTC_10x8_UNORM_BLOCK,
ASTC_12x10_SRGB_BLOCK,
ASTC_12x10_UNORM_BLOCK,
ASTC_12x12_SRGB_BLOCK,
ASTC_12x12_UNORM_BLOCK,
ASTC_3x3x3_UNORM_BLOCK,
ASTC_3x3x3_UNORM_SRGB_BLOCK,
ASTC_4x3x3_UNORM_BLOCK,
ASTC_4x3x3_UNORM_SRGB_BLOCK,
ASTC_4x4_SRGB_BLOCK,
ASTC_4x4_UNORM_BLOCK,
ASTC_4x4x3_UNORM_BLOCK,
ASTC_4x4x3_UNORM_SRGB_BLOCK,
ASTC_4x4x4_UNORM_BLOCK,
ASTC_4x4x4_UNORM_SRGB_BLOCK,
ASTC_5x4_SRGB_BLOCK,
ASTC_5x4_UNORM_BLOCK,
ASTC_5x4x4_UNORM_BLOCK,
ASTC_5x4x4_UNORM_SRGB_BLOCK,
ASTC_5x5_SRGB_BLOCK,
ASTC_5x5_UNORM_BLOCK,
ASTC_5x5x4_UNORM_BLOCK,
ASTC_5x5x4_UNORM_SRGB_BLOCK,
ASTC_5x5x5_UNORM_BLOCK,
ASTC_5x5x5_UNORM_SRGB_BLOCK,
ASTC_6x5_SRGB_BLOCK,
ASTC_6x5_UNORM_BLOCK,
ASTC_6x5x5_UNORM_BLOCK,
ASTC_6x5x5_UNORM_SRGB_BLOCK,
ASTC_6x6_SRGB_BLOCK,
ASTC_6x6_UNORM_BLOCK,
ASTC_6x6x5_UNORM_BLOCK,
ASTC_6x6x5_UNORM_SRGB_BLOCK,
ASTC_6x6x6_UNORM_BLOCK,
ASTC_6x6x6_UNORM_SRGB_BLOCK,
ASTC_8x5_SRGB_BLOCK,
ASTC_8x5_UNORM_BLOCK,
ASTC_8x6_SRGB_BLOCK,
ASTC_8x6_UNORM_BLOCK,
ASTC_8x8_SRGB_BLOCK,
ASTC_8x8_UNORM_BLOCK,
B10G10R10A2_UNORM,
B4G4R4A4_UNORM,
B5G5R5A1_UNORM,
B5G6R5_UNORM,
B8G8R8A8_TYPELESS,
B8G8R8A8_TYPELESS_SRGB,
B8G8R8A8_UNORM,
B8G8R8A8_UNORM_SRGB,
B8G8R8X8_UNORM,
BC1_RGBA_UNORM_BLOCK,
BC1_RGBA_UNORM_SRGB_BLOCK,
BC1_RGB_UNORM_BLOCK,
BC1_RGB_UNORM_SRGB_BLOCK,
BC2_RGBA_UNORM_BLOCK,
BC2_RGBA_UNORM_SRGB_BLOCK,
BC3_RGBA_UNORM_BLOCK,
BC3_RGBA_UNORM_SRGB_BLOCK,
BC4_RED_SNORM_BLOCK,
BC4_RED_UNORM_BLOCK,
BC5_RG_SNORM_BLOCK,
BC5_RG_UNORM_BLOCK,
BC6H_RGB_SFLOAT_BLOCK,
BC6H_RGB_UFLOAT_BLOCK,
BC7_RGBA_UNORM_BLOCK,
BC7_RGBA_UNORM_SRGB_BLOCK,
EAC_R11G11_SNORM_BLOCK,
EAC_R11G11_UNORM_BLOCK,
EAC_R11_SNORM_BLOCK,
EAC_R11_UNORM_BLOCK,
ETC1_LOSSY_DECODE_R8G8B8_UNORM_BLOCK,
ETC1_R8G8B8_UNORM_BLOCK,
ETC2_R8G8B8A1_SRGB_BLOCK,
ETC2_R8G8B8A1_UNORM_BLOCK,
ETC2_R8G8B8A8_SRGB_BLOCK,
ETC2_R8G8B8A8_UNORM_BLOCK,
ETC2_R8G8B8_SRGB_BLOCK,
ETC2_R8G8B8_UNORM_BLOCK,
G8_B8R8_2PLANE_420_UNORM,
G8_B8_R8_3PLANE_420_UNORM,
L16A16_FLOAT,
L16_FLOAT,
L32A32_FLOAT,
L32_FLOAT,
L8A8_UNORM,
L8_UNORM,
PVRTC1_RGBA_2BPP_UNORM_BLOCK,
PVRTC1_RGBA_2BPP_UNORM_SRGB_BLOCK,
PVRTC1_RGBA_4BPP_UNORM_BLOCK,
PVRTC1_RGBA_4BPP_UNORM_SRGB_BLOCK,
PVRTC1_RGB_2BPP_UNORM_BLOCK,
PVRTC1_RGB_2BPP_UNORM_SRGB_BLOCK,
PVRTC1_RGB_4BPP_UNORM_BLOCK,
PVRTC1_RGB_4BPP_UNORM_SRGB_BLOCK,
R10G10B10A2_SINT,
R10G10B10A2_SNORM,
R10G10B10A2_SSCALED,
R10G10B10A2_UINT,
R10G10B10A2_UNORM,
R10G10B10A2_USCALED,
R10G10B10X2_UNORM,
R11G11B10_FLOAT,
R16G16B16A16_FLOAT,
R16G16B16A16_SINT,
R16G16B16A16_SNORM,
R16G16B16A16_SSCALED,
R16G16B16A16_UINT,
R16G16B16A16_UNORM,
R16G16B16A16_USCALED,
R16G16B16_FLOAT,
R16G16B16_SINT,
R16G16B16_SNORM,
R16G16B16_SSCALED,
R16G16B16_UINT,
R16G16B16_UNORM,
R16G16B16_USCALED,
R16G16_FLOAT,
R16G16_SINT,
R16G16_SNORM,
R16G16_SSCALED,
R16G16_UINT,
R16G16_UNORM,
R16G16_USCALED,
R16_FLOAT,
R16_SINT,
R16_SNORM,
R16_SSCALED,
R16_UINT,
R16_UNORM,
R16_USCALED,
R32G32B32A32_FIXED,
R32G32B32A32_FLOAT,
R32G32B32A32_SINT,
R32G32B32A32_SNORM,
R32G32B32A32_SSCALED,
R32G32B32A32_UINT,
R32G32B32A32_UNORM,
R32G32B32A32_USCALED,
R32G32B32_FIXED,
R32G32B32_FLOAT,
R32G32B32_SINT,
R32G32B32_SNORM,
R32G32B32_SSCALED,
R32G32B32_UINT,
R32G32B32_UNORM,
R32G32B32_USCALED,
R32G32_FIXED,
R32G32_FLOAT,
R32G32_SINT,
R32G32_SNORM,
R32G32_SSCALED,
R32G32_UINT,
R32G32_UNORM,
R32G32_USCALED,
R32_FIXED,
R32_FLOAT,
R32_SINT,
R32_SNORM,
R32_SSCALED,
R32_UINT,
R32_UNORM,
R32_USCALED,
R4G4B4A4_UNORM,
R5G5B5A1_UNORM,
R5G6B5_UNORM,
R8G8B8A8_SINT,
R8G8B8A8_SNORM,
R8G8B8A8_SSCALED,
R8G8B8A8_TYPELESS,
R8G8B8A8_TYPELESS_SRGB,
R8G8B8A8_UINT,
R8G8B8A8_UNORM,
R8G8B8A8_UNORM_SRGB,
R8G8B8A8_USCALED,
R8G8B8X8_UNORM,
R8G8B8_SINT,
R8G8B8_SNORM,
R8G8B8_SSCALED,
R8G8B8_UINT,
R8G8B8_UNORM,
R8G8B8_UNORM_SRGB,
R8G8B8_USCALED,
R8G8_SINT,
R8G8_SNORM,
R8G8_SSCALED,
R8G8_UINT,
R8G8_UNORM,
R8G8_UNORM_SRGB,
R8G8_USCALED,
R8_SINT,
R8_SNORM,
R8_SSCALED,
R8_UINT,
R8_UNORM,
R8_UNORM_SRGB,
R8_USCALED,
R9G9B9E5_SHAREDEXP,
X2R10G10B10_SINT_VERTEX,
X2R10G10B10_SNORM_VERTEX,
X2R10G10B10_SSCALED_VERTEX,
X2R10G10B10_UINT_VERTEX,
X2R10G10B10_UNORM_VERTEX,
X2R10G10B10_USCALED_VERTEX
};
}
}
}
# 16 "./copy_buffer.metal" 2
using namespace rx::mtl_shader;
constant int kCopyFormatType [[function_constant(10)]];
constant int kCopyTextureType [[function_constant(20)]];
constant bool kCopyTextureType2D = kCopyTextureType == kTextureType2D;
constant bool kCopyTextureType2DArray = kCopyTextureType == kTextureType2DArray;
constant bool kCopyTextureType2DMS = kCopyTextureType == kTextureType2DMultisample;
constant bool kCopyTextureTypeCube = kCopyTextureType == kTextureTypeCube;
constant bool kCopyTextureType3D = kCopyTextureType == kTextureType3D;
struct CopyPixelParams
{
uint3 copySize;
uint3 textureOffset;
uint bufferStartOffset;
uint pixelSize;
uint bufferRowPitch;
uint bufferDepthPitch;
};
struct WritePixelParams
{
uint2 copySize;
uint2 textureOffset;
uint bufferStartOffset;
uint pixelSize;
uint bufferRowPitch;
uint textureLevel;
uint textureLayer;
bool reverseTextureRowOrder;
};
# 120 "./copy_buffer.metal"
template <typename T>
static inline void textureWrite(ushort3 gIndices,
constant CopyPixelParams &options,
vec<T, 4> color,
texture2d<T, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
uint3 writeIndices = options.textureOffset + uint3(gIndices);
switch (kCopyTextureType)
{
case kTextureType2D:
dstTexture2d.write(color, writeIndices.xy);
break;
case kTextureType2DArray:
dstTexture2dArray.write(color, writeIndices.xy, writeIndices.z);
break;
case kTextureType3D:
dstTexture3d.write(color, writeIndices);
break;
case kTextureTypeCube:
dstTextureCube.write(color, writeIndices.xy, writeIndices.z);
break;
}
}
template <typename T>
static inline vec<T, 4> textureRead(ushort2 gIndices,
constant WritePixelParams &options,
texture2d<T, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<T, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<T, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<T, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<T, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]])
{
vec<T, 4> color;
uint2 coords = uint2(gIndices);
if (options.reverseTextureRowOrder)
{
coords.y = options.copySize.y - 1 - gIndices.y;
}
coords += options.textureOffset;
switch (kCopyTextureType)
{
case kTextureType2D:
color = srcTexture2d.read(coords.xy, options.textureLevel);
break;
case kTextureType2DArray:
color = srcTexture2dArray.read(coords.xy, options.textureLayer, options.textureLevel);
break;
case kTextureType2DMultisample:
color = resolveTextureMS(srcTexture2dMS, coords.xy);
break;
case kTextureType3D:
color = srcTexture3d.read(uint3(coords, options.textureLayer), options.textureLevel);
break;
case kTextureTypeCube:
color = srcTextureCube.read(coords.xy, options.textureLayer, options.textureLevel);
break;
}
return color;
}
# 215 "./copy_buffer.metal"
static inline float4 readR5G6B5_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
ushort src = bytesToShort<ushort>(buffer, bufferOffset);
color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
color.g = normalizedToFloat<6>(getShiftedData<6, 5>(src));
color.b = normalizedToFloat<5>(getShiftedData<5, 0>(src));
color.a = 1.0;
return color;
}
static inline void writeR5G6B5_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
shiftData<6, 5>(floatToNormalized<6, ushort>(color.g)) |
shiftData<5, 0>(floatToNormalized<5, ushort>(color.b));
shortToBytes(dst, bufferOffset, buffer);
}
static inline float4 readR4G4B4A4_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
ushort src = bytesToShort<ushort>(buffer, bufferOffset);
color.r = normalizedToFloat<4>(getShiftedData<4, 12>(src));
color.g = normalizedToFloat<4>(getShiftedData<4, 8>(src));
color.b = normalizedToFloat<4>(getShiftedData<4, 4>(src));
color.a = normalizedToFloat<4>(getShiftedData<4, 0>(src));
return color;
}
static inline void writeR4G4B4A4_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
ushort dst = shiftData<4, 12>(floatToNormalized<4, ushort>(color.r)) |
shiftData<4, 8>(floatToNormalized<4, ushort>(color.g)) |
shiftData<4, 4>(floatToNormalized<4, ushort>(color.b)) |
shiftData<4, 0>(floatToNormalized<4, ushort>(color.a));
;
shortToBytes(dst, bufferOffset, buffer);
}
static inline float4 readR5G5B5A1_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
ushort src = bytesToShort<ushort>(buffer, bufferOffset);
color.r = normalizedToFloat<5>(getShiftedData<5, 11>(src));
color.g = normalizedToFloat<5>(getShiftedData<5, 6>(src));
color.b = normalizedToFloat<5>(getShiftedData<5, 1>(src));
color.a = normalizedToFloat<1>(getShiftedData<1, 0>(src));
return color;
}
static inline void writeR5G5B5A1_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
ushort dst = shiftData<5, 11>(floatToNormalized<5, ushort>(color.r)) |
shiftData<5, 6>(floatToNormalized<5, ushort>(color.g)) |
shiftData<5, 1>(floatToNormalized<5, ushort>(color.b)) |
shiftData<1, 0>(floatToNormalized<1, ushort>(color.a));
;
shortToBytes(dst, bufferOffset, buffer);
}
static inline int4 readR10G10B10A2_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
int src = bytesToInt<int>(buffer, bufferOffset);
constexpr int3 rgbSignMask(0x200);
constexpr int3 negativeMask(0xfffffc00);
constexpr int alphaSignMask = 0x2;
constexpr int alphaNegMask = 0xfffffffc;
color.r = getShiftedData<10, 0>(src);
color.g = getShiftedData<10, 10>(src);
color.b = getShiftedData<10, 20>(src);
int3 isRgbNegative = (color.rgb & rgbSignMask) >> 9;
color.rgb = (isRgbNegative * negativeMask) | color.rgb;
color.a = getShiftedData<2, 30>(src);
int isAlphaNegative = color.a & alphaSignMask >> 1;
color.a = (isAlphaNegative * alphaNegMask) | color.a;
return color;
}
static inline uint4 readR10G10B10A2_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
uint src = bytesToInt<uint>(buffer, bufferOffset);
color.r = getShiftedData<10, 0>(src);
color.g = getShiftedData<10, 10>(src);
color.b = getShiftedData<10, 20>(src);
color.a = getShiftedData<2, 30>(src);
return color;
}
static inline float4 readR8G8B8A8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
{
float4 color;
uint src = bytesToInt<uint>(buffer, bufferOffset);
if (isSRGB)
{
color = unpack_unorm4x8_srgb_to_float(src);
}
else
{
color = unpack_unorm4x8_to_float(src);
}
return color;
}
static inline void writeR8G8B8A8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
{
uint dst;
if (isSRGB)
{
dst = pack_float_to_srgb_unorm4x8(color);
}
else
{
dst = pack_float_to_unorm4x8(color);
}
intToBytes(dst, bufferOffset, buffer);
}
static inline float4 readR8G8B8(uint bufferOffset, constant uchar *buffer, bool isSRGB)
{
float4 color;
color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
color.b = normalizedToFloat<uchar>(buffer[bufferOffset + 2]);
color.a = 1.0;
if (isSRGB)
{
color = sRGBtoLinear(color);
}
return color;
}
static inline void writeR8G8B8(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer, bool isSRGB)
{
color.a = 1.0;
uint dst;
if (isSRGB)
{
dst = pack_float_to_srgb_unorm4x8(color);
}
else
{
dst = pack_float_to_unorm4x8(color);
}
int24bitToBytes(dst, bufferOffset, buffer);
}
static inline float4 readR8G8B8A8_SNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
uint src = bytesToInt<uint>(buffer, bufferOffset);
color = unpack_snorm4x8_to_float(src);
return color;
}
static inline void writeR8G8B8A8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
uint dst = pack_float_to_snorm4x8(color);
intToBytes(dst, bufferOffset, buffer);
}
static inline float4 readR8G8B8_SNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
color.b = normalizedToFloat<7, char>(buffer[bufferOffset + 2]);
color.a = 1.0;
return color;
}
static inline void writeR8G8B8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
uint dst = pack_float_to_snorm4x8(color);
int24bitToBytes(dst, bufferOffset, buffer);
}
static inline float4 readR8G8B8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
return readR8G8B8A8(bufferOffset, buffer, false);
}
static inline void writeR8G8B8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
}
static inline float4 readR8G8B8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
return readR8G8B8A8(bufferOffset, buffer, true);
}
static inline void writeR8G8B8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
}
static inline float4 readB8G8R8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
return readR8G8B8A8(bufferOffset, buffer, false).bgra;
}
static inline void writeB8G8R8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
color.rgba = color.bgra;
return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, false);
}
static inline float4 readB8G8R8A8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
return readR8G8B8A8(bufferOffset, buffer, true).bgra;
}
static inline void writeB8G8R8A8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
color.rgba = color.bgra;
return writeR8G8B8A8(gIndices, options, bufferOffset, color, buffer, true);
}
static inline float4 readR8G8B8_UNORM(uint bufferOffset, constant uchar *buffer)
{
return readR8G8B8(bufferOffset, buffer, false);
}
static inline void writeR8G8B8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, false);
}
static inline float4 readR8G8B8_UNORM_SRGB(uint bufferOffset, constant uchar *buffer)
{
return readR8G8B8(bufferOffset, buffer, true);
}
static inline void writeR8G8B8_UNORM_SRGB(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
return writeR8G8B8(gIndices, options, bufferOffset, color, buffer, true);
}
static inline float4 readL8_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
color.a = 1.0;
return color;
}
static inline void writeL8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
}
static inline void writeA8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = floatToNormalized<uchar>(color.a);
}
static inline float4 readL8A8_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.rgb = float3(normalizedToFloat<uchar>(buffer[bufferOffset]));
color.a = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
return color;
}
static inline void writeL8A8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.a);
}
static inline float4 readR8_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
}
static inline float4 readR8_SNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
}
static inline int4 readR8_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = as_type<char>(buffer[bufferOffset]);
color.g = color.b = 0;
color.a = 1;
return color;
}
static inline void writeR8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = static_cast<uchar>(color.r);
}
static inline uint4 readR8_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = as_type<uchar>(buffer[bufferOffset]);
color.g = color.b = 0;
color.a = 1;
return color;
}
static inline void writeR8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = static_cast<uchar>(color.r);
}
static inline float4 readR8G8_UNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<uchar>(buffer[bufferOffset]);
color.g = normalizedToFloat<uchar>(buffer[bufferOffset + 1]);
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR8G8_UNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = floatToNormalized<uchar>(color.r);
buffer[bufferOffset + 1] = floatToNormalized<uchar>(color.g);
}
static inline float4 readR8G8_SNORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<7, char>(buffer[bufferOffset]);
color.g = normalizedToFloat<7, char>(buffer[bufferOffset + 1]);
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR8G8_SNORM(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = as_type<uchar>(floatToNormalized<7, char>(color.r));
buffer[bufferOffset + 1] = as_type<uchar>(floatToNormalized<7, char>(color.g));
}
static inline int4 readR8G8_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = as_type<char>(buffer[bufferOffset]);
color.g = as_type<char>(buffer[bufferOffset + 1]);
color.b = 0;
color.a = 1;
return color;
}
static inline void writeR8G8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = static_cast<uchar>(color.r);
buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
}
static inline uint4 readR8G8_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = as_type<uchar>(buffer[bufferOffset]);
color.g = as_type<uchar>(buffer[bufferOffset + 1]);
color.b = 0;
color.a = 1;
return color;
}
static inline void writeR8G8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = static_cast<uchar>(color.r);
buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
}
static inline int4 readR8G8B8_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = as_type<char>(buffer[bufferOffset]);
color.g = as_type<char>(buffer[bufferOffset + 1]);
color.b = as_type<char>(buffer[bufferOffset + 2]);
color.a = 1;
return color;
}
static inline uint4 readR8G8B8_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = as_type<uchar>(buffer[bufferOffset]);
color.g = as_type<uchar>(buffer[bufferOffset + 1]);
color.b = as_type<uchar>(buffer[bufferOffset + 2]);
color.a = 1;
return color;
}
static inline int4 readR8G8B8A8_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = as_type<char>(buffer[bufferOffset]);
color.g = as_type<char>(buffer[bufferOffset + 1]);
color.b = as_type<char>(buffer[bufferOffset + 2]);
color.a = as_type<char>(buffer[bufferOffset + 3]);
return color;
}
static inline void writeR8G8B8A8_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = static_cast<uchar>(color.r);
buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
}
static inline uint4 readR8G8B8A8_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = as_type<uchar>(buffer[bufferOffset]);
color.g = as_type<uchar>(buffer[bufferOffset + 1]);
color.b = as_type<uchar>(buffer[bufferOffset + 2]);
color.a = as_type<uchar>(buffer[bufferOffset + 3]);
return color;
}
static inline void writeR8G8B8A8_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
buffer[bufferOffset] = static_cast<uchar>(color.r);
buffer[bufferOffset + 1] = static_cast<uchar>(color.g);
buffer[bufferOffset + 2] = static_cast<uchar>(color.b);
buffer[bufferOffset + 3] = static_cast<uchar>(color.a);
}
static inline float4 readR16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
}
template <typename ShortType>
static inline float4 readR16_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline int4 readR16_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToShort<short>(buffer, bufferOffset);
color.g = color.b = 0;
color.a = 1;
return color;
}
static inline void writeR16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
}
static inline uint4 readR16_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToShort<ushort>(buffer, bufferOffset);
color.g = color.b = 0;
color.a = 1;
return color;
}
static inline void writeR16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
}
static inline float4 readA16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.rgb = 0.0;
return color;
}
static inline void writeA16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset, buffer);
}
static inline float4 readL16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.a = 1.0;
return color;
}
static inline void writeL16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
}
static inline float4 readL16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.rgb = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
return color;
}
static inline void writeL16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 2, buffer);
}
static inline float4 readR16G16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR16G16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
}
template <typename ShortType>
static inline float4 readR16G16_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline int4 readR16G16_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToShort<short>(buffer, bufferOffset);
color.g = bytesToShort<short>(buffer, bufferOffset + 2);
color.b = 0;
color.a = 1;
return color;
}
static inline void writeR16G16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
}
static inline uint4 readR16G16_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToShort<ushort>(buffer, bufferOffset);
color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
color.b = 0;
color.a = 1;
return color;
}
static inline void writeR16G16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
}
static inline float4 readR16G16B16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
color.a = 1.0;
return color;
}
template <typename ShortType>
static inline float4 readR16G16B16_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
color.a = 1.0;
return color;
}
static inline int4 readR16G16B16_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToShort<short>(buffer, bufferOffset);
color.g = bytesToShort<short>(buffer, bufferOffset + 2);
color.b = bytesToShort<short>(buffer, bufferOffset + 4);
color.a = 1;
return color;
}
static inline uint4 readR16G16B16_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToShort<ushort>(buffer, bufferOffset);
color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
color.a = 1;
return color;
}
static inline float4 readR16G16B16A16_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset));
color.g = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 2));
color.b = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 4));
color.a = as_type<half>(bytesToShort<ushort>(buffer, bufferOffset + 6));
return color;
}
static inline void writeR16G16B16A16_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
shortToBytes(as_type<ushort>(static_cast<half>(color.r)), bufferOffset, buffer);
shortToBytes(as_type<ushort>(static_cast<half>(color.g)), bufferOffset + 2, buffer);
shortToBytes(as_type<ushort>(static_cast<half>(color.b)), bufferOffset + 4, buffer);
shortToBytes(as_type<ushort>(static_cast<half>(color.a)), bufferOffset + 6, buffer);
}
template <typename ShortType>
static inline float4 readR16G16B16A16_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset));
color.g = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 2));
color.b = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 4));
color.a = normalizedToFloat<ShortType>(bytesToShort<ShortType>(buffer, bufferOffset + 6));
return color;
}
static inline int4 readR16G16B16A16_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToShort<short>(buffer, bufferOffset);
color.g = bytesToShort<short>(buffer, bufferOffset + 2);
color.b = bytesToShort<short>(buffer, bufferOffset + 4);
color.a = bytesToShort<short>(buffer, bufferOffset + 6);
return color;
}
static inline void writeR16G16B16A16_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
shortToBytes(static_cast<short>(color.r), bufferOffset, buffer);
shortToBytes(static_cast<short>(color.g), bufferOffset + 2, buffer);
shortToBytes(static_cast<short>(color.b), bufferOffset + 4, buffer);
shortToBytes(static_cast<short>(color.a), bufferOffset + 6, buffer);
}
static inline uint4 readR16G16B16A16_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToShort<ushort>(buffer, bufferOffset);
color.g = bytesToShort<ushort>(buffer, bufferOffset + 2);
color.b = bytesToShort<ushort>(buffer, bufferOffset + 4);
color.a = bytesToShort<ushort>(buffer, bufferOffset + 6);
return color;
}
static inline void writeR16G16B16A16_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
shortToBytes(static_cast<ushort>(color.r), bufferOffset, buffer);
shortToBytes(static_cast<ushort>(color.g), bufferOffset + 2, buffer);
shortToBytes(static_cast<ushort>(color.b), bufferOffset + 4, buffer);
shortToBytes(static_cast<ushort>(color.a), bufferOffset + 6, buffer);
}
static inline float4 readR32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
}
template <typename IntType>
static inline float4 readR32_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline float4 readA32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.rgb = 0.0;
return color;
}
static inline void writeA32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.a), bufferOffset, buffer);
}
static inline float4 readL32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.a = 1.0;
return color;
}
static inline void writeL32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
}
static inline int4 readR32_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToInt<int>(buffer, bufferOffset);
color.g = color.b = 0;
color.a = 1;
return color;
}
static inline void writeR32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
intToBytes(color.r, bufferOffset, buffer);
}
static inline float4 readR32_FIXED(uint bufferOffset, constant uchar *buffer)
{
float4 color;
constexpr float kDivisor = 1.0f / (1 << 16);
color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
color.g = color.b = 0.0;
color.a = 1.0;
return color;
}
static inline uint4 readR32_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToInt<uint>(buffer, bufferOffset);
color.g = color.b = 0;
color.a = 1;
return color;
}
static inline void writeR32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
intToBytes(color.r, bufferOffset, buffer);
}
static inline float4 readL32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.rgb = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
return color;
}
static inline void writeL32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
intToBytes(as_type<uint>(color.a), bufferOffset + 4, buffer);
}
static inline float4 readR32G32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline void writeR32G32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
}
template <typename IntType>
static inline float4 readR32G32_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline int4 readR32G32_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToInt<int>(buffer, bufferOffset);
color.g = bytesToInt<int>(buffer, bufferOffset + 4);
color.b = 0;
color.a = 1;
return color;
}
static inline void writeR32G32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
intToBytes(color.r, bufferOffset, buffer);
intToBytes(color.g, bufferOffset + 4, buffer);
}
static inline float4 readR32G32_FIXED(uint bufferOffset, constant uchar *buffer)
{
float4 color;
constexpr float kDivisor = 1.0f / (1 << 16);
color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
color.b = 0.0;
color.a = 1.0;
return color;
}
static inline uint4 readR32G32_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToInt<uint>(buffer, bufferOffset);
color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
color.b = 0;
color.a = 1;
return color;
}
static inline void writeR32G32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
intToBytes(color.r, bufferOffset, buffer);
intToBytes(color.g, bufferOffset + 4, buffer);
}
static inline float4 readR32G32B32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
color.a = 1.0;
return color;
}
template <typename IntType>
static inline float4 readR32G32B32_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
color.a = 1.0;
return color;
}
static inline int4 readR32G32B32_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToInt<int>(buffer, bufferOffset);
color.g = bytesToInt<int>(buffer, bufferOffset + 4);
color.b = bytesToInt<int>(buffer, bufferOffset + 8);
color.a = 1;
return color;
}
static inline float4 readR32G32B32_FIXED(uint bufferOffset, constant uchar *buffer)
{
float4 color;
constexpr float kDivisor = 1.0f / (1 << 16);
color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
color.a = 1.0;
return color;
}
static inline uint4 readR32G32B32_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToInt<uint>(buffer, bufferOffset);
color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
color.a = 1;
return color;
}
static inline float4 readR32G32B32A32_FLOAT(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = as_type<float>(bytesToInt<uint>(buffer, bufferOffset));
color.g = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 4));
color.b = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 8));
color.a = as_type<float>(bytesToInt<uint>(buffer, bufferOffset + 12));
return color;
}
static inline void writeR32G32B32A32_FLOAT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<float, 4> color, device uchar *buffer)
{
intToBytes(as_type<uint>(color.r), bufferOffset, buffer);
intToBytes(as_type<uint>(color.g), bufferOffset + 4, buffer);
intToBytes(as_type<uint>(color.b), bufferOffset + 8, buffer);
intToBytes(as_type<uint>(color.a), bufferOffset + 12, buffer);
}
template <typename IntType>
static inline float4 readR32G32B32A32_NORM(uint bufferOffset, constant uchar *buffer)
{
float4 color;
color.r = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset));
color.g = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 4));
color.b = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 8));
color.a = normalizedToFloat<IntType>(bytesToInt<IntType>(buffer, bufferOffset + 12));
return color;
}
static inline int4 readR32G32B32A32_SINT(uint bufferOffset, constant uchar *buffer)
{
int4 color;
color.r = bytesToInt<int>(buffer, bufferOffset);
color.g = bytesToInt<int>(buffer, bufferOffset + 4);
color.b = bytesToInt<int>(buffer, bufferOffset + 8);
color.a = bytesToInt<int>(buffer, bufferOffset + 12);
return color;
}
static inline void writeR32G32B32A32_SINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<int, 4> color, device uchar *buffer)
{
intToBytes(color.r, bufferOffset, buffer);
intToBytes(color.g, bufferOffset + 4, buffer);
intToBytes(color.b, bufferOffset + 8, buffer);
intToBytes(color.a, bufferOffset + 12, buffer);
}
static inline float4 readR32G32B32A32_FIXED(uint bufferOffset, constant uchar *buffer)
{
float4 color;
constexpr float kDivisor = 1.0f / (1 << 16);
color.r = bytesToInt<int>(buffer, bufferOffset) * kDivisor;
color.g = bytesToInt<int>(buffer, bufferOffset + 4) * kDivisor;
color.b = bytesToInt<int>(buffer, bufferOffset + 8) * kDivisor;
color.a = bytesToInt<int>(buffer, bufferOffset + 12) * kDivisor;
return color;
}
static inline uint4 readR32G32B32A32_UINT(uint bufferOffset, constant uchar *buffer)
{
uint4 color;
color.r = bytesToInt<uint>(buffer, bufferOffset);
color.g = bytesToInt<uint>(buffer, bufferOffset + 4);
color.b = bytesToInt<uint>(buffer, bufferOffset + 8);
color.a = bytesToInt<uint>(buffer, bufferOffset + 12);
return color;
}
static inline void writeR32G32B32A32_UINT(ushort2 gIndices, constant WritePixelParams &options, uint bufferOffset, vec<uint, 4> color, device uchar *buffer)
{
intToBytes(color.r, bufferOffset, buffer);
intToBytes(color.g, bufferOffset + 4, buffer);
intToBytes(color.b, bufferOffset + 8, buffer);
intToBytes(color.a, bufferOffset + 12, buffer);
}
# 1292 "./copy_buffer.metal"
static inline int4 readR8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8_SINT(bufferOffset, buffer); } static inline uint4 readR8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8_UINT(bufferOffset, buffer); } static inline int4 readR8G8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8_UINT(bufferOffset, buffer); } static inline int4 readR8G8B8A8_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_SINT(bufferOffset, buffer); } static inline uint4 readR8G8B8A8_USCALED(uint bufferOffset, constant uchar *buffer) { return readR8G8B8A8_UINT(bufferOffset, buffer); }
static inline int4 readR16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16_SINT(bufferOffset, buffer); } static inline uint4 readR16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16_UINT(bufferOffset, buffer); } static inline int4 readR16G16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16_UINT(bufferOffset, buffer); } static inline int4 readR16G16B16A16_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_SINT(bufferOffset, buffer); } static inline uint4 readR16G16B16A16_USCALED(uint bufferOffset, constant uchar *buffer) { return readR16G16B16A16_UINT(bufferOffset, buffer); }
static inline int4 readR32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32_SINT(bufferOffset, buffer); } static inline uint4 readR32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32_UINT(bufferOffset, buffer); } static inline int4 readR32G32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32_UINT(bufferOffset, buffer); } static inline int4 readR32G32B32A32_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_SINT(bufferOffset, buffer); } static inline uint4 readR32G32B32A32_USCALED(uint bufferOffset, constant uchar *buffer) { return readR32G32B32A32_UINT(bufferOffset, buffer); }
static inline int4 readR10G10B10A2_SSCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_SINT(bufferOffset, buffer); } static inline uint4 readR10G10B10A2_USCALED(uint bufferOffset, constant uchar *buffer) { return readR10G10B10A2_UINT(bufferOffset, buffer); }
kernel void readFromBufferToFloatTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<float, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
# 1336 "./copy_buffer.metal"
uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
switch (kCopyFormatType)
{
case FormatID::R5G6B5_UNORM: { auto color = readR5G6B5_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM: { auto color = readR8G8B8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = readR8G8B8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SNORM: { auto color = readR8G8B8A8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM: { auto color = readB8G8R8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = readB8G8R8A8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM: { auto color = readR8G8B8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = readR8G8B8_UNORM_SRGB(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SNORM: { auto color = readR8G8B8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8_UNORM: { auto color = readL8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L8A8_UNORM: { auto color = readL8A8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R5G5B5A1_UNORM: { auto color = readR5G5B5A1_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R4G4B4A4_UNORM: { auto color = readR4G4B4A4_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_UNORM: { auto color = readR8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8_SNORM: { auto color = readR8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UNORM: { auto color = readR8G8_UNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SNORM: { auto color = readR8G8_SNORM(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_FLOAT: { auto color = readR16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A16_FLOAT: { auto color = readA16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16_FLOAT: { auto color = readL16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L16A16_FLOAT: { auto color = readL16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_FLOAT: { auto color = readR16G16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_FLOAT: { auto color = readR16G16B16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = readR16G16B16A16_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_FLOAT: { auto color = readR32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::A32_FLOAT: { auto color = readA32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32_FLOAT: { auto color = readL32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::L32A32_FLOAT: { auto color = readL32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_FLOAT: { auto color = readR32G32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_FLOAT: { auto color = readR32G32B32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = readR32G32B32A32_FLOAT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
}
}
kernel void readFromBufferToIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<int, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
# 1364 "./copy_buffer.metal"
uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
switch (kCopyFormatType)
{
case FormatID::R8_SINT: { auto color = readR8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_SINT: { auto color = readR8G8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_SINT: { auto color = readR8G8B8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_SINT: { auto color = readR8G8B8A8_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_SINT: { auto color = readR16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_SINT: { auto color = readR16G16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_SINT: { auto color = readR16G16B16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_SINT: { auto color = readR16G16B16A16_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_SINT: { auto color = readR32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_SINT: { auto color = readR32G32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_SINT: { auto color = readR32G32B32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_SINT: { auto color = readR32G32B32A32_SINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
}
}
kernel void readFromBufferToUIntTexture(ushort3 gIndices [[thread_position_in_grid]], constant CopyPixelParams &options[[buffer(0)]], constant uchar *buffer [[buffer(1)]], texture2d<uint, access::write> dstTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::write> dstTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::write> dstTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::write> dstTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]])
{
if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y || gIndices.z >= options.copySize.z) { return; }
# 1392 "./copy_buffer.metal"
uint bufferOffset = options.bufferStartOffset + (gIndices.z * options.bufferDepthPitch + gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
switch (kCopyFormatType)
{
case FormatID::R8_UINT: { auto color = readR8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8_UINT: { auto color = readR8G8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8_UINT: { auto color = readR8G8B8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R8G8B8A8_UINT: { auto color = readR8G8B8A8_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16_UINT: { auto color = readR16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16_UINT: { auto color = readR16G16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16_UINT: { auto color = readR16G16B16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R16G16B16A16_UINT: { auto color = readR16G16B16A16_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32_UINT: { auto color = readR32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32_UINT: { auto color = readR32G32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32_UINT: { auto color = readR32G32B32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break; case FormatID::R32G32B32A32_UINT: { auto color = readR32G32B32A32_UINT(bufferOffset, buffer); textureWrite(gIndices, options, color, dstTexture2d, dstTexture2dArray, dstTexture3d, dstTextureCube); } break;
}
}
kernel void writeFromFloatTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<float, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<float, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<float, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<float, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<float, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
{
if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
# 1439 "./copy_buffer.metal"
uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
switch (kCopyFormatType)
{
case FormatID::R5G6B5_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G6B5_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::B8G8R8A8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeB8G8R8A8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_UNORM_SRGB: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_UNORM_SRGB(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L8A8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL8A8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R5G5B5A1_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR5G5B5A1_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R4G4B4A4_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR4G4B4A4_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SNORM: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SNORM(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeA32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::L32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeL32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_FLOAT(gIndices, options, bufferOffset, color, buffer); } break;
}
}
kernel void writeFromIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<int, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<int, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<int, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<int, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<int, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
{
if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
# 1464 "./copy_buffer.metal"
uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
switch (kCopyFormatType)
{
case FormatID::R8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_SINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_SINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_SINT(gIndices, options, bufferOffset, color, buffer); } break;
}
}
kernel void writeFromUIntTextureToBuffer(ushort2 gIndices [[thread_position_in_grid]], constant WritePixelParams &options[[buffer(0)]], texture2d<uint, access::read> srcTexture2d [[texture(0), function_constant(kCopyTextureType2D)]], texture2d_array<uint, access::read> srcTexture2dArray [[texture(0), function_constant(kCopyTextureType2DArray)]], texture3d<uint, access::read> srcTexture3d [[texture(0), function_constant(kCopyTextureType3D)]], texturecube<uint, access::read> srcTextureCube [[texture(0), function_constant(kCopyTextureTypeCube)]], texture2d_ms<uint, access::read> srcTexture2dMS [[texture(0), function_constant(kCopyTextureType2DMS)]], device uchar *buffer [[buffer(1)]])
{
if (gIndices.x >= options.copySize.x || gIndices.y >= options.copySize.y) { return; }
# 1489 "./copy_buffer.metal"
uint bufferOffset = options.bufferStartOffset + (gIndices.y * options.bufferRowPitch + gIndices.x * options.pixelSize);
switch (kCopyFormatType)
{
case FormatID::R8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R8G8B8A8_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR8G8B8A8_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R16G16B16A16_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR16G16B16A16_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32_UINT(gIndices, options, bufferOffset, color, buffer); } break; case FormatID::R32G32B32A32_UINT: { auto color = textureRead(gIndices, options, srcTexture2d, srcTexture2dArray, srcTexture3d, srcTextureCube, srcTexture2dMS); writeR32G32B32A32_UINT(gIndices, options, bufferOffset, color, buffer); } break;
}
}
struct CopyVertexParams
{
uint srcBufferStartOffset;
uint srcStride;
uint srcComponentBytes;
uint srcComponents;
uchar4 srcDefaultAlphaData;
uint dstBufferStartOffset;
uint dstStride;
uint dstComponents;
uint vertexCount;
};
# 1539 "./copy_buffer.metal"
template <typename IntType>
static inline void writeFloatVertex(constant CopyVertexParams &options,
uint idx,
vec<IntType, 4> data,
device uchar *dst)
{
uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
{
floatToBytes(static_cast<float>(data[component]), dstOffset, dst);
}
}
template <>
inline void writeFloatVertex(constant CopyVertexParams &options,
uint idx,
vec<float, 4> data,
device uchar *dst)
{
uint dstOffset = idx * options.dstStride + options.dstBufferStartOffset;
for (uint component = 0; component < options.dstComponents; ++component, dstOffset += 4)
{
floatToBytes(data[component], dstOffset, dst);
}
}
static inline void convertToFloatVertexFormat(uint index,
constant CopyVertexParams &options,
constant uchar *srcBuffer,
device uchar *dstBuffer)
{
# 1585 "./copy_buffer.metal"
uint bufferOffset = options.srcBufferStartOffset + options.srcStride * index;
# 1594 "./copy_buffer.metal"
switch (kCopyFormatType)
{
case FormatID::R8_UNORM: { auto data = readR8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SNORM: { auto data = readR8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_UINT: { auto data = readR8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SINT: { auto data = readR8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_USCALED: { auto data = readR8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8_SSCALED: { auto data = readR8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UNORM: { auto data = readR8G8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SNORM: { auto data = readR8G8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_UINT: { auto data = readR8G8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SINT: { auto data = readR8G8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_USCALED: { auto data = readR8G8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8_SSCALED: { auto data = readR8G8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UNORM: { auto data = readR8G8B8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SNORM: { auto data = readR8G8B8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_UINT: { auto data = readR8G8B8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SINT: { auto data = readR8G8B8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_USCALED: { auto data = readR8G8B8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8_SSCALED: { auto data = readR8G8B8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UNORM: { auto data = readR8G8B8A8_UNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SNORM: { auto data = readR8G8B8A8_SNORM(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_UINT: { auto data = readR8G8B8A8_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SINT: { auto data = readR8G8B8A8_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_USCALED: { auto data = readR8G8B8A8_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R8G8B8A8_SSCALED: { auto data = readR8G8B8A8_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UNORM: { auto data = readR16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SNORM: { auto data = readR16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_UINT: { auto data = readR16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SINT: { auto data = readR16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_USCALED: { auto data = readR16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_SSCALED: { auto data = readR16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UNORM: { auto data = readR16G16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SNORM: { auto data = readR16G16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_UINT: { auto data = readR16G16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SINT: { auto data = readR16G16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_USCALED: { auto data = readR16G16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_SSCALED: { auto data = readR16G16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UNORM: { auto data = readR16G16B16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SNORM: { auto data = readR16G16B16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_UINT: { auto data = readR16G16B16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SINT: { auto data = readR16G16B16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_USCALED: { auto data = readR16G16B16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_SSCALED: { auto data = readR16G16B16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UNORM: { auto data = readR16G16B16A16_NORM<ushort>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SNORM: { auto data = readR16G16B16A16_NORM<short>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_UINT: { auto data = readR16G16B16A16_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SINT: { auto data = readR16G16B16A16_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_USCALED: { auto data = readR16G16B16A16_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_SSCALED: { auto data = readR16G16B16A16_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UNORM: { auto data = readR32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SNORM: { auto data = readR32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_UINT: { auto data = readR32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SINT: { auto data = readR32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_USCALED: { auto data = readR32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_SSCALED: { auto data = readR32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UNORM: { auto data = readR32G32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SNORM: { auto data = readR32G32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_UINT: { auto data = readR32G32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SINT: { auto data = readR32G32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_USCALED: { auto data = readR32G32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_SSCALED: { auto data = readR32G32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UNORM: { auto data = readR32G32B32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SNORM: { auto data = readR32G32B32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_UINT: { auto data = readR32G32B32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SINT: { auto data = readR32G32B32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_USCALED: { auto data = readR32G32B32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_SSCALED: { auto data = readR32G32B32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UNORM: { auto data = readR32G32B32A32_NORM<uint>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SNORM: { auto data = readR32G32B32A32_NORM<int>(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_UINT: { auto data = readR32G32B32A32_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SINT: { auto data = readR32G32B32A32_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_USCALED: { auto data = readR32G32B32A32_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_SSCALED: { auto data = readR32G32B32A32_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16_FLOAT: { auto data = readR16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16_FLOAT: { auto data = readR16G16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16_FLOAT: { auto data = readR16G16B16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R16G16B16A16_FLOAT: { auto data = readR16G16B16A16_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FLOAT: { auto data = readR32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FLOAT: { auto data = readR32G32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FLOAT: { auto data = readR32G32B32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FLOAT: { auto data = readR32G32B32A32_FLOAT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32_FIXED: { auto data = readR32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32_FIXED: { auto data = readR32G32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32_FIXED: { auto data = readR32G32B32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R32G32B32A32_FIXED: { auto data = readR32G32B32A32_FIXED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SINT: { auto data = readR10G10B10A2_SINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_UINT: { auto data = readR10G10B10A2_UINT(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_SSCALED: { auto data = readR10G10B10A2_SSCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break; case FormatID::R10G10B10A2_USCALED: { auto data = readR10G10B10A2_USCALED(bufferOffset, srcBuffer); writeFloatVertex(options, index, data, dstBuffer); } break;
}
}
kernel void convertToFloatVertexFormatCS(uint index [[thread_position_in_grid]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
if (index >= options.vertexCount) { return; };
convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
vertex void convertToFloatVertexFormatVS(uint index [[vertex_id]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
convertToFloatVertexFormat(index, options, srcBuffer, dstBuffer);
}
static inline void expandVertexFormatComponents(uint index,
constant CopyVertexParams &options,
constant uchar *srcBuffer,
device uchar *dstBuffer)
{
uint srcOffset = options.srcBufferStartOffset + options.srcStride * index;
uint dstOffset = options.dstBufferStartOffset + options.dstStride * index;
uint dstComponentsBeforeAlpha = min(options.dstComponents, 3u);
uint component;
for (component = 0; component < options.srcComponents; ++component,
srcOffset += options.srcComponentBytes, dstOffset += options.srcComponentBytes)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = srcBuffer[srcOffset + byte];
}
}
for (; component < dstComponentsBeforeAlpha;
++component, dstOffset += options.srcComponentBytes)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = 0;
}
}
if (component < options.dstComponents)
{
for (uint byte = 0; byte < options.srcComponentBytes; ++byte)
{
dstBuffer[dstOffset + byte] = options.srcDefaultAlphaData[byte];
}
}
}
kernel void expandVertexFormatComponentsCS(uint index [[thread_position_in_grid]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
if (index >= options.vertexCount) { return; };
expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
vertex void expandVertexFormatComponentsVS(uint index [[vertex_id]],
constant CopyVertexParams &options [[buffer(0)]],
constant uchar *srcBuffer [[buffer(1)]],
device uchar *dstBuffer [[buffer(2)]])
{
expandVertexFormatComponents(index, options, srcBuffer, dstBuffer);
}
# 6 "temp_master_source.metal" 2
# 1 "./visibility.metal" 1
constant bool kCombineWithExistingResult [[function_constant(1000)]];
struct CombineVisibilityResultOptions
{
uint startOffset;
uint numOffsets;
};
kernel void combineVisibilityResult(uint idx [[thread_position_in_grid]],
constant CombineVisibilityResultOptions &options [[buffer(0)]],
constant ushort4 *renderpassVisibilityResult [[buffer(1)]],
device ushort4 *finalResults [[buffer(2)]])
{
if (idx > 0)
{
return;
}
ushort4 finalResult16x4;
if (kCombineWithExistingResult)
{
finalResult16x4 = finalResults[0];
}
else
{
finalResult16x4 = ushort4(0, 0, 0, 0);
}
for (uint i = 0; i < options.numOffsets; ++i)
{
uint offset = options.startOffset + i;
ushort4 renderpassResult = renderpassVisibilityResult[offset];
finalResult16x4 = finalResult16x4 | renderpassResult;
}
finalResults[0] = finalResult16x4;
}
# 7 "temp_master_source.metal" 2
# 1 "./rewrite_indices.metal" 1
# 11 "./rewrite_indices.metal"
# 1 "./rewrite_indices_shared.h" 1
# 12 "./rewrite_indices.metal" 2
using namespace metal;
constant uint fixIndexBufferKey [[ function_constant(2000) ]];
constant bool indexBufferIsUint16 = (((fixIndexBufferKey >> 0U) & 0x03U) == 2U);
constant bool indexBufferIsUint32 = (((fixIndexBufferKey >> 0U) & 0x03U) == 3U);
constant bool outIndexBufferIsUint16 = (((fixIndexBufferKey >> 2U) & 0x03U) == 2U);
constant bool outIndexBufferIsUint32 = (((fixIndexBufferKey >> 2U) & 0x03U) == 3U);
constant bool doPrimRestart = (fixIndexBufferKey & 0x00100U);
constant uint fixIndexBufferMode = (fixIndexBufferKey >> 4U) & 0x0FU;
static inline uint readIdx(
const device ushort *indexBufferUint16,
const device uint *indexBufferUint32,
const uint restartIndex,
const uint indexCount,
uint idx,
thread bool &foundRestart,
thread uint &indexThatRestartedFirst
)
{
uint inIndex = idx;
if(inIndex < indexCount)
{
if(indexBufferIsUint16)
{
inIndex = indexBufferUint16[inIndex];
}
else if(indexBufferIsUint32)
{
inIndex = indexBufferUint32[inIndex];
}
}
else
{
foundRestart = true;
indexThatRestartedFirst = idx;
}
if(doPrimRestart && !foundRestart && inIndex == restartIndex)
{
foundRestart = true;
indexThatRestartedFirst = idx;
}
return inIndex;
}
static inline void outputPrimitive(
const device ushort *indexBufferUint16,
const device uint *indexBufferUint32,
device ushort *outIndexBufferUint16,
device uint *outIndexBufferUint32,
const uint restartIndex,
const uint indexCount,
thread uint &baseIndex,
uint onIndex,
thread uint &onOutIndex
)
{
if(baseIndex > onIndex) return;
bool foundRestart = false;
uint indexThatRestartedFirst = 0;
# 86 "./rewrite_indices.metal"
switch(fixIndexBufferMode)
{
case 0x00U:
{
auto tmpIndex = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex, foundRestart, indexThatRestartedFirst);
if(foundRestart)
{
baseIndex = indexThatRestartedFirst + 1;
return;
}
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex; } onOutIndex++; });
}
break;
case 0x01U:
{
auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
if(foundRestart)
{
baseIndex = indexThatRestartedFirst + 1;
return;
}
if((onIndex - baseIndex) & 1) return;
if(fixIndexBufferKey & 0x00200U)
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
}
else
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
}
}
break;
case 0x03U:
{
auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
if(foundRestart)
{
baseIndex = indexThatRestartedFirst + 1;
return;
}
if(fixIndexBufferKey & 0x00200U)
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
}
else
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
}
}
break;
case 0x04U:
{
auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0, foundRestart, indexThatRestartedFirst);
auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1, foundRestart, indexThatRestartedFirst);
auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
if(foundRestart)
{
baseIndex = indexThatRestartedFirst + 1;
return;
}
if(((onIndex - baseIndex) % 3) != 0) return;
if(fixIndexBufferKey & 0x00200U)
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
}
else
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
}
}
break;
case 0x05U:
{
uint isOdd = ((onIndex - baseIndex) & 1);
auto tmpIndex0 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 0 + isOdd, foundRestart, indexThatRestartedFirst);
auto tmpIndex1 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 1 - isOdd, foundRestart, indexThatRestartedFirst);
auto tmpIndex2 = readIdx(indexBufferUint16, indexBufferUint32, restartIndex, indexCount, onIndex + 2, foundRestart, indexThatRestartedFirst);
if(foundRestart)
{
baseIndex = indexThatRestartedFirst + 1;
return;
}
if(fixIndexBufferKey & 0x00200U)
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
}
else
{
if(isOdd)
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
}
else
{
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex0; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex0; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex1; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex1; } onOutIndex++; });
({ if(outIndexBufferIsUint16) { outIndexBufferUint16[(onOutIndex)] = tmpIndex2; } if(outIndexBufferIsUint32) { outIndexBufferUint32[(onOutIndex)] = tmpIndex2; } onOutIndex++; });
}
}
assert(onOutIndex <= (onIndex + 1) * 3);
assert(onOutIndex <= (indexCount - 2) * 3);
}
break;
}
}
kernel void fixIndexBuffer(
const device ushort *indexBufferUint16 [[ buffer(0), function_constant(indexBufferIsUint16) ]],
const device uint *indexBufferUint32 [[ buffer(0), function_constant(indexBufferIsUint32) ]],
device ushort *outIndexBufferUint16 [[ buffer(1), function_constant(outIndexBufferIsUint16) ]],
device uint *outIndexBufferUint32 [[ buffer(1), function_constant(outIndexBufferIsUint32) ]],
constant uint &indexCount [[ buffer(2) ]],
constant uint &primCount [[ buffer(3) ]],
uint prim [[thread_position_in_grid]])
{
constexpr uint restartIndex = 0xFFFFFFFF;
uint baseIndex = 0;
uint onIndex = onIndex;
uint onOutIndex = onOutIndex;
if(prim < primCount)
{
switch(fixIndexBufferMode)
{
case 0x00U:
onIndex = prim;
onOutIndex = prim;
break;
case 0x01U:
onIndex = prim * 2 + 0;
onOutIndex = prim * 2 + 0;
break;
case 0x03U:
onIndex = prim;
onOutIndex = prim * 2 + 0;
break;
case 0x04U:
onIndex = prim * 3 + 0;
onOutIndex = prim * 3 + 0;
break;
case 0x05U:
onIndex = prim;
onOutIndex = prim * 3 + 0;
break;
}
outputPrimitive(indexBufferUint16, indexBufferUint32, outIndexBufferUint16, outIndexBufferUint32, restartIndex, indexCount, baseIndex, onIndex, onOutIndex);
}
}
# 8 "temp_master_source.metal" 2