//
// Copyright 2020 The ANGLE Project. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
| |
| #include "common.h" |
| |
| using namespace rx::mtl_shader; |
| |
// Number of threadgroup scratch slots needed for 3D, 2D and 1D extents of a threadgroup whose
// per-dimension size is kGenerateMipThreadGroupSizePerDim (that constant is not defined in this
// file).
#define kThreadGroupXYZ                                                     \
    (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim * \
     kGenerateMipThreadGroupSizePerDim)

#define kThreadGroupXY (kGenerateMipThreadGroupSizePerDim * kGenerateMipThreadGroupSizePerDim)
#define kThreadGroupX kGenerateMipThreadGroupSizePerDim

// Stores the four channels of `texel` into slot `index` of the sR/sG/sB/sA arrays that must be in
// scope at the expansion site. Wrapped in do/while(0) so the expansion is a single statement and
// stays fully guarded when used under an unbraced if/else. Callers terminate it with ';'.
#define TEXEL_STORE(index, texel) \
    do \
    { \
        sR[index] = (texel).r; \
        sG[index] = (texel).g; \
        sB[index] = (texel).b; \
        sA[index] = (texel).a; \
    } while (0)

// Reassembles a float4 from slot `index` of the sR/sG/sB/sA arrays in scope.
#define TEXEL_LOAD(index) float4(sR[index], sG[index], sB[index], sA[index])

// Applies sRGBtoLinear() to `texel` when `options.sRGB` is set; otherwise yields `texel` unchanged.
// Requires a variable named `options` in scope at the expansion site.
#define TO_LINEAR(texel) (options.sRGB ? sRGBtoLinear(texel) : (texel))

// Yields `edgeValue` when `condition` holds, otherwise `targetValue`. Only the selected operand is
// evaluated. The whole expansion is parenthesized so it can be embedded in larger expressions.
#define OUT_OF_BOUND_CHECK(edgeValue, targetValue, condition) \
    ((condition) ? (edgeValue) : (targetValue))
| |
| struct GenMipParams |
| { |
| uint srcLevel; |
| uint numMipLevelsToGen; |
| bool sRGB; |
| }; |
| |
| // NOTE(hqle): For numMipLevelsToGen > 1, this function assumes the texture is power of two. If it |
| // is not, quality will not be good. |
| kernel void generate3DMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort3 gIndices [[thread_position_in_grid]], |
| texture3d<float> srcTexture [[texture(0)]], |
| texture3d<float, access::write> dstMip1 [[texture(1)]], |
| texture3d<float, access::write> dstMip2 [[texture(2)]], |
| texture3d<float, access::write> dstMip3 [[texture(3)]], |
| texture3d<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| ushort3 mipSize = ushort3(dstMip1.get_width(), dstMip1.get_height(), dstMip1.get_depth()); |
| bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y && gIndices.z < mipSize.z; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use. |
| |
| // Use struct of array style to avoid bank conflict. |
| threadgroup float sR[kThreadGroupXYZ]; |
| threadgroup float sG[kThreadGroupXYZ]; |
| threadgroup float sB[kThreadGroupXYZ]; |
| threadgroup float sA[kThreadGroupXYZ]; |
| |
| // ----- First mip level ------- |
| float4 texel1; |
| if (validThread) |
| { |
| float3 texCoords = (float3(gIndices) + float3(0.5, 0.5, 0.5)) / float3(mipSize); |
| texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); |
| |
| // Write to texture |
| dstMip1.write(texel1, gIndices); |
| } |
| else |
| { |
| // This will invalidate all subsequent checks |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| // ---- Second mip level -------- |
| |
| // Write to shared memory |
| if (options.sRGB) |
| { |
| texel1 = linearToSRGB(texel1); |
| } |
| TEXEL_STORE(lIndex, texel1); |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be even |
| if ((lIndex & 0x49) == 0) // (lIndex & b1001001) == 0 |
| { |
| bool3 atEdge = gIndices == (mipSize - ushort3(1)); |
| |
| // (x+1, y, z) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x); |
| // (x, y+1, z) |
| float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y); |
| // (x, y, z+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupXY), atEdge.z); |
| // (x+1, y+1, z) |
| float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)), |
| atEdge.x | atEdge.y); |
| // (x+1, y, z+1) |
| float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupXY + 1)), |
| atEdge.x | atEdge.z); |
| // (x, y+1, z+1) |
| float4 texel7 = OUT_OF_BOUND_CHECK( |
| texel3, TEXEL_LOAD(lIndex + (kThreadGroupXY + kThreadGroupX)), atEdge.y | atEdge.z); |
| // (x+1, y+1, z+1) |
| float4 texel8 = |
| OUT_OF_BOUND_CHECK(texel5, TEXEL_LOAD(lIndex + (kThreadGroupXY + kThreadGroupX + 1)), |
| atEdge.x | atEdge.y | atEdge.z); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; |
| |
| dstMip2.write(TO_LINEAR(texel1), gIndices >> 1); |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| } |
| |
| if (options.numMipLevelsToGen == 2) |
| { |
| return; |
| } |
| |
| // ---- 3rd mip level -------- |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be multiple of 4 |
| if ((lIndex & 0xdb) == 0) // (lIndex & b11011011) == 0 |
| { |
| mipSize = max(mipSize >> 1, ushort3(1)); |
| bool3 atEdge = (gIndices >> 1) == (mipSize - ushort3(1)); |
| |
| // (x+1, y, z) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x); |
| // (x, y+1, z) |
| float4 texel3 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (2 * kThreadGroupX)), atEdge.y); |
| // (x, y, z+1) |
| float4 texel4 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY)), atEdge.z); |
| // (x+1, y+1, z) |
| float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)), |
| atEdge.x | atEdge.y); |
| // (x+1, y, z+1) |
| float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2)), |
| atEdge.x | atEdge.z); |
| // (x, y+1, z+1) |
| float4 texel7 = OUT_OF_BOUND_CHECK( |
| texel3, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2 * kThreadGroupX)), |
| atEdge.y | atEdge.z); |
| // (x+1, y+1, z+1) |
| float4 texel8 = OUT_OF_BOUND_CHECK( |
| texel5, TEXEL_LOAD(lIndex + (2 * kThreadGroupXY + 2 * kThreadGroupX + 2)), |
| atEdge.x | atEdge.y | atEdge.z); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; |
| |
| dstMip3.write(TO_LINEAR(texel1), gIndices >> 2); |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| } |
| |
| if (options.numMipLevelsToGen == 3) |
| { |
| return; |
| } |
| |
| // ---- 4th mip level -------- |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be multiple of 8 |
| if ((lIndex & 0x1ff) == 0) // (lIndex & b111111111) == 0 |
| { |
| mipSize = max(mipSize >> 1, ushort3(1)); |
| bool3 atEdge = (gIndices >> 2) == (mipSize - ushort3(1)); |
| |
| // (x+1, y, z) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x); |
| // (x, y+1, z) |
| float4 texel3 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (4 * kThreadGroupX)), atEdge.y); |
| // (x, y, z+1) |
| float4 texel4 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY)), atEdge.z); |
| // (x+1, y+1, z) |
| float4 texel5 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)), |
| atEdge.x | atEdge.y); |
| // (x+1, y, z+1) |
| float4 texel6 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4)), |
| atEdge.x | atEdge.z); |
| // (x, y+1, z+1) |
| float4 texel7 = OUT_OF_BOUND_CHECK( |
| texel3, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4 * kThreadGroupX)), |
| atEdge.y | atEdge.z); |
| // (x+1, y+1, z+1) |
| float4 texel8 = OUT_OF_BOUND_CHECK( |
| texel5, TEXEL_LOAD(lIndex + (4 * kThreadGroupXY + 4 * kThreadGroupX + 4)), |
| atEdge.x | atEdge.y | atEdge.z); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4 + texel5 + texel6 + texel7 + texel8) / 8.0; |
| |
| dstMip4.write(TO_LINEAR(texel1), gIndices >> 3); |
| } |
| } |
| |
| kernel void generate2DMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort2 gIndices [[thread_position_in_grid]], |
| texture2d<float> srcTexture [[texture(0)]], |
| texture2d<float, access::write> dstMip1 [[texture(1)]], |
| texture2d<float, access::write> dstMip2 [[texture(2)]], |
| texture2d<float, access::write> dstMip3 [[texture(3)]], |
| texture2d<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| uint firstMipLevel = options.srcLevel + 1; |
| ushort2 mipSize = |
| ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); |
| bool validThread = gIndices.x < mipSize.x && gIndices.y < mipSize.y; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| // NOTE(hqle): Use simd_group function whenever available. That could avoid barrier use. |
| |
| // Use struct of array style to avoid bank conflict. |
| threadgroup float sR[kThreadGroupXY]; |
| threadgroup float sG[kThreadGroupXY]; |
| threadgroup float sB[kThreadGroupXY]; |
| threadgroup float sA[kThreadGroupXY]; |
| |
| // ----- First mip level ------- |
| float4 texel1; |
| if (validThread) |
| { |
| float2 texCoords = (float2(gIndices) + float2(0.5, 0.5)) / float2(mipSize); |
| texel1 = srcTexture.sample(textureSampler, texCoords, level(options.srcLevel)); |
| |
| // Write to texture |
| dstMip1.write(TO_LINEAR(texel1), gIndices); |
| } |
| else |
| { |
| // This will invalidate all subsequent checks |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| // ---- Second mip level -------- |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be even |
| if ((lIndex & 0x09) == 0) // (lIndex & b001001) == 0 |
| { |
| bool2 atEdge = gIndices == (mipSize - ushort2(1)); |
| |
| // (x+1, y) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x); |
| // (x, y+1) |
| float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y); |
| // (x+1, y+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)), |
| atEdge.x | atEdge.y); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip2.write(TO_LINEAR(texel1), gIndices >> 1); |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| } |
| |
| if (options.numMipLevelsToGen == 2) |
| { |
| return; |
| } |
| |
| // ---- 3rd mip level -------- |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be multiple of 4 |
| if ((lIndex & 0x1b) == 0) // (lIndex & b011011) == 0 |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices >> 1) == (mipSize - ushort2(1)); |
| |
| // (x+1, y) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x); |
| // (x, y+1) |
| float4 texel3 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2 * kThreadGroupX), atEdge.y); |
| // (x+1, y+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)), |
| atEdge.x | atEdge.y); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip3.write(TO_LINEAR(texel1), gIndices >> 2); |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| } |
| |
| if (options.numMipLevelsToGen == 3) |
| { |
| return; |
| } |
| |
| // ---- 4th mip level -------- |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be multiple of 8 |
| if ((lIndex & 0x3f) == 0) // (lIndex & b111111) == 0 |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices >> 2) == (mipSize - ushort2(1)); |
| |
| // (x+1, y) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x); |
| // (x, y+1) |
| float4 texel3 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4 * kThreadGroupX), atEdge.y); |
| // (x+1, y+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)), |
| atEdge.x | atEdge.y); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip4.write(TO_LINEAR(texel1), gIndices >> 3); |
| } |
| } |
| |
| template <typename TextureTypeR, typename TextureTypeW> |
| static __attribute__((always_inline)) void generateCubeOr2DArray2ndAndMoreMipmaps( |
| uint lIndex, |
| ushort3 gIndices, |
| TextureTypeR srcTexture, |
| TextureTypeW dstMip2, |
| TextureTypeW dstMip3, |
| TextureTypeW dstMip4, |
| ushort2 mip1Size, |
| float4 mip1Texel, |
| threadgroup float *sR, |
| threadgroup float *sG, |
| threadgroup float *sB, |
| threadgroup float *sA, |
| constant GenMipParams &options) |
| { |
| ushort2 mipSize = mip1Size; |
| float4 texel1 = mip1Texel; |
| |
| // ---- Second mip level -------- |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be even |
| if ((lIndex & 0x09) == 0) // (lIndex & b001001) == 0 |
| { |
| bool2 atEdge = gIndices.xy == (mipSize - ushort2(1)); |
| |
| // (x+1, y) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 1), atEdge.x); |
| // (x, y+1) |
| float4 texel3 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + kThreadGroupX), atEdge.y); |
| // (x+1, y+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (kThreadGroupX + 1)), |
| atEdge.x | atEdge.y); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip2.write(TO_LINEAR(texel1), gIndices.xy >> 1, gIndices.z); |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| } |
| |
| if (options.numMipLevelsToGen == 2) |
| { |
| return; |
| } |
| |
| // ---- 3rd mip level -------- |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be multiple of 4 |
| if ((lIndex & 0x1b) == 0) // (lIndex & b011011) == 0 |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices.xy >> 1) == (mipSize - ushort2(1)); |
| |
| // (x+1, y) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2), atEdge.x); |
| // (x, y+1) |
| float4 texel3 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 2 * kThreadGroupX), atEdge.y); |
| // (x+1, y+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (2 * kThreadGroupX + 2)), |
| atEdge.x | atEdge.y); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip3.write(TO_LINEAR(texel1), gIndices.xy >> 2, gIndices.z); |
| |
| // Write to shared memory |
| TEXEL_STORE(lIndex, texel1); |
| } |
| |
| if (options.numMipLevelsToGen == 3) |
| { |
| return; |
| } |
| |
| // ---- 4th mip level -------- |
| threadgroup_barrier(mem_flags::mem_threadgroup); |
| |
| // Index must be multiple of 8 |
| if ((lIndex & 0x3f) == 0) // (lIndex & b111111) == 0 |
| { |
| mipSize = max(mipSize >> 1, ushort2(1)); |
| bool2 atEdge = (gIndices.xy >> 2) == (mipSize - ushort2(1)); |
| |
| // (x+1, y) |
| // If the width of mip is 1, texel2 will equal to texel1: |
| float4 texel2 = OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4), atEdge.x); |
| // (x, y+1) |
| float4 texel3 = |
| OUT_OF_BOUND_CHECK(texel1, TEXEL_LOAD(lIndex + 4 * kThreadGroupX), atEdge.y); |
| // (x+1, y+1) |
| float4 texel4 = OUT_OF_BOUND_CHECK(texel2, TEXEL_LOAD(lIndex + (4 * kThreadGroupX + 4)), |
| atEdge.x | atEdge.y); |
| |
| texel1 = (texel1 + texel2 + texel3 + texel4) / 4.0; |
| |
| dstMip4.write(TO_LINEAR(texel1), gIndices.xy >> 3, gIndices.z); |
| } |
| } |
| |
| kernel void generateCubeMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort3 gIndices [[thread_position_in_grid]], |
| texturecube<float> srcTexture [[texture(0)]], |
| texturecube<float, access::write> dstMip1 [[texture(1)]], |
| texturecube<float, access::write> dstMip2 [[texture(2)]], |
| texturecube<float, access::write> dstMip3 [[texture(3)]], |
| texturecube<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| uint firstMipLevel = options.srcLevel + 1; |
| ushort2 mip1Size = |
| ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); |
| bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| // ----- First mip level ------- |
| float4 mip1Texel; |
| if (validThread) |
| { |
| float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); |
| mip1Texel = srcTexture.sample(textureSampler, cubeTexcoords(texCoords, int(gIndices.z)), |
| level(options.srcLevel)); |
| |
| // Write to texture |
| dstMip1.write(TO_LINEAR(mip1Texel), gIndices.xy, gIndices.z); |
| } |
| else |
| { |
| // This will invalidate all subsequent checks |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| // Use struct of array style to avoid bank conflict. |
| threadgroup float sR[kThreadGroupXY]; |
| threadgroup float sG[kThreadGroupXY]; |
| threadgroup float sB[kThreadGroupXY]; |
| threadgroup float sA[kThreadGroupXY]; |
| |
| generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, |
| mip1Size, mip1Texel, sR, sG, sB, sA, options); |
| } |
| |
| kernel void generate2DArrayMipmaps(uint lIndex [[thread_index_in_threadgroup]], |
| ushort3 gIndices [[thread_position_in_grid]], |
| texture2d_array<float> srcTexture [[texture(0)]], |
| texture2d_array<float, access::write> dstMip1 [[texture(1)]], |
| texture2d_array<float, access::write> dstMip2 [[texture(2)]], |
| texture2d_array<float, access::write> dstMip3 [[texture(3)]], |
| texture2d_array<float, access::write> dstMip4 [[texture(4)]], |
| constant GenMipParams &options [[buffer(0)]]) |
| { |
| uint firstMipLevel = options.srcLevel + 1; |
| ushort2 mip1Size = |
| ushort2(srcTexture.get_width(firstMipLevel), srcTexture.get_height(firstMipLevel)); |
| bool validThread = gIndices.x < mip1Size.x && gIndices.y < mip1Size.y; |
| |
| constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); |
| |
| // ----- First mip level ------- |
| float4 mip1Texel; |
| if (validThread) |
| { |
| float2 texCoords = (float2(gIndices.xy) + float2(0.5, 0.5)) / float2(mip1Size); |
| mip1Texel = |
| srcTexture.sample(textureSampler, texCoords, gIndices.z, level(options.srcLevel)); |
| |
| // Write to texture |
| dstMip1.write(TO_LINEAR(mip1Texel), gIndices.xy, gIndices.z); |
| } |
| else |
| { |
| // This will invalidate all subsequent checks |
| lIndex = 0xffffffff; |
| } |
| |
| if (options.numMipLevelsToGen == 1) |
| { |
| return; |
| } |
| |
| // Use struct of array style to avoid bank conflict. |
| threadgroup float sR[kThreadGroupXY]; |
| threadgroup float sG[kThreadGroupXY]; |
| threadgroup float sB[kThreadGroupXY]; |
| threadgroup float sA[kThreadGroupXY]; |
| |
| generateCubeOr2DArray2ndAndMoreMipmaps(lIndex, gIndices, srcTexture, dstMip2, dstMip3, dstMip4, |
| mip1Size, mip1Texel, sR, sG, sB, sA, options); |
| } |