[WHLSL] Matrix memory layout should match HLSL by laying out columns linearly https://bugs.webkit.org/show_bug.cgi?id=199215 Reviewed by Myles C. Maxfield. Source/WebCore: This patch makes it so that we lay out matrices in memory in the same way HLSL does. This is done by laying out columns linearly in memory. So a float4x4 composed by this series of floats in memory: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] composes this logical matrix: [[ 0, 4, 8, 12] [ 1, 5, 9, 13] [ 2, 6, 10, 14] [ 3, 7, 11, 15]] To implement this, we switch to using an array to represent the linear memory layout of a matrix. So the matrix float4x3 will now be an array<float, 12> in Metal. Then, we change the indexed getter and setter methods for matrices to load and store from and to the correct memory locations. The memory layout of matrices is observable to WHLSL when using a matrix as an input/output to a shader. Test: webgpu/whlsl-matrix-memory-layout.html * Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp: (WebCore::WHLSL::Metal::writeNativeFunction): * Modules/webgpu/WHLSL/Metal/WHLSLNativeTypeWriter.cpp: (WebCore::WHLSL::Metal::writeNativeType): LayoutTests: * webgpu/whlsl-matrix-memory-layout-expected.txt: Added. * webgpu/whlsl-matrix-memory-layout.html: Added. * webgpu/whlsl-test-harness-test.html: git-svn-id: http://svn.webkit.org/repository/webkit/trunk@247468 268f45cc-cd09-0410-ab3c-d52691b4dbfc

commit: 095943d286f615698d511d73ddf00eab247b2ab3 [log] [tgz]
author: sbarati@apple.com <sbarati@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc> Tue Jul 16 03:50:25 2019 +0000
committer: sbarati@apple.com <sbarati@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc> Tue Jul 16 03:50:25 2019 +0000
tree: 41a1f79a5b96a2ec469bc271b8e159bdcf703800
parent: c471ec24de521f338db464794f1edd9185ad38bf [diff]
diff --git a/LayoutTests/ChangeLog b/LayoutTests/ChangeLog
index 9f4e7db..365a177 100644
--- a/LayoutTests/ChangeLog
+++ b/LayoutTests/ChangeLog

@@ -1,3 +1,14 @@
+2019-07-15  Saam Barati  <sbarati@apple.com>
+
+        [WHLSL] Matrix memory layout should match HLSL by laying out columns linearly
+        https://bugs.webkit.org/show_bug.cgi?id=199215
+
+        Reviewed by Myles C. Maxfield.
+
+        * webgpu/whlsl-matrix-memory-layout-expected.txt: Added.
+        * webgpu/whlsl-matrix-memory-layout.html: Added.
+        * webgpu/whlsl-test-harness-test.html:
+
 2019-07-15  Wenson Hsieh  <wenson_hsieh@apple.com>
 
         [Text autosizing] [iPadOS] Product label text is clipped in portrait mode on the front page of sephora.com

diff --git a/LayoutTests/webgpu/whlsl-matrix-memory-layout-expected.txt b/LayoutTests/webgpu/whlsl-matrix-memory-layout-expected.txt
new file mode 100644
index 0000000..4882f90
--- /dev/null
+++ b/LayoutTests/webgpu/whlsl-matrix-memory-layout-expected.txt

@@ -0,0 +1,5 @@
+PASS 
+PASS successfullyParsed is true
+
+TEST COMPLETE
+

diff --git a/LayoutTests/webgpu/whlsl-matrix-memory-layout.html b/LayoutTests/webgpu/whlsl-matrix-memory-layout.html
new file mode 100644
index 0000000..eb08148
--- /dev/null
+++ b/LayoutTests/webgpu/whlsl-matrix-memory-layout.html

@@ -0,0 +1,139 @@
+<!DOCTYPE html>
+<html>
+<head>
+<script src="js/webgpu-functions.js"></script>
+<script src="../resources/js-test-pre.js"></script>
+</head>
+<body>
+<script>
+const shaderSource = `
+[numthreads(1, 1, 1)]
+compute void computeShader(device float4x4[] buffer : register(u0), float3 threadID : SV_DispatchThreadID) {
+    float4x4 result;
+    result[0] = float4(1, 2, 3, 4);
+    result[1] = float4(5, 6, 7, 8);
+    result[2] = float4(9, 10, 11, 12);
+    result[3] = float4(13, 14, 15, 16);
+
+    float4x4 mat = buffer[0];
+
+    if (mat[0][0] == 0
+        && mat[0][1] == 1
+        && mat[0][2] == 2
+        && mat[0][3] == 3
+
+        && mat[1][0] == 4
+        && mat[1][1] == 5
+        && mat[1][2] == 6
+        && mat[1][3] == 7
+
+        && mat[2][0] == 8
+        && mat[2][1] == 9
+        && mat[2][2] == 10
+        && mat[2][3] == 11
+
+        && mat[3][0] == 12
+        && mat[3][1] == 13
+        && mat[3][2] == 14
+        && mat[3][3] == 15) {
+        buffer[0] = result;
+    }
+}
+`;
+
+async function start(device) { // Uploads a known float4x4, runs computeShader over it, and checks the floats the shader writes back.
+    const shaderModule = device.createShaderModule({code: shaderSource, isWHLSL: true});
+    const computeStage = {module: shaderModule, entryPoint: "computeShader"};
+
+    const bindGroupLayoutDescriptor = {bindings: [{binding: 0, visibility: 7, type: "storage-buffer"}]};
+    const bindGroupLayout = device.createBindGroupLayout(bindGroupLayoutDescriptor);
+    const pipelineLayoutDescriptor = {bindGroupLayouts: [bindGroupLayout]};
+    const pipelineLayout = device.createPipelineLayout(pipelineLayoutDescriptor);
+
+    const computePipelineDescriptor = {computeStage, layout: pipelineLayout};
+    const computePipeline = device.createComputePipeline(computePipelineDescriptor);
+
+    const size = Float32Array.BYTES_PER_ELEMENT * 16; // Byte size of one float4x4 (16 floats).
+
+    const bufferDescriptor = {size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.TRANSFER_SRC}; // Upload buffer: written from script, then used as the copy source.
+    const buffer = device.createBuffer(bufferDescriptor);
+    const bufferArrayBuffer = await buffer.mapWriteAsync();
+    const bufferFloat32Array = new Float32Array(bufferArrayBuffer);
+    bufferFloat32Array[0] = 0; // Floats 0-3 form column 0; the shader reads them as mat[0][0], mat[1][0], mat[2][0], mat[3][0].
+    bufferFloat32Array[1] = 4;
+    bufferFloat32Array[2] = 8;
+    bufferFloat32Array[3] = 12;
+    bufferFloat32Array[4] = 1; // Floats 4-7 form column 1.
+    bufferFloat32Array[5] = 5;
+    bufferFloat32Array[6] = 9;
+    bufferFloat32Array[7] = 13;
+    bufferFloat32Array[8] = 2; // Floats 8-11 form column 2.
+    bufferFloat32Array[9] = 6;
+    bufferFloat32Array[10] = 10;
+    bufferFloat32Array[11] = 14;
+    bufferFloat32Array[12] = 3; // Floats 12-15 form column 3.
+    bufferFloat32Array[13] = 7;
+    bufferFloat32Array[14] = 11;
+    bufferFloat32Array[15] = 15;
+    buffer.unmap();
+
+    const resultsBufferDescriptor = {size, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.TRANSFER_DST | GPUBufferUsage.MAP_READ}; // Buffer bound to the shader; also the copy destination and the buffer mapped for reading below.
+    const resultsBuffer = device.createBuffer(resultsBufferDescriptor);
+
+    const bufferBinding = {buffer: resultsBuffer, size};
+    const bindGroupBinding = {binding: 0, resource: bufferBinding};
+    const bindGroupDescriptor = {layout: bindGroupLayout, bindings: [bindGroupBinding]};
+    const bindGroup = device.createBindGroup(bindGroupDescriptor);
+
+    const commandEncoder = device.createCommandEncoder(); // {}
+    commandEncoder.copyBufferToBuffer(buffer, 0, resultsBuffer, 0, size); // Seed the bound buffer with the uploaded floats before the compute pass is encoded.
+    const computePassEncoder = commandEncoder.beginComputePass();
+    computePassEncoder.setPipeline(computePipeline);
+    computePassEncoder.setBindGroup(0, bindGroup);
+    computePassEncoder.dispatch(2, 1, 1); // NOTE(review): the shader only reads and writes buffer[0]; confirm that a count of 2 (rather than 1) is intended.
+    computePassEncoder.endPass();
+    const commandBuffer = commandEncoder.finish();
+    device.getQueue().submit([commandBuffer]);
+
+    const resultsArrayBuffer = await resultsBuffer.mapReadAsync();
+    const resultsFloat32Array = new Float32Array(resultsArrayBuffer);
+    if (resultsFloat32Array[0] === 1 // The shader stores rows (1,2,3,4), (5,6,7,8), (9,10,11,12), (13,14,15,16); read back linearly, each group of four floats is one column.
+        && resultsFloat32Array[1] === 5
+        && resultsFloat32Array[2] === 9
+        && resultsFloat32Array[3] === 13
+
+        && resultsFloat32Array[4] === 2
+        && resultsFloat32Array[5] === 6
+        && resultsFloat32Array[6] === 10
+        && resultsFloat32Array[7] === 14
+
+        && resultsFloat32Array[8] === 3
+        && resultsFloat32Array[9] === 7
+        && resultsFloat32Array[10] === 11
+        && resultsFloat32Array[11] === 15
+
+        && resultsFloat32Array[12] === 4
+        && resultsFloat32Array[13] === 8
+        && resultsFloat32Array[14] === 12
+        && resultsFloat32Array[15] === 16)
+        testPassed("");
+    else
+        testFailed(""); // Also reached when the shader's input check failed, since it then leaves the seeded floats in place.
+    resultsBuffer.unmap();
+}
+window.jsTestIsAsync = true; // The test finishes only when finishJSTest() is called from one of the callbacks below.
+getBasicDevice().then(function(device) {
+    start(device).then(function() {
+        finishJSTest();
+    }, function() {
+        testFailed(""); // start() rejected: report a failure.
+        finishJSTest();
+    });
+}, function() {
+    testPassed(""); // getBasicDevice() rejected: recorded as a pass, presumably so hosts without a usable device still match the expected output (confirm).
+    finishJSTest();
+});
+</script>
+<script src="../resources/js-test-post.js"></script>
+</body>
+</html>

diff --git a/LayoutTests/webgpu/whlsl-test-harness-test.html b/LayoutTests/webgpu/whlsl-test-harness-test.html
index cd1ac1f..4529146 100644
--- a/LayoutTests/webgpu/whlsl-test-harness-test.html
+++ b/LayoutTests/webgpu/whlsl-test-harness-test.html

@@ -42,8 +42,9 @@
     "float4x4": makeFloat4x4
 };
 
-const float4x4expected = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
-const float4expected = float4x4expected.slice(0, 4);
+const float4x4expected = [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15];
+const float4x4ColumnExpected = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
+const float4expected = [0, 1, 2, 3];
 
 let whlslTests = {};
 
@@ -172,7 +173,7 @@
         arg[i] = i;
         multiple4x4args.push(arg);
     }
-    checkArrays("float4x4", "return in0 + in1 + in2 + in3 + in4 + in5 + in6 + in7 + in8 + in9 + in10 + in11 + in12 + in13 + in14 + in15;", multiple4x4args, float4x4expected);
+    checkArrays("float4x4", "return in0 + in1 + in2 + in3 + in4 + in5 + in6 + in7 + in8 + in9 + in10 + in11 + in12 + in13 + in14 + in15;", multiple4x4args, float4x4ColumnExpected);
     checkArrays("float4x4", "return in0[0];", [[float4x4expected]], float4x4expected);
 };
 

diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog
index 3851112..72ec529 100644
--- a/Source/WebCore/ChangeLog
+++ b/Source/WebCore/ChangeLog

@@ -1,3 +1,35 @@
+2019-07-15  Saam Barati  <sbarati@apple.com>
+
+        [WHLSL] Matrix memory layout should match HLSL by laying out columns linearly
+        https://bugs.webkit.org/show_bug.cgi?id=199215
+
+        Reviewed by Myles C. Maxfield.
+
+        This patch makes it so that we lay out matrices in memory in the same
+        way HLSL does. This is done by laying out columns linearly in memory. So a float4x4
+        composed by this series of floats in memory:
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+        
+        composes this logical matrix:
+        [[ 0,  4,  8, 12]
+         [ 1,  5,  9, 13]
+         [ 2,  6, 10, 14]
+         [ 3,  7, 11, 15]]
+        
+        To implement this, we switch to using an array to represent the
+        linear memory layout of a matrix. So the matrix float4x3 will now
+        be an array<float, 12> in Metal. Then, we change the indexed getter and
+        setter methods for matrices to load and store from and to the correct
+        memory locations. The memory layout of matrices is observable to WHLSL
+        when using a matrix as an input/output to a shader.
+
+        Test: webgpu/whlsl-matrix-memory-layout.html
+
+        * Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp:
+        (WebCore::WHLSL::Metal::writeNativeFunction):
+        * Modules/webgpu/WHLSL/Metal/WHLSLNativeTypeWriter.cpp:
+        (WebCore::WHLSL::Metal::writeNativeType):
+
 2019-07-15  Wenson Hsieh  <wenson_hsieh@apple.com>
 
         [Text autosizing] [iPadOS] Product label text is clipped in portrait mode on the front page of sephora.com

diff --git a/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp b/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp
index f250faa..40b3ecf 100644
--- a/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp
+++ b/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeFunctionWriter.cpp

@@ -250,12 +250,18 @@
         return stringBuilder.toString();
     }
 
-    auto numberOfMatrixRows = [&] {
+    auto matrixDimension = [&] (unsigned typeArgumentIndex) -> unsigned {
         auto& typeReference = downcast<AST::TypeReference>(*nativeFunctionDeclaration.parameters()[0]->type());
         auto& matrixType = downcast<AST::NativeTypeDeclaration>(downcast<AST::TypeReference>(downcast<AST::TypeDefinition>(typeReference.resolvedType()).type()).resolvedType());
         ASSERT(matrixType.name() == "matrix");
         ASSERT(matrixType.typeArguments().size() == 3);
-        return String::number(WTF::get<AST::ConstantExpression>(matrixType.typeArguments()[1]).integerLiteral().value());
+        return WTF::get<AST::ConstantExpression>(matrixType.typeArguments()[typeArgumentIndex]).integerLiteral().value();
+    };
+    auto numberOfMatrixRows = [&] {
+        return matrixDimension(1);
+    };
+    auto numberOfMatrixColumns = [&] {
+        return matrixDimension(2);
     };
 
     if (nativeFunctionDeclaration.name() == "operator[]") {
@@ -263,9 +269,20 @@
         auto metalParameter1Name = typeNamer.mangledNameForType(*nativeFunctionDeclaration.parameters()[0]->type());
         auto metalParameter2Name = typeNamer.mangledNameForType(*nativeFunctionDeclaration.parameters()[1]->type());
         auto metalReturnName = typeNamer.mangledNameForType(nativeFunctionDeclaration.type());
+
+        unsigned numberOfRows = numberOfMatrixRows();
+        unsigned numberOfColumns = numberOfMatrixColumns();
+
         stringBuilder.append(makeString(metalReturnName, ' ', outputFunctionName, '(', metalParameter1Name, " m, ", metalParameter2Name, " i) {\n"));
-        stringBuilder.append(makeString("    if (i < ", numberOfMatrixRows(), ") return m[i];\n"));
-        stringBuilder.append(makeString("    return ", metalReturnName, "(0);\n"));
+        stringBuilder.append(makeString("    if (i >= ", numberOfRows, ") return ", metalReturnName, "(0);\n"));
+        stringBuilder.append(makeString("    ", metalReturnName, " result;\n"));
+        stringBuilder.append("    result[0] = m[i];\n");
+        stringBuilder.append(makeString("    result[1] = m[i + ", numberOfRows, "];\n"));
+        if (numberOfColumns >= 3)
+            stringBuilder.append(makeString("    result[2] = m[i + ", numberOfRows * 2, "];\n"));
+        if (numberOfColumns >= 4)
+            stringBuilder.append(makeString("    result[3] = m[i + ", numberOfRows * 3, "];\n"));
+        stringBuilder.append("    return result;\n");
         stringBuilder.append("}\n");
         return stringBuilder.toString();
     }
@@ -276,9 +293,19 @@
         auto metalParameter2Name = typeNamer.mangledNameForType(*nativeFunctionDeclaration.parameters()[1]->type());
         auto metalParameter3Name = typeNamer.mangledNameForType(*nativeFunctionDeclaration.parameters()[2]->type());
         auto metalReturnName = typeNamer.mangledNameForType(nativeFunctionDeclaration.type());
+
+        unsigned numberOfRows = numberOfMatrixRows();
+        unsigned numberOfColumns = numberOfMatrixColumns();
+
         stringBuilder.append(makeString(metalReturnName, ' ', outputFunctionName, '(', metalParameter1Name, " m, ", metalParameter2Name, " i, ", metalParameter3Name, " v) {\n"));
-        stringBuilder.append(makeString("    if (i < ", numberOfMatrixRows(), ") m[i] = v;\n"));
-        stringBuilder.append("    return m;\n");
+        stringBuilder.append(makeString("    if (i >= ", numberOfRows, ") return m;\n")); // Out-of-range row index: the emitted code returns the matrix unmodified.
+        stringBuilder.append("    m[i] = v[0];\n"); // Element (row i, column c) is stored at index i + c * numberOfRows.
+        stringBuilder.append(makeString("    m[i + ", numberOfRows, "] = v[1];\n"));
+        if (numberOfColumns >= 3)
+            stringBuilder.append(makeString("    m[i + ", numberOfRows * 2, "] = v[2];\n"));
+        if (numberOfColumns >= 4)
+            stringBuilder.append(makeString("    m[i + ", numberOfRows * 3, "] = v[3];\n"));
+        stringBuilder.append("    return m;\n"); // Trailing newline keeps the closing brace on its own line, matching the operator[] getter's output.
         stringBuilder.append("}\n");
         return stringBuilder.toString();
     }

diff --git a/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeTypeWriter.cpp b/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeTypeWriter.cpp
index ebfe61d..9be42c3 100644
--- a/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeTypeWriter.cpp
+++ b/Source/WebCore/Modules/webgpu/WHLSL/Metal/WHLSLNativeTypeWriter.cpp

@@ -122,35 +122,19 @@
             ASSERT(parameterType.name() == "float");
             return "float";
         })();
+
         ASSERT(WTF::holds_alternative<AST::ConstantExpression>(nativeTypeDeclaration.typeArguments()[1]));
         auto& constantExpression1 = WTF::get<AST::ConstantExpression>(nativeTypeDeclaration.typeArguments()[1]);
         auto& integerLiteral1 = constantExpression1.integerLiteral();
-        auto middle = ([&]() -> String {
-            switch (integerLiteral1.value()) {
-            case 2:
-                return "2"_str;
-            case 3:
-                return "3"_str;
-            default:
-                ASSERT(integerLiteral1.value() == 4);
-                return "4"_str;
-            }
-        })();
+        unsigned rows = integerLiteral1.value();
+        ASSERT(rows == 2 || rows == 3 || rows == 4);
+
         ASSERT(WTF::holds_alternative<AST::ConstantExpression>(nativeTypeDeclaration.typeArguments()[2]));
         auto& constantExpression2 = WTF::get<AST::ConstantExpression>(nativeTypeDeclaration.typeArguments()[2]);
         auto& integerLiteral2 = constantExpression2.integerLiteral();
-        auto suffix = ([&]() -> String {
-            switch (integerLiteral2.value()) {
-            case 2:
-                return "2"_str;
-            case 3:
-                return "3"_str;
-            default:
-                ASSERT(integerLiteral2.value() == 4);
-                return "4"_str;
-            }
-        })();
-        return makeString(prefix, middle, 'x', suffix);
+        unsigned columns = integerLiteral2.value();
+        ASSERT(columns == 2 || columns == 3 || columns == 4);
+        return makeString("array<", prefix, ", ", columns * rows, ">");
     }
     ASSERT(nativeTypeDeclaration.typeArguments().size() == 1);
     ASSERT(WTF::holds_alternative<UniqueRef<AST::TypeReference>>(nativeTypeDeclaration.typeArguments()[0]));
commit	095943d286f615698d511d73ddf00eab247b2ab3	[log] [tgz]
author	sbarati@apple.com <sbarati@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>	Tue Jul 16 03:50:25 2019 +0000
committer	sbarati@apple.com <sbarati@apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>	Tue Jul 16 03:50:25 2019 +0000
tree	41a1f79a5b96a2ec469bc271b8e159bdcf703800
parent	c471ec24de521f338db464794f1edd9185ad38bf [diff]