// blob: 5df19436895ccb45a125512ee5ec922a9f53d215
// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
function asmModule(stdlib, imports, buffer) {
"use asm";
// Int32x4 constructor and operations imported from stdlib.SIMD.
// Every binding follows the pattern `var i4<op> = i4.<op>;`.
var i4 = stdlib.SIMD.Int32x4;
var i4check = i4.check;
var i4splat = i4.splat;
var i4fromFloat32x4 = i4.fromFloat32x4;
var i4fromFloat32x4Bits = i4.fromFloat32x4Bits;
//var i4abs = i4.abs;
var i4neg = i4.neg;
var i4add = i4.add;
var i4sub = i4.sub;
var i4mul = i4.mul;
var i4swizzle = i4.swizzle;
var i4shuffle = i4.shuffle;
var i4lessThan = i4.lessThan;
var i4equal = i4.equal;
var i4greaterThan = i4.greaterThan;
var i4select = i4.select;
var i4and = i4.and;
var i4or = i4.or;
var i4xor = i4.xor;
var i4not = i4.not;
// Full-vector and partial (1/2/3 lane) heap loads.
var i4load = i4.load;
var i4load1 = i4.load1;
var i4load2 = i4.load2;
var i4load3 = i4.load3;
// Full-vector and partial (1/2/3 lane) heap stores.
var i4store = i4.store;
var i4store1 = i4.store1;
var i4store2 = i4.store2;
var i4store3 = i4.store3;
//var i4shiftLeftByScalar = i4.shiftLeftByScalar;
//var i4shiftRightByScalar = i4.shiftRightByScalar;
//var i4shiftRightArithmeticByScalar = i4.shiftRightArithmeticByScalar;
// Float32x4 constructor and operations imported from stdlib.SIMD.
// Every binding follows the pattern `var f4<op> = f4.<op>;`.
var f4 = stdlib.SIMD.Float32x4;
var f4check = f4.check;
var f4splat = f4.splat;
var f4fromInt32x4 = f4.fromInt32x4;
var f4fromInt32x4Bits = f4.fromInt32x4Bits;
var f4abs = f4.abs;
var f4neg = f4.neg;
var f4add = f4.add;
var f4sub = f4.sub;
var f4mul = f4.mul;
var f4div = f4.div;
var f4min = f4.min;
var f4max = f4.max;
var f4sqrt = f4.sqrt;
var f4swizzle = f4.swizzle;
var f4shuffle = f4.shuffle;
var f4lessThan = f4.lessThan;
var f4lessThanOrEqual = f4.lessThanOrEqual;
var f4equal = f4.equal;
var f4notEqual = f4.notEqual;
var f4greaterThan = f4.greaterThan;
var f4greaterThanOrEqual = f4.greaterThanOrEqual;
var f4select = f4.select;
// Full-vector and partial (1/2/3 lane) heap loads.
var f4load = f4.load;
var f4load1 = f4.load1;
var f4load2 = f4.load2;
var f4load3 = f4.load3;
// Full-vector and partial (1/2/3 lane) heap stores.
var f4store = f4.store;
var f4store1 = f4.store1;
var f4store2 = f4.store2;
var f4store3 = f4.store3;
// Scalar float coercion imported from stdlib.
var fround = stdlib.Math.fround;
// SIMD globals: two imported through the `imports` object (type-checked with
// f4check / i4check) and two initialized from literals.
var globImportF4 = f4check(imports.g1); // global var import
var globImportI4 = i4check(imports.g2); // global var import
var g1 = f4(-5033.2,-3401.0,665.34,32234.1); // global var initialized
var g2 = i4(1065353216, -1073741824, -1077936128, 1082130432); // global var initialized
// Scalar globals: one integer literal and one double literal.
var gval = 1234;
var gval2 = 1234.0;
// Number of times each store_*/load_* function repeats its heap sweep.
var loopCOUNT = 3;
// Typed-array views over the single shared heap `buffer`; every view aliases
// the same bytes.
var Int8Heap = new stdlib.Int8Array (buffer);
var Uint8Heap = new stdlib.Uint8Array (buffer);
var Int16Heap = new stdlib.Int16Array(buffer);
var Uint16Heap = new stdlib.Uint16Array(buffer);
var Int32Heap = new stdlib.Int32Array(buffer);
var Uint32Heap = new stdlib.Uint32Array(buffer);
var Float32Heap = new stdlib.Float32Array(buffer);
// Stores the Int32x4 `value` at element index `idx` of Float32Heap.
//   value - Int32x4 (coerced with i4check)
//   idx   - int element index into Float32Heap
function storeF32(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // Scale by the 4-byte element size; the store shifts it back, so the
    // access is written in `byteOffset >> 2` form.
    idx = idx<<2;
    i4store(Float32Heap, (idx>>2), value);
}
// Loads an Int32x4 from element index `idx` of Float32Heap and returns it.
//   idx - int element index into Float32Heap
function loadF32(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // Scale by the 4-byte element size; the load shifts it back, so the
    // access is written in `byteOffset >> 2` form.
    idx = idx<<2;
    return i4load(Float32Heap, (idx>>2));
}
// Stores the Int32x4 `value` at element index `idx` of Uint32Heap.
function storeUI32(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // 4-byte elements: scale up here, shift back down in the access.
    idx = idx<<2;
    i4store(Uint32Heap, (idx>>2), value);
}
// Loads and returns the Int32x4 at element index `idx` of Uint32Heap.
function loadUI32(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // 4-byte elements: scale up here, shift back down in the access.
    idx = idx<<2;
    return i4load(Uint32Heap, (idx>>2));
}
// Stores the Int32x4 `value` at element index `idx` of Int32Heap.
function storeI32(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // 4-byte elements: scale up here, shift back down in the access.
    idx = idx<<2;
    i4store(Int32Heap, (idx>>2), value);
}
// Loads and returns the Int32x4 at element index `idx` of Int32Heap.
function loadI32(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // 4-byte elements: scale up here, shift back down in the access.
    idx = idx<<2;
    return i4load(Int32Heap, (idx>>2));
}
// Stores the Int32x4 `value` at element index `idx` of Int16Heap.
function storeI16(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // 2-byte elements: scale up here, shift back down in the access.
    idx = idx<<1;
    i4store(Int16Heap, (idx>>1), value);
}
// Loads and returns the Int32x4 at element index `idx` of Int16Heap.
function loadI16(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // 2-byte elements: scale up here, shift back down in the access.
    idx = idx<<1;
    return i4load(Int16Heap, (idx>>1));
}
// Stores the Int32x4 `value` at element index `idx` of Uint16Heap.
function storeUI16(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // 2-byte elements: scale up here, shift back down in the access.
    idx = idx<<1;
    i4store(Uint16Heap, (idx>>1), value);
}
// Loads and returns the Int32x4 at element index `idx` of Uint16Heap.
function loadUI16(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // 2-byte elements: scale up here, shift back down in the access.
    idx = idx<<1;
    return i4load(Uint16Heap, (idx>>1));
}
// Stores the Int32x4 `value` at byte index `idx` of Int8Heap.
function storeI8(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // 1-byte elements: the shift amount is 0, kept for symmetry with the
    // wider views.
    idx = idx<<0;
    i4store(Int8Heap, (idx>>0), value);
}
// Loads and returns the Int32x4 at byte index `idx` of Int8Heap.
function loadI8(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // 1-byte elements: the shift amount is 0.
    idx = idx<<0;
    return i4load(Int8Heap, (idx>>0));
}
// Stores the Int32x4 `value` at byte index `idx` of Uint8Heap.
function storeUI8(value, idx)
{
    // asm.js parameter type annotations.
    value= i4check(value);
    idx = idx|0;
    // 1-byte elements: the shift amount is 0, kept for symmetry with the
    // wider views.
    idx = idx<<0;
    i4store(Uint8Heap, (idx>>0), value);
}
// Loads and returns the Int32x4 at byte index `idx` of Uint8Heap.
function loadUI8(idx)
{
    idx = idx|0; // asm.js parameter type annotation
    // 1-byte elements: the shift amount is 0.
    idx = idx<<0;
    return i4load(Uint8Heap, (idx>>0));
}
// Stores the constant vector (-1,-2,3,-4) at Float32Heap element 0 using a
// literal index, then loads and returns the vector from the same location.
function loadStoreIndex1()
{
    i4store(Float32Heap, 0, i4(-1,-2,3,-4));
    return i4load(Float32Heap, 0);
}
// Sweeps the first `end` bytes of the heap in 16-byte steps, writing v0 with
// the store variant selected by functionPicker (5 = store, 6 = store1,
// 7 = store2, 8 = store3) and adding (1,1,1,1) to v0 after each write.
// The sweep is repeated loopCOUNT times using a while loop.
// Returns the Int32x4 at Float32Heap element 0.
function store_1(functionPicker) //Function picker to pick store1/store2/store3/store
{
    functionPicker = functionPicker|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 256;

    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(5, -12, 3, 0);
        // Note: `end|0 << 2` parses as `end | (0 << 2)`, so the bound is `end`.
        for(idx = idx << 2; (idx|0) < (end|0 << 2); idx = (idx + 16)|0)
        {
            // Exactly one store variant runs per iteration.
            switch(functionPicker|0)
            {
                case 5:
                    i4store(Float32Heap, idx>>2, v0);
                    break;
                case 6:
                    i4store1(Float32Heap, idx>>2, v0);
                    break;
                case 7:
                    i4store2(Float32Heap, idx>>2, v0);
                    break;
                case 8:
                    i4store3(Float32Heap, idx>>2, v0);
                    break;
            }
            v0 = i4add(v0, i4(1,1,1,1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    return i4load(Float32Heap, 0);
}
// Same sweep as store_1 but driven by a for loop and seeded with (0,0,0,0):
// writes v0 every 16 bytes over the first `end` bytes with the store variant
// selected by functionPicker (5 = store, 6 = store1, 7 = store2, 8 = store3),
// adding (1,1,1,1) to v0 after each write. Repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 8.
function store_2(functionPicker)
{
    functionPicker = functionPicker|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 256;

    for (loopIndex = 0; (loopIndex | 0) < (loopCOUNT | 0) ; loopIndex = (loopIndex + 1) | 0)
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 2` parses as `end | (0 << 2)`, so the bound is `end`.
        for(idx = idx << 2; (idx|0) < (end|0 << 2); idx = (idx + 16)|0)
        {
            // Exactly one store variant runs per iteration.
            switch(functionPicker|0)
            {
                case 5:
                    i4store(Float32Heap, idx>>2, v0);
                    break;
                case 6:
                    i4store1(Float32Heap, idx>>2, v0);
                    break;
                case 7:
                    i4store2(Float32Heap, idx>>2, v0);
                    break;
                case 8:
                    i4store3(Float32Heap, idx>>2, v0);
                    break;
            }
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
    }
    return i4load(Float32Heap, 8);
}
// Same sweep as store_2 but driven by a do/while loop that counts loopIndex
// down from loopCOUNT: writes v0 every 16 bytes over the first `end` bytes
// with the store variant selected by functionPicker (5 = store, 6 = store1,
// 7 = store2, 8 = store3), adding (1,1,1,1) to v0 after each write.
// Returns the Int32x4 at Float32Heap element 8.
function store_3(functionPicker)
{
    functionPicker = functionPicker|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 256;

    loopIndex = loopCOUNT | 0;
    do {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 2` parses as `end | (0 << 2)`, so the bound is `end`.
        for(idx = idx << 2; (idx|0) < (end|0 << 2); idx = (idx + 16)|0)
        {
            // Exactly one store variant runs per iteration.
            switch(functionPicker|0)
            {
                case 5:
                    i4store(Float32Heap, idx>>2, v0);
                    break;
                case 6:
                    i4store1(Float32Heap, idx>>2, v0);
                    break;
                case 7:
                    i4store2(Float32Heap, idx>>2, v0);
                    break;
                case 8:
                    i4store3(Float32Heap, idx>>2, v0);
                    break;
            }
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex - 1) | 0;
    }
    while ( (loopIndex | 0) > 0);
    return i4load(Float32Heap, 8);
}
// Writes Int32x4 vectors through Int8Heap every 16 bytes over the first
// `length * 4` bytes, starting at (0,0,0,0) and adding (1,1,1,1) after each
// store; the sweep is repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 2.
function store_1_Int8(length)
{
    length = length|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 0;
    end = (length * 4)|0;
    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 0` parses as `end | (0 << 0)`, so the bound is `end`.
        for(idx = idx << 0; (idx|0) < (end|0 << 0); idx = (idx + 16)|0)
        {
            i4store(Int8Heap, idx>>0, v0);
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    //Heap (as Int32 lanes) becomes: 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,... (one more per 16-byte store)
    return i4load(Float32Heap, 2);
}
// Writes Int32x4 vectors through Uint8Heap every 16 bytes over the first
// `length * 4` bytes, starting at (0,0,0,0) and adding (1,1,1,1) after each
// store; the sweep is repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 2.
function store_1_Uint8(length)
{
    length = length|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 0;
    end = (length * 4)|0;
    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 0` parses as `end | (0 << 0)`, so the bound is `end`.
        for(idx = idx << 0; (idx|0) < (end|0 << 0); idx = (idx + 16)|0)
        {
            i4store(Uint8Heap, idx>>0, v0);
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    //Heap (as Int32 lanes) becomes: 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,... (one more per 16-byte store)
    return i4load(Float32Heap, 2);
}
// Writes Int32x4 vectors through Int16Heap every 16 bytes over the first
// `length * 4` bytes, starting at (0,0,0,0) and adding (1,1,1,1) after each
// store; the sweep is repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 2.
function store_1_Int16(length)
{
    length = length|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 0;
    end = (length * 4)|0;
    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 1` parses as `end | (0 << 1)`, so the bound is `end`.
        for(idx = idx << 1; (idx|0) < (end|0 << 1); idx = (idx + 16)|0)
        {
            i4store(Int16Heap, idx>>1, v0);
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    //Heap (as Int32 lanes) becomes: 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,... (one more per 16-byte store)
    return i4load(Float32Heap, 2);
}
// Writes Int32x4 vectors through Uint16Heap every 16 bytes over the first
// `length * 4` bytes, starting at (0,0,0,0) and adding (1,1,1,1) after each
// store; the sweep is repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 2.
function store_1_Uint16(length)
{
    length = length|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 0;
    end = (length * 4)|0;
    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 1` parses as `end | (0 << 1)`, so the bound is `end`.
        for(idx = idx << 1; (idx|0) < (end|0 << 1); idx = (idx + 16)|0)
        {
            i4store(Uint16Heap, idx>>1, v0);
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    //Heap (as Int32 lanes) becomes: 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,... (one more per 16-byte store)
    return i4load(Float32Heap, 2);
}
// Writes Int32x4 vectors through Int32Heap every 16 bytes over the first
// `length * 4` bytes, starting at (0,0,0,0) and adding (1,1,1,1) after each
// store; the sweep is repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 2.
function store_1_Int32(length)
{
    length = length|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 0;
    end = (length * 4)|0;
    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 2` parses as `end | (0 << 2)`, so the bound is `end`.
        for(idx = idx << 2; (idx|0) < (end|0 << 2); idx = (idx + 16)|0)
        {
            i4store(Int32Heap, idx>>2, v0);
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    //Heap (as Int32 lanes) becomes: 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,... (one more per 16-byte store)
    return i4load(Float32Heap, 2);
}
// Writes Int32x4 vectors through Uint32Heap every 16 bytes over the first
// `length * 4` bytes, starting at (0,0,0,0) and adding (1,1,1,1) after each
// store; the sweep is repeated loopCOUNT times.
// Returns the Int32x4 at Float32Heap element 2.
function store_1_Uint32(length)
{
    length = length|0;
    var v0 = i4(0,0,0,0);
    var loopIndex = 0, idx = 0, end = 0;
    end = (length * 4)|0;
    while((loopIndex|0) < (loopCOUNT|0))
    {
        idx = 0;
        v0 = i4(0,0,0,0);
        // Note: `end|0 << 2` parses as `end | (0 << 2)`, so the bound is `end`.
        for(idx = idx << 2; (idx|0) < (end|0 << 2); idx = (idx + 16)|0)
        {
            i4store(Uint32Heap, idx>>2, v0);
            v0 = i4add(v0, i4(1, 1, 1, 1));
        }
        loopIndex = (loopIndex + 1)|0;
    }
    //Heap (as Int32 lanes) becomes: 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,... (one more per 16-byte store)
    return i4load(Float32Heap, 2);
}
// Walks byte offsets up to `end << 2` one byte at a time, loading from
// Float32Heap with the variant selected by functionPicker (1 = load,
// 2 = load1, 3 = load2, 4 = load3). The outer while loop runs loopCOUNT times.
// Returns the last vector loaded.
function load_1(functionPicker)
{
    //length = length|0;
    functionPicker = functionPicker|0;
    var idx=0,end=16;//(length-4)|0;;
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is not reset here, so each pass starts from the previous final
        // idx shifted left by 2.
        for(idx = idx << 2; (idx|0) <= (end << 2); idx = (idx + 1)|0)
        {
            // Exactly one load variant runs per iteration.
            switch(functionPicker|0)
            {
                case 1:
                    v = i4load(Float32Heap, idx>>2);
                    break;
                case 2:
                    v = i4load1(Float32Heap, idx>>2);
                    break;
                case 3:
                    v = i4load2(Float32Heap, idx>>2);
                    break;
                case 4:
                    v = i4load3(Float32Heap, idx>>2);
                    break;
            }
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
// Same walk as load_1 but with a for loop as the outer loop: loads from
// Float32Heap at byte offsets up to `end << 2` with the variant selected by
// functionPicker (1 = load, 2 = load1, 3 = load2, 4 = load3).
// Returns the last vector loaded.
function load_2(functionPicker)
{
    //length = length|0;
    functionPicker = functionPicker|0;
    var idx=0,end=16;//(length-4)|0;;
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    for (loopIndex = 0; (loopIndex | 0) < (loopCOUNT | 0) ; loopIndex = (loopIndex + 1) | 0)
    {
        // idx is not reset here, so each pass starts from the previous final
        // idx shifted left by 2.
        for(idx = idx << 2; (idx|0) <= (end << 2); idx = (idx + 1)|0)
        {
            // Exactly one load variant runs per iteration.
            switch(functionPicker|0)
            {
                case 1:
                    v = i4load(Float32Heap, idx>>2);
                    break;
                case 2:
                    v = i4load1(Float32Heap, idx>>2);
                    break;
                case 3:
                    v = i4load2(Float32Heap, idx>>2);
                    break;
                case 4:
                    v = i4load3(Float32Heap, idx>>2);
                    break;
            }
        }
    }
    return v;
}
// Same walk as load_1 but with a do/while outer loop that counts loopIndex
// down from loopCOUNT and resets idx to 0 on every pass: loads from
// Float32Heap at byte offsets 0..(end << 2) with the variant selected by
// functionPicker (1 = load, 2 = load1, 3 = load2, 4 = load3).
// Returns the last vector loaded.
function load_3(functionPicker)
{
    //length = length|0;
    functionPicker = functionPicker|0;
    var idx=0,end=16;//(length-4)|0;;
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    loopIndex = loopCOUNT | 0;
    do {
        idx = 0;
        for(idx = idx << 2; (idx|0) <= (end << 2); idx = (idx + 1)|0)
        {
            // Exactly one load variant runs per iteration.
            switch(functionPicker|0)
            {
                case 1:
                    v = i4load(Float32Heap, idx>>2);
                    break;
                case 2:
                    v = i4load1(Float32Heap, idx>>2);
                    break;
                case 3:
                    v = i4load2(Float32Heap, idx>>2);
                    break;
                case 4:
                    v = i4load3(Float32Heap, idx>>2);
                    break;
            }
        }
        loopIndex = (loopIndex - 1) | 0;
    }
    while ( (loopIndex | 0) > 0);
    return v;
}
// Loads Int32x4 vectors through Int8Heap at every byte offset up to
// `length * 4 - 16`, inside an outer loop that runs loopCOUNT times.
// Returns the last vector loaded.
function load_1_Int8(length)
{
    length = length|0;
    var idx=0,end=0;
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    end = (((length * 4)|0) - 16)|0;
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is carried over between passes (it is never reset to 0 here).
        for(idx = idx << 0; (idx|0) <= (end << 0); idx = (idx + 1)|0)
        {
            v = i4load(Int8Heap, idx>>0);
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
// Loads Int32x4 vectors through Uint8Heap at every byte offset up to
// `length * 4 - 16`, inside an outer loop that runs loopCOUNT times.
// Returns the last vector loaded.
function load_1_Uint8(length)
{
    length = length|0;
    var idx=0,end=0;
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    end = (((length * 4)|0) - 16)|0;
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is carried over between passes (it is never reset to 0 here).
        for(idx = idx << 0; (idx|0) <= (end << 0); idx = (idx + 1)|0)
        {
            v = i4load(Uint8Heap, idx>>0);
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
// Loads Int32x4 vectors through Int16Heap at every byte offset up to
// `(length * 2 - 8) << 1`, inside an outer loop that runs loopCOUNT times.
// Returns the last vector loaded.
function load_1_Int16(length)
{
    length = length|0;
    var idx=0,end=0;
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    end = (((length * 2)|0) - 8)|0;
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is carried over between passes (it is never reset to 0 here).
        for(idx = idx << 1; (idx|0) <= (end << 1); idx = (idx + 1)|0)
        {
            v = i4load(Int16Heap, idx>>1);
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
// Loads Int32x4 vectors through Uint16Heap at every byte offset up to
// `(length * 2 - 8) << 1`, inside an outer loop that runs loopCOUNT times.
// Returns the last vector loaded.
function load_1_Uint16(length)
{
    length = length|0;
    var idx=0,end=120; // the initial `end` is overwritten below
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    end = (((length * 2)|0) - 8)|0;
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is carried over between passes (it is never reset to 0 here).
        for(idx = idx << 1; (idx|0) <= (end << 1); idx = (idx + 1)|0)
        {
            v = i4load(Uint16Heap, idx>>1);
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
// Loads Int32x4 vectors through Int32Heap at every byte offset up to
// `(length - 4) << 2`, inside an outer loop that runs loopCOUNT times.
// Returns the last vector loaded.
function load_1_Int32(length)
{
    length = length|0;
    var idx=0,end=60; // the initial `end` is overwritten below
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    end = (((length * 1)|0) - 4)|0;
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is carried over between passes (it is never reset to 0 here).
        for(idx = idx << 2; (idx|0) <= (end << 2); idx = (idx + 1)|0)
        {
            v = i4load(Int32Heap, idx>>2);
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
// Loads Int32x4 vectors through Uint32Heap at every byte offset up to
// `(length - 4) << 2`, inside an outer loop that runs loopCOUNT times.
// Returns the last vector loaded.
function load_1_Uint32(length)
{
    length = length|0;
    var idx=0,end=60; // the initial `end` is overwritten below
    var loopIndex = 0;
    var v = i4(0,0,0,0);
    end = (((length * 1)|0) - 4)|0;
    while ( (loopIndex|0) < (loopCOUNT|0)) {
        // idx is carried over between passes (it is never reset to 0 here).
        for(idx = idx << 2; (idx|0) <= (end << 2); idx = (idx + 1)|0)
        {
            v = i4load(Uint32Heap, idx>>2);
        }
        loopIndex = (loopIndex + 1) | 0;
    }
    return v;
}
return {store1:store_1
var buffer = new ArrayBuffer(0x10000); // 64 KiB heap shared by all views (asm.js heap minimum is 2^12 bytes)
// Resets the heap: views `buffer` as a Float32Array and sets element i to
// i * 10, overwriting whatever a previous test stored.
//   buffer  - ArrayBuffer to (re)initialize
//   returns - the number of Float32 elements in the buffer
function initF32(buffer) {
    var values = new Float32Array( buffer );
    for( var i=0; i < values.length ; ++i ) {
        values[i] = i * 10;
    }
    return values.length;
}
// Debug helper: loads an Int32x4 from `buffer` at every 16th index below
// `count` and prints it.
// NOTE(review): the print call is not present in this copy of the file (the
// loaded value was otherwise unused); it is reconstructed from the function
// name - confirm against the upstream test.
function printBuffer(buffer, count)
{
    var i4;
    for (var i = 0; i < count/* * 16*/; i += 16)
    {
        i4 = SIMD.Int32x4.load(buffer, i);
        print(i4.toString());
    }
}
// Debug helper: prints a result value.
// NOTE(review): the body of this function is missing from this copy of the
// file; it is reconstructed from the name only - confirm against the
// upstream test. The driver below does not call it.
function printResults(res)
{
    print(res.toString());
}
// Number of Float32 elements in the heap; refreshed on every heap reset.
var inputLength = initF32(buffer);
// Selector values passed as `functionPicker` to load_1..3 (1-4) and
// store_1..3 (5-8) inside the asm.js module.
var SIMDLoad = 1;
var SIMDLoad1 = 2;
var SIMDLoad2 = 3;
var SIMDLoad3 = 4;
var SIMDStore = 5;
var SIMDStore1 = 6;
var SIMDStore2 = 7;
var SIMDStore3 = 8;
//Module initialization
// `this` is the global object (stdlib); g1/g2 are the SIMD globals the module
// imports. g0 is not read by the module, but evaluating it resets the heap.
var m = asmModule(this, {g0:initF32(buffer),g1:SIMD.Float32x4(9,9,9,9), g2:SIMD.Int32x4(1, 2, 3, 4)}, buffer);
// Store checks. Each check resets the heap with initF32 (element i = i * 10)
// before or after the call, so the untouched lanes in the partial-store checks
// hold the Int32 bit patterns of the floats 10, 20 and 30
// (1092616192, 1101004800, 1106247680).
var values = new Float32Array(buffer);
var ret = m.store1(SIMDStore1);//Lane1 store
equalSimd([5, 1092616192, 1101004800, 1106247680], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1(SIMDStore2);//Lane 1,2 store
equalSimd([5, -12, 1101004800, 1106247680], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1(SIMDStore3);//Lane 1,2,3 store
equalSimd([5, -12, 3, 1106247680], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
//store_1 seeds its vector with (5,-12,3,0) and adds (1,1,1,1) per store, so the heap starts 5,-12,3,0,6,-11,4,1,...
var ret = m.store1(SIMDStore);//Generic Store
equalSimd([5, -12, 3, 0], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
// store_2/store_3 seed with (0,0,0,0) and return the vector at Float32 element 8 (the third 16-byte store).
var ret = m.store2(SIMDStore);//Generic store
equalSimd([2, 2, 2, 2], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store3(SIMDStore);//Generic store
equalSimd([2, 2, 2, 2], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
// The per-view stores return the vector at Float32 element 2, which straddles the first two 16-byte stores: (0,0,1,1).
var ret = m.store1Int8(inputLength);//Int8Heap store
equalSimd([0, 0, 1, 1], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1Uint8(inputLength);//Uint8Heap store
equalSimd([0, 0, 1, 1], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1Int16(inputLength);//Int16Heap store
equalSimd([0, 0, 1, 1], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1Uint16(inputLength);//Uint16Heap store
equalSimd([0, 0, 1, 1], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1Int32(inputLength);//Int32Heap store
equalSimd([0, 0, 1, 1], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.store1Uint32(inputLength);//Uint32Heap store
equalSimd([0, 0, 1, 1], ret, SIMD.Int32x4, "");
inputLength = initF32(buffer);
var ret = m.loadStoreIndex1();//Float32Heap store + load at literal index 0
equalSimd([-1, -2, 3, -4], ret, SIMD.Int32x4, "");
// Load checks. load_1..3 finish at Float32 element 16, so lane 0 is the bit
// pattern of 160.0 (1126170624); the partial loads leave the remaining lanes 0.
var ret = m.load1(SIMDLoad1);
equalSimd([1126170624, 0, 0, 0], ret, SIMD.Int32x4, "");
var ret = m.load1(SIMDLoad2);
equalSimd([1126170624, 1126825984, 0, 0], ret, SIMD.Int32x4, "");
var ret = m.load1(SIMDLoad3);
equalSimd([1126170624, 1126825984, 1127481344, 0], ret, SIMD.Int32x4, "");
var ret = m.load1(SIMDLoad);
equalSimd([1126170624, 1126825984, 1127481344, 1128136704], ret, SIMD.Int32x4, "");
var ret = m.load2(SIMDLoad);
equalSimd([1126170624, 1126825984, 1127481344, 1128136704], ret, SIMD.Int32x4, "");
var ret = m.load3(SIMDLoad);
equalSimd([1126170624, 1126825984, 1127481344, 1128136704], ret, SIMD.Int32x4, "");
// The per-view loads all expect the same vector regardless of which heap view is used.
var ret = m.load1Int8(inputLength); //Int8Heap load
equalSimd([1210054144, 1210054784, 1210055424, 1210056064], ret, SIMD.Int32x4, "");
var ret = m.load1Uint8(inputLength); //Uint8Heap load
equalSimd([1210054144, 1210054784, 1210055424, 1210056064], ret, SIMD.Int32x4, "");
var ret = m.load1Int16(inputLength); //Int16Heap load
equalSimd([1210054144, 1210054784, 1210055424, 1210056064], ret, SIMD.Int32x4, "");
var ret = m.load1Uint16(inputLength); //Uint16Heap load
equalSimd([1210054144, 1210054784, 1210055424, 1210056064], ret, SIMD.Int32x4, "");
var ret = m.load1Int32(inputLength); //Int32Heap load
equalSimd([1210054144, 1210054784, 1210055424, 1210056064], ret, SIMD.Int32x4, "");
var ret = m.load1Uint32(inputLength); //Uint32Heap load
equalSimd([1210054144, 1210054784, 1210055424, 1210056064], ret, SIMD.Int32x4, "");
// Bounds checks. inputLength is the heap size in Float32 elements, so a
// 4-lane (16-byte) access is in range only up to element inputLength-4
// (inputLength*2-8 for 16-bit views, inputLength*4-16 for 8-bit views).
// An access that is expected to throw prints "Wrong" if it returns normally;
// an access that is expected to succeed prints "Wrong" if it throws.
var value = SIMD.Int32x4(9,1,3,5);

// Float32 view: past the end and straddling the end must throw.
try {m.storeF32(value, inputLength); print("Wrong");} catch(err) {}
try {m.loadF32(inputLength); print("Wrong");} catch(err) {}
try {m.storeF32(value, inputLength-1); print("Wrong");} catch(err) {}
try {m.loadF32(inputLength-1); print("Wrong");} catch(err) {}
// Float32 view: the last full vector must succeed.
try {m.storeF32(value, inputLength-4);} catch(err) {print("Wrong");}
try {var v = m.loadF32(inputLength-4);} catch(err) {print("Wrong");}

// 32-bit integer views: past the end must throw.
try {m.storeUI32(value, inputLength+1);print("Wrong");} catch(err) {}
try { m.loadUI32(inputLength+1); print("Wrong"); } catch(err) { }
try {m.storeI32(value, inputLength+1); print("Wrong");} catch(err) {}
try {m.loadI32(inputLength+1);print("Wrong");} catch(err) {}

// 16-bit and 8-bit views: the last full vector must succeed.
try {
    m.storeI16(value, inputLength*2-8);
    m.storeUI16(value, inputLength*2-8);
    m.storeI8(value, inputLength*4-16);
    m.storeUI8(value, inputLength*4-16);
} catch(err){ print("Wrong"); }

// 16-bit and 8-bit views: past the end or straddling the end must throw.
try {m.storeUI16(value, inputLength*2);print("Wrong");} catch(err) {}
try {m.loadUI16(inputLength*2-7); print("Wrong");} catch(err) {}
try {m.storeI16(value, inputLength*2-7); print("Wrong");} catch(err) {}
try {m.loadI16(inputLength*2-7); print("Wrong");} catch(err) {}
try {m.storeUI8(value, inputLength*4-15); print("Wrong");} catch(err) {}
try {m.loadUI8(inputLength*4-15); print("Wrong");} catch(err) {}
try {m.storeI8(value, inputLength*4-15); print("Wrong");} catch(err) {}
try {m.loadI8(inputLength*4+15); print("Wrong");} catch(err) {}