[JSC] Extend MacroAssemblerARM64::load/store for datasize = 16
https://bugs.webkit.org/show_bug.cgi?id=204442
<rdar://problem/57366761>

Reviewed by Mark Lam.

Our `void load16(const void* address, RegisterID dest)` and `void store16(RegisterID src, const void* address)` are not aware of
the condition that passed register can be memoryTempRegister, while `MacroAssemblerARM64::{load,store}` handles it correctly, e.g.
`load` invalidates `cachedMemoryTempRegister` if destination register is memoryTempRegister. As a result, when we are emitting
`or16(TrustedImm32 imm, AbsoluteAddress address)` with address where the address's value does not fit in imm, the generated code
is reusing memoryTempRegister incorrectly.

    0xedf8d4fb4: mov    x17, #0x7af0
    0xedf8d4fb8: movk   x17, #0xd5a, lsl #16
    0xedf8d4fbc: movk   x17, #0x1, lsl #32        // Construct imm register on x17.
    0xedf8d4fc0: ldrh   w17, [x17]                // Load half word from x17 to w17 (we should invalidate x17 memoryTempRegister here).
    0xedf8d4fc4: mov    w16, #0x1b
    0xedf8d4fc8: orr    w16, w17, w16
    0xedf8d4fcc: strh   w16, [x17]                // x17 memoryTempRegister is reused while its content is invalid.

The problem is that `load` and `store` functions are not supporting datasize = 16 case. This patch extends `MacroAssemblerARM64::{load,store}`
to support 16 so that `or16` implementation looks is similar to `or32` etc.

* assembler/MacroAssemblerARM64.h:
(JSC::MacroAssemblerARM64::load16):
(JSC::MacroAssemblerARM64::store16):
(JSC::MacroAssemblerARM64::load):
(JSC::MacroAssemblerARM64::store):
* assembler/testmasm.cpp:
(JSC::testOrImmMem):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@252728 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog
index 0e429db..5c7763a 100644
--- a/Source/JavaScriptCore/ChangeLog
+++ b/Source/JavaScriptCore/ChangeLog
@@ -1,3 +1,36 @@
+2019-11-20  Yusuke Suzuki  <ysuzuki@apple.com>
+
+        [JSC] Extend MacroAssemblerARM64::load/store for datasize = 16
+        https://bugs.webkit.org/show_bug.cgi?id=204442
+        <rdar://problem/57366761>
+
+        Reviewed by Mark Lam.
+
+        Our `void load16(const void* address, RegisterID dest)` and `void store16(RegisterID src, const void* address)` are not aware of
+        the condition that passed register can be memoryTempRegister, while `MacroAssemblerARM64::{load,store}` handles it correctly, e.g.
+        `load` invalidates `cachedMemoryTempRegister` if destination register is memoryTempRegister. As a result, when we are emitting
+        `or16(TrustedImm32 imm, AbsoluteAddress address)` with address where the address's value does not fit in imm, the generated code
+        is reusing memoryTempRegister incorrectly.
+
+            0xedf8d4fb4: mov    x17, #0x7af0
+            0xedf8d4fb8: movk   x17, #0xd5a, lsl #16
+            0xedf8d4fbc: movk   x17, #0x1, lsl #32        // Construct imm register on x17.
+            0xedf8d4fc0: ldrh   w17, [x17]                // Load half word from x17 to w17 (we should invalidate x17 memoryTempRegister here).
+            0xedf8d4fc4: mov    w16, #0x1b
+            0xedf8d4fc8: orr    w16, w17, w16
+            0xedf8d4fcc: strh   w16, [x17]                // x17 memoryTempRegister is reused while its content is invalid.
+
+        The problem is that `load` and `store` functions are not supporting datasize = 16 case. This patch extends `MacroAssemblerARM64::{load,store}`
+        to support 16 so that `or16` implementation looks is similar to `or32` etc.
+
+        * assembler/MacroAssemblerARM64.h:
+        (JSC::MacroAssemblerARM64::load16):
+        (JSC::MacroAssemblerARM64::store16):
+        (JSC::MacroAssemblerARM64::load):
+        (JSC::MacroAssemblerARM64::store):
+        * assembler/testmasm.cpp:
+        (JSC::testOrImmMem):
+
 2019-11-20  Saam Barati  <sbarati@apple.com>
 
         Baseline JIT should fill in StructureStubInfo's propertyIsInt32 and the slow path should update the array profile more frequently
diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
index dbff2d7..3fe354e 100644
--- a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
@@ -1254,8 +1254,7 @@
 
     void load16(const void* address, RegisterID dest)
     {
-        moveToCachedReg(TrustedImmPtr(address), cachedMemoryTempRegister());
-        m_assembler.ldrh(dest, memoryTempRegister, 0);
+        load<16>(address, dest);
     }
 
     void load16Unaligned(ImplicitAddress address, RegisterID dest)
@@ -1568,8 +1567,7 @@
 
     void store16(RegisterID src, const void* address)
     {
-        moveToCachedReg(TrustedImmPtr(address), cachedMemoryTempRegister());
-        m_assembler.strh(src, memoryTempRegister, 0);
+        store<16>(src, address);
     }
 
     void store16(TrustedImm32 imm, const void* address)
@@ -4220,12 +4218,12 @@
 
             if (isInt<32>(addressDelta)) {
                 if (Assembler::canEncodeSImmOffset(addressDelta)) {
-                    m_assembler.ldur<datasize>(dest,  memoryTempRegister, addressDelta);
+                    loadUnscaledImmediate<datasize>(dest, memoryTempRegister, addressDelta);
                     return;
                 }
 
                 if (Assembler::canEncodePImmOffset<datasize>(addressDelta)) {
-                    m_assembler.ldr<datasize>(dest,  memoryTempRegister, addressDelta);
+                    loadUnsignedImmediate<datasize>(dest, memoryTempRegister, addressDelta);
                     return;
                 }
             }
@@ -4233,7 +4231,10 @@
             if ((addressAsInt & (~maskHalfWord0)) == (currentRegisterContents & (~maskHalfWord0))) {
                 m_assembler.movk<64>(memoryTempRegister, addressAsInt & maskHalfWord0, 0);
                 cachedMemoryTempRegister().setValue(reinterpret_cast<intptr_t>(address));
-                m_assembler.ldr<datasize>(dest, memoryTempRegister, ARM64Registers::zr);
+                if constexpr (datasize == 16)
+                    m_assembler.ldrh(dest, memoryTempRegister, ARM64Registers::zr);
+                else
+                    m_assembler.ldr<datasize>(dest, memoryTempRegister, ARM64Registers::zr);
                 return;
             }
         }
@@ -4243,7 +4244,10 @@
             cachedMemoryTempRegister().invalidate();
         else
             cachedMemoryTempRegister().setValue(reinterpret_cast<intptr_t>(address));
-        m_assembler.ldr<datasize>(dest, memoryTempRegister, ARM64Registers::zr);
+        if constexpr (datasize == 16)
+            m_assembler.ldrh(dest, memoryTempRegister, ARM64Registers::zr);
+        else
+            m_assembler.ldr<datasize>(dest, memoryTempRegister, ARM64Registers::zr);
     }
 
     template<int datasize>
@@ -4257,12 +4261,12 @@
 
             if (isInt<32>(addressDelta)) {
                 if (Assembler::canEncodeSImmOffset(addressDelta)) {
-                    m_assembler.stur<datasize>(src, memoryTempRegister, addressDelta);
+                    storeUnscaledImmediate<datasize>(src, memoryTempRegister, addressDelta);
                     return;
                 }
 
                 if (Assembler::canEncodePImmOffset<datasize>(addressDelta)) {
-                    m_assembler.str<datasize>(src, memoryTempRegister, addressDelta);
+                    storeUnsignedImmediate<datasize>(src, memoryTempRegister, addressDelta);
                     return;
                 }
             }
@@ -4270,14 +4274,20 @@
             if ((addressAsInt & (~maskHalfWord0)) == (currentRegisterContents & (~maskHalfWord0))) {
                 m_assembler.movk<64>(memoryTempRegister, addressAsInt & maskHalfWord0, 0);
                 cachedMemoryTempRegister().setValue(reinterpret_cast<intptr_t>(address));
-                m_assembler.str<datasize>(src, memoryTempRegister, ARM64Registers::zr);
+                if constexpr (datasize == 16)
+                    m_assembler.strh(src, memoryTempRegister, ARM64Registers::zr);
+                else
+                    m_assembler.str<datasize>(src, memoryTempRegister, ARM64Registers::zr);
                 return;
             }
         }
 
         move(TrustedImmPtr(address), memoryTempRegister);
         cachedMemoryTempRegister().setValue(reinterpret_cast<intptr_t>(address));
-        m_assembler.str<datasize>(src, memoryTempRegister, ARM64Registers::zr);
+        if constexpr (datasize == 16)
+            m_assembler.strh(src, memoryTempRegister, ARM64Registers::zr);
+        else
+            m_assembler.str<datasize>(src, memoryTempRegister, ARM64Registers::zr);
     }
 
     template <int dataSize>
diff --git a/Source/JavaScriptCore/assembler/testmasm.cpp b/Source/JavaScriptCore/assembler/testmasm.cpp
index 59bbc27..f11332f 100644
--- a/Source/JavaScriptCore/assembler/testmasm.cpp
+++ b/Source/JavaScriptCore/assembler/testmasm.cpp
@@ -1127,6 +1127,16 @@
     });
     invoke<void>(or16);
     CHECK_EQ(memoryLocation, 0x12341234 | 42);
+
+    memoryLocation = 0x12341234;
+    auto or16InvalidLogicalImmInARM64 = compile([&] (CCallHelpers& jit) {
+        emitFunctionPrologue(jit);
+        jit.or16(CCallHelpers::TrustedImm32(0), CCallHelpers::AbsoluteAddress(&memoryLocation));
+        emitFunctionEpilogue(jit);
+        jit.ret();
+    });
+    invoke<void>(or16InvalidLogicalImmInARM64);
+    CHECK_EQ(memoryLocation, 0x12341234);
 }
 
 void testByteSwap()