Update RegExp and related classes to use 8 bit strings when available
https://bugs.webkit.org/show_bug.cgi?id=67337

Source/JavaScriptCore: 

Modified both the Yarr interpreter and JIT to handle 8 bit subject strings.
The code paths are triggered by the UString::is8bit() method which currently
returns false.  Implemented JIT changes for all current architectures.
Tested X86_64 and ARM v7.

This includes some code that will likely change as we complete the
8 bit string changes.  This includes the way the raw buffer pointers
are accessed as well as replacing the CharAccess class with a
string interator returned from UString.

Fixed build breakage in testRegExp.cpp due to globalObject construction
changes.

Reviewed by Gavin Barraclough.

* JavaScriptCore.exp:
* JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
* testRegExp.cpp:
(GlobalObject::finishCreation):
(GlobalObject::GlobalObject):
* assembler/ARMAssembler.cpp:
(JSC::ARMAssembler::baseIndexTransfer32):
* assembler/ARMAssembler.h:
* assembler/ARMv7Assembler.h:
(JSC::ARMv7Assembler::ubfx):
(JSC::ARMv7Assembler::ARMInstructionFormatter::twoWordOp12Reg40Imm3Reg4Imm20Imm5):
* assembler/MacroAssemblerARM.h:
(JSC::MacroAssemblerARM::load8):
(JSC::MacroAssemblerARM::branch8):
(JSC::MacroAssemblerARM::branch16):
* assembler/MacroAssemblerARMv7.h:
(JSC::MacroAssemblerARMv7::load8):
(JSC::MacroAssemblerARMv7::branch16):
(JSC::MacroAssemblerARMv7::branch8):
* assembler/MacroAssemblerMIPS.h:
(JSC::MacroAssemblerMIPS::load8):
(JSC::MacroAssemblerMIPS::branch8):
(JSC::MacroAssemblerMIPS::branch16):
* assembler/MacroAssemblerSH4.h:
(JSC::MacroAssemblerSH4::load8):
(JSC::MacroAssemblerSH4::branch8):
(JSC::MacroAssemblerSH4::branch16):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load8):
(JSC::MacroAssemblerX86Common::branch16):
(JSC::MacroAssemblerX86Common::branch8):
* assembler/SH4Assembler.h:
(JSC::SH4Assembler::extub):
(JSC::SH4Assembler::printInstr):
* assembler/X86Assembler.h:
(JSC::X86Assembler::cmpw_ir):
(JSC::X86Assembler::movzbl_mr):
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
(JSC::RegExp::compileIfNecessary):
(JSC::RegExp::match):
(JSC::RegExp::matchCompareWithInterpreter):
* runtime/RegExp.h:
* runtime/UString.h:
(JSC::UString::is8Bit):
* yarr/Yarr.h:
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::CharAccess::CharAccess):
(JSC::Yarr::Interpreter::CharAccess::~CharAccess):
(JSC::Yarr::Interpreter::CharAccess::operator[]):
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::interpret):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateDotStarEnclosure):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
(JSC::Yarr::jitCompile):
(JSC::Yarr::execute):
* yarr/YarrJIT.h:
(JSC::Yarr::YarrCodeBlock::has8BitCode):
(JSC::Yarr::YarrCodeBlock::has16BitCode):
(JSC::Yarr::YarrCodeBlock::set8BitCode):
(JSC::Yarr::YarrCodeBlock::set16BitCode):
(JSC::Yarr::YarrCodeBlock::execute):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):

Source/WebCore: 

Updated call to match to use UString& instead of UChar*.

Reviewed by Gavin Barraclough.

No new tests, Covered by existing tests.

* platform/text/RegularExpression.cpp:
(WebCore::RegularExpression::match):


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@94981 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/runtime/RegExp.cpp b/Source/JavaScriptCore/runtime/RegExp.cpp
index 09831a4..0c87b15 100644
--- a/Source/JavaScriptCore/runtime/RegExp.cpp
+++ b/Source/JavaScriptCore/runtime/RegExp.cpp
@@ -267,26 +267,26 @@
     return globalData.regExpCache()->lookupOrCreate(patternString, flags);
 }
 
-void RegExp::compile(JSGlobalData* globalData)
+void RegExp::compile(JSGlobalData* globalData, Yarr::YarrCharSize charSize)
 {
-    ASSERT(m_state == NotCompiled);
-    m_representation = adoptPtr(new RegExpRepresentation);
     Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
     if (m_constructionError) {
         ASSERT_NOT_REACHED();
         m_state = ParseError;
         return;
     }
-
-    globalData->regExpCache()->addToStrongCache(this);
-
     ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
 
-    m_state = ByteCode;
+    if (!m_representation) {
+        ASSERT(m_state == NotCompiled);
+        m_representation = adoptPtr(new RegExpRepresentation);
+        globalData->regExpCache()->addToStrongCache(this);
+        m_state = ByteCode;
+    }
 
 #if ENABLE(YARR_JIT)
     if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
-        Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
+        Yarr::jitCompile(pattern, charSize, globalData, m_representation->m_regExpJITCode);
 #if ENABLE(YARR_JIT_DEBUG)
         if (!m_representation->m_regExpJITCode.isFallBack())
             m_state = JITCode;
@@ -299,11 +299,36 @@
         }
 #endif
     }
+#else
+    UNUSED_PARAM(charSize);
 #endif
 
     m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
 }
 
+void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize charSize)
+{
+    // If the state is NotCompiled or ParseError, then there is no representation.
+    // If there is a representation, and the state must be either JITCode or ByteCode.
+    ASSERT(!!m_representation == (m_state == JITCode || m_state == ByteCode));
+    
+    if (m_representation) {
+#if ENABLE(YARR_JIT)
+        if (m_state != JITCode)
+            return;
+        if ((charSize == Yarr::Char8) && (m_representation->m_regExpJITCode.has8BitCode()))
+            return;
+        if ((charSize == Yarr::Char16) && (m_representation->m_regExpJITCode.has16BitCode()))
+            return;
+#else
+        return;
+#endif
+    }
+
+    compile(&globalData, charSize);
+}
+
+
 int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
 {
     if (startOffset < 0)
@@ -317,7 +342,7 @@
         return -1;
 
     if (m_state != ParseError) {
-        compileIfNecessary(globalData);
+        compileIfNecessary(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
 
         int offsetVectorSize = (m_numSubpatterns + 1) * 2;
         int* offsetVector;
@@ -340,13 +365,16 @@
         int result;
 #if ENABLE(YARR_JIT)
         if (m_state == JITCode) {
-            result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
+            if (s.is8Bit())
+                result = Yarr::execute(m_representation->m_regExpJITCode, s.latin1().data(), startOffset, s.length(), offsetVector);
+            else
+                result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
 #if ENABLE(YARR_JIT_DEBUG)
             matchCompareWithInterpreter(s, startOffset, offsetVector, result);
 #endif
         } else
 #endif
-            result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
+            result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), offsetVector);
         ASSERT(result >= -1);
 
 #if REGEXP_FUNC_TEST_DATA_GEN
@@ -388,7 +416,7 @@
     for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
         interpreterOffsetVector[j] = -1;
 
-    interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
+    interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), interpreterOffsetVector);
 
     if (jitResult != interpreterResult)
         differences++;
diff --git a/Source/JavaScriptCore/runtime/RegExp.h b/Source/JavaScriptCore/runtime/RegExp.h
index 0cf80a0..aa3a61f 100644
--- a/Source/JavaScriptCore/runtime/RegExp.h
+++ b/Source/JavaScriptCore/runtime/RegExp.h
@@ -26,6 +26,7 @@
 #include "ExecutableAllocator.h"
 #include "Structure.h"
 #include "RegExpKey.h"
+#include "yarr/Yarr.h"
 #include <wtf/Forward.h>
 #include <wtf/RefCounted.h>
 
@@ -91,13 +92,8 @@
             NotCompiled
         } m_state;
 
-        void compile(JSGlobalData*);
-        void compileIfNecessary(JSGlobalData& globalData)
-        {
-            if (m_representation)
-                return;
-            compile(&globalData);
-        }
+        void compile(JSGlobalData*, Yarr::YarrCharSize);
+        void compileIfNecessary(JSGlobalData&, Yarr::YarrCharSize);
 
 #if ENABLE(YARR_JIT_DEBUG)
         void matchCompareWithInterpreter(const UString&, int startOffset, int* offsetVector, int jitResult);
diff --git a/Source/JavaScriptCore/runtime/UString.h b/Source/JavaScriptCore/runtime/UString.h
index 2d76809..0954e65 100644
--- a/Source/JavaScriptCore/runtime/UString.h
+++ b/Source/JavaScriptCore/runtime/UString.h
@@ -76,6 +76,8 @@
         return m_impl->characters();
     }
 
+    bool is8Bit() const { return false; }
+
     CString ascii() const;
     CString latin1() const;
     CString utf8(bool strict = false) const;