Update RegExp and related classes to use 8 bit strings when available
https://bugs.webkit.org/show_bug.cgi?id=67337
Source/JavaScriptCore:
Modified both the Yarr interpreter and JIT to handle 8 bit subject strings.
The code paths are triggered by the UString::is8Bit() method which currently
returns false. Implemented JIT changes for all current architectures.
Tested X86_64 and ARM v7.
This includes some code that will likely change as we complete the
8 bit string changes. This includes the way the raw buffer pointers
are accessed as well as replacing the CharAccess class with a
string iterator returned from UString.
Fixed build breakage in testRegExp.cpp due to globalObject construction
changes.
Reviewed by Gavin Barraclough.
* JavaScriptCore.exp:
* JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
* testRegExp.cpp:
(GlobalObject::finishCreation):
(GlobalObject::GlobalObject):
* assembler/ARMAssembler.cpp:
(JSC::ARMAssembler::baseIndexTransfer32):
* assembler/ARMAssembler.h:
* assembler/ARMv7Assembler.h:
(JSC::ARMv7Assembler::ubfx):
(JSC::ARMv7Assembler::ARMInstructionFormatter::twoWordOp12Reg40Imm3Reg4Imm20Imm5):
* assembler/MacroAssemblerARM.h:
(JSC::MacroAssemblerARM::load8):
(JSC::MacroAssemblerARM::branch8):
(JSC::MacroAssemblerARM::branch16):
* assembler/MacroAssemblerARMv7.h:
(JSC::MacroAssemblerARMv7::load8):
(JSC::MacroAssemblerARMv7::branch16):
(JSC::MacroAssemblerARMv7::branch8):
* assembler/MacroAssemblerMIPS.h:
(JSC::MacroAssemblerMIPS::load8):
(JSC::MacroAssemblerMIPS::branch8):
(JSC::MacroAssemblerMIPS::branch16):
* assembler/MacroAssemblerSH4.h:
(JSC::MacroAssemblerSH4::load8):
(JSC::MacroAssemblerSH4::branch8):
(JSC::MacroAssemblerSH4::branch16):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load8):
(JSC::MacroAssemblerX86Common::branch16):
(JSC::MacroAssemblerX86Common::branch8):
* assembler/SH4Assembler.h:
(JSC::SH4Assembler::extub):
(JSC::SH4Assembler::printInstr):
* assembler/X86Assembler.h:
(JSC::X86Assembler::cmpw_ir):
(JSC::X86Assembler::movzbl_mr):
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
(JSC::RegExp::compileIfNecessary):
(JSC::RegExp::match):
(JSC::RegExp::matchCompareWithInterpreter):
* runtime/RegExp.h:
* runtime/UString.h:
(JSC::UString::is8Bit):
* yarr/Yarr.h:
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::CharAccess::CharAccess):
(JSC::Yarr::Interpreter::CharAccess::~CharAccess):
(JSC::Yarr::Interpreter::CharAccess::operator[]):
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::interpret):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateDotStarEnclosure):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
(JSC::Yarr::jitCompile):
(JSC::Yarr::execute):
* yarr/YarrJIT.h:
(JSC::Yarr::YarrCodeBlock::has8BitCode):
(JSC::Yarr::YarrCodeBlock::has16BitCode):
(JSC::Yarr::YarrCodeBlock::set8BitCode):
(JSC::Yarr::YarrCodeBlock::set16BitCode):
(JSC::Yarr::YarrCodeBlock::execute):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
Source/WebCore:
Updated call to match to use UString& instead of UChar*.
Reviewed by Gavin Barraclough.
No new tests; covered by existing tests.
* platform/text/RegularExpression.cpp:
(WebCore::RegularExpression::match):
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@94981 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/runtime/RegExp.cpp b/Source/JavaScriptCore/runtime/RegExp.cpp
index 09831a4..0c87b15 100644
--- a/Source/JavaScriptCore/runtime/RegExp.cpp
+++ b/Source/JavaScriptCore/runtime/RegExp.cpp
@@ -267,26 +267,26 @@
return globalData.regExpCache()->lookupOrCreate(patternString, flags);
}
-void RegExp::compile(JSGlobalData* globalData)
+void RegExp::compile(JSGlobalData* globalData, Yarr::YarrCharSize charSize)
{
- ASSERT(m_state == NotCompiled);
- m_representation = adoptPtr(new RegExpRepresentation);
Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
if (m_constructionError) {
ASSERT_NOT_REACHED();
m_state = ParseError;
return;
}
-
- globalData->regExpCache()->addToStrongCache(this);
-
ASSERT(m_numSubpatterns == pattern.m_numSubpatterns);
- m_state = ByteCode;
+ if (!m_representation) {
+ ASSERT(m_state == NotCompiled);
+ m_representation = adoptPtr(new RegExpRepresentation);
+ globalData->regExpCache()->addToStrongCache(this);
+ m_state = ByteCode;
+ }
#if ENABLE(YARR_JIT)
if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
- Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
+ Yarr::jitCompile(pattern, charSize, globalData, m_representation->m_regExpJITCode);
#if ENABLE(YARR_JIT_DEBUG)
if (!m_representation->m_regExpJITCode.isFallBack())
m_state = JITCode;
@@ -299,11 +299,36 @@
}
#endif
}
+#else
+ UNUSED_PARAM(charSize);
#endif
m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
}
+void RegExp::compileIfNecessary(JSGlobalData& globalData, Yarr::YarrCharSize charSize)
+{
+ // If the state is NotCompiled or ParseError, then there is no representation.
+ // If there is a representation, then the state must be either JITCode or ByteCode.
+ ASSERT(!!m_representation == (m_state == JITCode || m_state == ByteCode));
+
+ if (m_representation) {
+#if ENABLE(YARR_JIT)
+ if (m_state != JITCode)
+ return;
+ if ((charSize == Yarr::Char8) && (m_representation->m_regExpJITCode.has8BitCode()))
+ return;
+ if ((charSize == Yarr::Char16) && (m_representation->m_regExpJITCode.has16BitCode()))
+ return;
+#else
+ return;
+#endif
+ }
+
+ compile(&globalData, charSize);
+}
+
+
int RegExp::match(JSGlobalData& globalData, const UString& s, int startOffset, Vector<int, 32>* ovector)
{
if (startOffset < 0)
@@ -317,7 +342,7 @@
return -1;
if (m_state != ParseError) {
- compileIfNecessary(globalData);
+ compileIfNecessary(globalData, s.is8Bit() ? Yarr::Char8 : Yarr::Char16);
int offsetVectorSize = (m_numSubpatterns + 1) * 2;
int* offsetVector;
@@ -340,13 +365,16 @@
int result;
#if ENABLE(YARR_JIT)
if (m_state == JITCode) {
- result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
+ if (s.is8Bit())
+ result = Yarr::execute(m_representation->m_regExpJITCode, s.latin1().data(), startOffset, s.length(), offsetVector);
+ else
+ result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
#if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(s, startOffset, offsetVector, result);
#endif
} else
#endif
- result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
+ result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), offsetVector);
ASSERT(result >= -1);
#if REGEXP_FUNC_TEST_DATA_GEN
@@ -388,7 +416,7 @@
for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
interpreterOffsetVector[j] = -1;
- interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
+ interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s, startOffset, s.length(), interpreterOffsetVector);
if (jitResult != interpreterResult)
differences++;
diff --git a/Source/JavaScriptCore/runtime/RegExp.h b/Source/JavaScriptCore/runtime/RegExp.h
index 0cf80a0..aa3a61f 100644
--- a/Source/JavaScriptCore/runtime/RegExp.h
+++ b/Source/JavaScriptCore/runtime/RegExp.h
@@ -26,6 +26,7 @@
#include "ExecutableAllocator.h"
#include "Structure.h"
#include "RegExpKey.h"
+#include "yarr/Yarr.h"
#include <wtf/Forward.h>
#include <wtf/RefCounted.h>
@@ -91,13 +92,8 @@
NotCompiled
} m_state;
- void compile(JSGlobalData*);
- void compileIfNecessary(JSGlobalData& globalData)
- {
- if (m_representation)
- return;
- compile(&globalData);
- }
+ void compile(JSGlobalData*, Yarr::YarrCharSize);
+ void compileIfNecessary(JSGlobalData&, Yarr::YarrCharSize);
#if ENABLE(YARR_JIT_DEBUG)
void matchCompareWithInterpreter(const UString&, int startOffset, int* offsetVector, int jitResult);
diff --git a/Source/JavaScriptCore/runtime/UString.h b/Source/JavaScriptCore/runtime/UString.h
index 2d76809..0954e65 100644
--- a/Source/JavaScriptCore/runtime/UString.h
+++ b/Source/JavaScriptCore/runtime/UString.h
@@ -76,6 +76,8 @@
return m_impl->characters();
}
+ bool is8Bit() const { return false; }
+
CString ascii() const;
CString latin1() const;
CString utf8(bool strict = false) const;