kocienda | 66a6d36 | 2001-08-24 14:24:45 +0000 | [diff] [blame] | 1 | /* |
ggaren@apple.com | a862ba2 | 2007-11-12 04:27:33 +0000 | [diff] [blame] | 2 | * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) |
msaboff@apple.com | 6e73b41 | 2017-08-22 22:43:08 +0000 | [diff] [blame] | 3 | * Copyright (c) 2007, 2008, 2016-2017 Apple Inc. All rights reserved. |
mjs@apple.com | 0a30b7a | 2009-07-04 14:21:30 +0000 | [diff] [blame] | 4 | * Copyright (C) 2009 Torch Mobile, Inc. |
abecsi@webkit.org | 59e1c41 | 2010-12-02 13:36:45 +0000 | [diff] [blame] | 5 | * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged |
kocienda | 66a6d36 | 2001-08-24 14:24:45 +0000 | [diff] [blame] | 6 | * |
| 7 | * This library is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU Lesser General Public |
| 9 | * License as published by the Free Software Foundation; either |
| 10 | * version 2 of the License, or (at your option) any later version. |
| 11 | * |
| 12 | * This library is distributed in the hope that it will be useful, |
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | * Lesser General Public License for more details. |
| 16 | * |
| 17 | * You should have received a copy of the GNU Lesser General Public |
| 18 | * License along with this library; if not, write to the Free Software |
mjs | cdff33b | 2006-01-23 21:41:36 +0000 | [diff] [blame] | 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
mjs | 6f821c8 | 2002-03-22 00:31:57 +0000 | [diff] [blame] | 20 | * |
kocienda | 66a6d36 | 2001-08-24 14:24:45 +0000 | [diff] [blame] | 21 | */ |
| 22 | |
mjs | b64c50a | 2005-10-03 21:13:12 +0000 | [diff] [blame] | 23 | #include "config.h" |
cwzwarich@webkit.org | 0b51a73 | 2008-11-05 23:21:32 +0000 | [diff] [blame] | 24 | #include "RegExp.h" |
barraclough@apple.com | 7dd3056 | 2011-01-10 21:08:28 +0000 | [diff] [blame] | 25 | |
cwzwarich@webkit.org | 3ff0e6a | 2008-11-07 00:18:07 +0000 | [diff] [blame] | 26 | #include "Lexer.h" |
fpizlo@apple.com | fb7eff2 | 2014-02-11 01:45:50 +0000 | [diff] [blame] | 27 | #include "JSCInlines.h" |
oliver@apple.com | d7523c1 | 2011-05-26 22:58:52 +0000 | [diff] [blame] | 28 | #include "RegExpCache.h" |
fpizlo@apple.com | 6ea42db | 2016-03-08 21:15:07 +0000 | [diff] [blame] | 29 | #include "RegExpInlines.h" |
fpizlo@apple.com | a4b4cbe | 2013-01-12 04:47:03 +0000 | [diff] [blame] | 30 | #include "Yarr.h" |
| 31 | #include "YarrJIT.h" |
ggaren | 7213ee0 | 2007-10-16 23:25:33 +0000 | [diff] [blame] | 32 | #include <wtf/Assertions.h> |
kocienda | 66a6d36 | 2001-08-24 14:24:45 +0000 | [diff] [blame] | 33 | |
ggaren@apple.com | e2dfa52 | 2008-11-17 02:33:58 +0000 | [diff] [blame] | 34 | namespace JSC { |
| 35 | |
utatane.tea@gmail.com | a5544f1 | 2017-05-19 09:23:20 +0000 | [diff] [blame] | 36 | const ClassInfo RegExp::s_info = { "RegExp", nullptr, nullptr, nullptr, CREATE_METHOD_TABLE(RegExp) }; |
oliver@apple.com | 5652af7 | 2011-05-26 01:12:46 +0000 | [diff] [blame] | 37 | |
benjamin@webkit.org | cff06e4 | 2012-08-30 21:23:51 +0000 | [diff] [blame] | 38 | RegExpFlags regExpFlags(const String& string) |
barraclough@apple.com | 1281293 | 2011-03-09 23:04:27 +0000 | [diff] [blame] | 39 | { |
| 40 | RegExpFlags flags = NoFlags; |
| 41 | |
| 42 | for (unsigned i = 0; i < string.length(); ++i) { |
msaboff@apple.com | aeb7a4a | 2011-09-01 20:04:34 +0000 | [diff] [blame] | 43 | switch (string[i]) { |
barraclough@apple.com | 1281293 | 2011-03-09 23:04:27 +0000 | [diff] [blame] | 44 | case 'g': |
| 45 | if (flags & FlagGlobal) |
| 46 | return InvalidFlags; |
| 47 | flags = static_cast<RegExpFlags>(flags | FlagGlobal); |
| 48 | break; |
| 49 | |
| 50 | case 'i': |
| 51 | if (flags & FlagIgnoreCase) |
| 52 | return InvalidFlags; |
| 53 | flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase); |
| 54 | break; |
| 55 | |
| 56 | case 'm': |
| 57 | if (flags & FlagMultiline) |
| 58 | return InvalidFlags; |
| 59 | flags = static_cast<RegExpFlags>(flags | FlagMultiline); |
| 60 | break; |
| 61 | |
msaboff@apple.com | a021c7d | 2017-08-24 21:14:43 +0000 | [diff] [blame] | 62 | case 's': |
| 63 | if (flags & FlagDotAll) |
| 64 | return InvalidFlags; |
| 65 | flags = static_cast<RegExpFlags>(flags | FlagDotAll); |
| 66 | break; |
| 67 | |
msaboff@apple.com | 5e9b065 | 2016-03-02 00:39:01 +0000 | [diff] [blame] | 68 | case 'u': |
| 69 | if (flags & FlagUnicode) |
| 70 | return InvalidFlags; |
| 71 | flags = static_cast<RegExpFlags>(flags | FlagUnicode); |
| 72 | break; |
| 73 | |
msaboff@apple.com | 3f19465 | 2016-03-09 20:11:46 +0000 | [diff] [blame] | 74 | case 'y': |
| 75 | if (flags & FlagSticky) |
| 76 | return InvalidFlags; |
| 77 | flags = static_cast<RegExpFlags>(flags | FlagSticky); |
| 78 | break; |
| 79 | |
barraclough@apple.com | 1281293 | 2011-03-09 23:04:27 +0000 | [diff] [blame] | 80 | default: |
| 81 | return InvalidFlags; |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | return flags; |
| 86 | } |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 87 | |
| 88 | #if REGEXP_FUNC_TEST_DATA_GEN |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 89 | const char* const RegExpFunctionalTestCollector::s_fileName = "/tmp/RegExpTestsData"; |
| 90 | RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::s_instance = 0; |
| 91 | |
| 92 | RegExpFunctionalTestCollector* RegExpFunctionalTestCollector::get() |
| 93 | { |
| 94 | if (!s_instance) |
| 95 | s_instance = new RegExpFunctionalTestCollector(); |
| 96 | |
| 97 | return s_instance; |
| 98 | } |
| 99 | |
joepeck@webkit.org | c0e5f9d | 2015-05-13 18:51:44 +0000 | [diff] [blame] | 100 | void RegExpFunctionalTestCollector::outputOneTest(RegExp* regExp, const String& s, int startOffset, int* ovector, int result) |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 101 | { |
| 102 | if ((!m_lastRegExp) || (m_lastRegExp != regExp)) { |
| 103 | m_lastRegExp = regExp; |
| 104 | fputc('/', m_file); |
benjamin@webkit.org | cff06e4 | 2012-08-30 21:23:51 +0000 | [diff] [blame] | 105 | outputEscapedString(regExp->pattern(), true); |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 106 | fputc('/', m_file); |
| 107 | if (regExp->global()) |
| 108 | fputc('g', m_file); |
| 109 | if (regExp->ignoreCase()) |
| 110 | fputc('i', m_file); |
| 111 | if (regExp->multiline()) |
| 112 | fputc('m', m_file); |
msaboff@apple.com | a021c7d | 2017-08-24 21:14:43 +0000 | [diff] [blame] | 113 | if (regExp->dotAll()) |
| 114 | fputc('s', m_file); |
msaboff@apple.com | 5e9b065 | 2016-03-02 00:39:01 +0000 | [diff] [blame] | 115 | if (regExp->unicode()) |
| 116 | fputc('u', m_file); |
msaboff@apple.com | a021c7d | 2017-08-24 21:14:43 +0000 | [diff] [blame] | 117 | if (regExp->sticky()) |
| 118 | fputc('y', m_file); |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 119 | fprintf(m_file, "\n"); |
| 120 | } |
| 121 | |
| 122 | fprintf(m_file, " \""); |
benjamin@webkit.org | cff06e4 | 2012-08-30 21:23:51 +0000 | [diff] [blame] | 123 | outputEscapedString(s); |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 124 | fprintf(m_file, "\", %d, %d, (", startOffset, result); |
| 125 | for (unsigned i = 0; i <= regExp->numSubpatterns(); i++) { |
barraclough@apple.com | 0a0af1a | 2012-03-23 19:57:28 +0000 | [diff] [blame] | 126 | int subpatternBegin = ovector[i * 2]; |
| 127 | int subpatternEnd = ovector[i * 2 + 1]; |
| 128 | if (subpatternBegin == -1) |
| 129 | subpatternEnd = -1; |
| 130 | fprintf(m_file, "%d, %d", subpatternBegin, subpatternEnd); |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 131 | if (i < regExp->numSubpatterns()) |
| 132 | fputs(", ", m_file); |
| 133 | } |
| 134 | |
| 135 | fprintf(m_file, ")\n"); |
| 136 | fflush(m_file); |
| 137 | } |
| 138 | |
| 139 | RegExpFunctionalTestCollector::RegExpFunctionalTestCollector() |
| 140 | { |
| 141 | m_file = fopen(s_fileName, "r+"); |
| 142 | if (!m_file) |
| 143 | m_file = fopen(s_fileName, "w+"); |
| 144 | |
| 145 | fseek(m_file, 0L, SEEK_END); |
| 146 | } |
| 147 | |
| 148 | RegExpFunctionalTestCollector::~RegExpFunctionalTestCollector() |
| 149 | { |
| 150 | fclose(m_file); |
| 151 | s_instance = 0; |
| 152 | } |
| 153 | |
benjamin@webkit.org | cff06e4 | 2012-08-30 21:23:51 +0000 | [diff] [blame] | 154 | void RegExpFunctionalTestCollector::outputEscapedString(const String& s, bool escapeSlash) |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 155 | { |
| 156 | int len = s.length(); |
| 157 | |
| 158 | for (int i = 0; i < len; ++i) { |
| 159 | UChar c = s[i]; |
| 160 | |
| 161 | switch (c) { |
| 162 | case '\0': |
| 163 | fputs("\\0", m_file); |
| 164 | break; |
| 165 | case '\a': |
| 166 | fputs("\\a", m_file); |
| 167 | break; |
| 168 | case '\b': |
| 169 | fputs("\\b", m_file); |
| 170 | break; |
| 171 | case '\f': |
| 172 | fputs("\\f", m_file); |
| 173 | break; |
| 174 | case '\n': |
| 175 | fputs("\\n", m_file); |
| 176 | break; |
| 177 | case '\r': |
| 178 | fputs("\\r", m_file); |
| 179 | break; |
| 180 | case '\t': |
| 181 | fputs("\\t", m_file); |
| 182 | break; |
| 183 | case '\v': |
| 184 | fputs("\\v", m_file); |
| 185 | break; |
| 186 | case '/': |
| 187 | if (escapeSlash) |
| 188 | fputs("\\/", m_file); |
| 189 | else |
| 190 | fputs("/", m_file); |
| 191 | break; |
| 192 | case '\"': |
| 193 | fputs("\\\"", m_file); |
| 194 | break; |
| 195 | case '\\': |
| 196 | fputs("\\\\", m_file); |
| 197 | break; |
| 198 | case '\?': |
| 199 | fputs("\?", m_file); |
| 200 | break; |
| 201 | default: |
| 202 | if (c > 0x7f) |
| 203 | fprintf(m_file, "\\u%04x", c); |
| 204 | else |
| 205 | fputc(c, m_file); |
| 206 | break; |
| 207 | } |
| 208 | } |
| 209 | } |
| 210 | #endif |
| 211 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 212 | RegExp::RegExp(VM& vm, const String& patternString, RegExpFlags flags) |
| 213 | : JSCell(vm, vm.regExpStructure.get()) |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 214 | , m_patternString(patternString) |
barraclough@apple.com | 1281293 | 2011-03-09 23:04:27 +0000 | [diff] [blame] | 215 | , m_flags(flags) |
ggaren@apple.com | 8a50ec5 | 2007-11-07 17:18:39 +0000 | [diff] [blame] | 216 | { |
commit-queue@webkit.org | 214ac02 | 2011-08-25 23:30:14 +0000 | [diff] [blame] | 217 | } |
| 218 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 219 | void RegExp::finishCreation(VM& vm) |
commit-queue@webkit.org | 214ac02 | 2011-08-25 23:30:14 +0000 | [diff] [blame] | 220 | { |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 221 | Base::finishCreation(vm); |
utatane.tea@gmail.com | fa8d279 | 2017-12-19 19:16:21 +0000 | [diff] [blame] | 222 | Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm.stackLimit()); |
msaboff@apple.com | 79ffd9d | 2016-09-14 23:17:59 +0000 | [diff] [blame] | 223 | if (!isValid()) |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 224 | m_state = ParseError; |
msaboff@apple.com | 8e26fe2 | 2017-09-07 23:13:38 +0000 | [diff] [blame] | 225 | else { |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 226 | m_numSubpatterns = pattern.m_numSubpatterns; |
msaboff@apple.com | 8e26fe2 | 2017-09-07 23:13:38 +0000 | [diff] [blame] | 227 | m_captureGroupNames.swap(pattern.m_captureGroupNames); |
| 228 | m_namedGroupToParenIndex.swap(pattern.m_namedGroupToParenIndex); |
| 229 | } |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 230 | } |
| 231 | |
mhahnenberg@apple.com | c58d54d | 2011-12-16 19:06:44 +0000 | [diff] [blame] | 232 | void RegExp::destroy(JSCell* cell) |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 233 | { |
ggaren@apple.com | 72da811 | 2012-05-26 22:40:46 +0000 | [diff] [blame] | 234 | RegExp* thisObject = static_cast<RegExp*>(cell); |
msaboff@apple.com | b1189a9 | 2011-09-03 00:20:43 +0000 | [diff] [blame] | 235 | #if REGEXP_FUNC_TEST_DATA_GEN |
| 236 | RegExpFunctionalTestCollector::get()->clearRegExp(this); |
| 237 | #endif |
mhahnenberg@apple.com | c58d54d | 2011-12-16 19:06:44 +0000 | [diff] [blame] | 238 | thisObject->RegExp::~RegExp(); |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 239 | } |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 240 | |
commit-queue@webkit.org | 00eb52f | 2016-03-01 02:07:12 +0000 | [diff] [blame] | 241 | size_t RegExp::estimatedSize(JSCell* cell) |
| 242 | { |
| 243 | RegExp* thisObject = static_cast<RegExp*>(cell); |
| 244 | size_t regexDataSize = thisObject->m_regExpBytecode ? thisObject->m_regExpBytecode->estimatedSizeInBytes() : 0; |
| 245 | #if ENABLE(YARR_JIT) |
| 246 | regexDataSize += thisObject->m_regExpJITCode.size(); |
| 247 | #endif |
| 248 | return Base::estimatedSize(cell) + regexDataSize; |
| 249 | } |
| 250 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 251 | RegExp* RegExp::createWithoutCaching(VM& vm, const String& patternString, RegExpFlags flags) |
darin@apple.com | e4ba8cf | 2008-02-09 18:09:42 +0000 | [diff] [blame] | 252 | { |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 253 | RegExp* regExp = new (NotNull, allocateCell<RegExp>(vm.heap)) RegExp(vm, patternString, flags); |
| 254 | regExp->finishCreation(vm); |
commit-queue@webkit.org | 403bfdf | 2011-08-26 22:32:53 +0000 | [diff] [blame] | 255 | return regExp; |
oliver@apple.com | fcacd3c | 2011-07-18 17:47:13 +0000 | [diff] [blame] | 256 | } |
| 257 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 258 | RegExp* RegExp::create(VM& vm, const String& patternString, RegExpFlags flags) |
oliver@apple.com | fcacd3c | 2011-07-18 17:47:13 +0000 | [diff] [blame] | 259 | { |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 260 | return vm.regExpCache()->lookupOrCreate(patternString, flags); |
darin@apple.com | e4ba8cf | 2008-02-09 18:09:42 +0000 | [diff] [blame] | 261 | } |
| 262 | |
msaboff@apple.com | aeec0ba | 2017-12-14 22:16:38 +0000 | [diff] [blame] | 263 | |
| 264 | static std::unique_ptr<Yarr::BytecodePattern> byteCodeCompilePattern(VM* vm, Yarr::YarrPattern& pattern) |
| 265 | { |
| 266 | return Yarr::byteCompile(pattern, &vm->m_regExpAllocator, &vm->m_regExpAllocatorLock); |
| 267 | } |
| 268 | |
| 269 | void RegExp::byteCodeCompileIfNecessary(VM* vm) |
| 270 | { |
| 271 | if (m_regExpBytecode) |
| 272 | return; |
| 273 | |
utatane.tea@gmail.com | fa8d279 | 2017-12-19 19:16:21 +0000 | [diff] [blame] | 274 | Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm->stackLimit()); |
| 275 | if (hasError(m_constructionErrorCode)) { |
msaboff@apple.com | aeec0ba | 2017-12-14 22:16:38 +0000 | [diff] [blame] | 276 | RELEASE_ASSERT_NOT_REACHED(); |
| 277 | #if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) |
| 278 | m_state = ParseError; |
| 279 | return; |
| 280 | #endif |
| 281 | } |
| 282 | ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); |
| 283 | |
| 284 | m_regExpBytecode = byteCodeCompilePattern(vm, pattern); |
| 285 | } |
| 286 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 287 | void RegExp::compile(VM* vm, Yarr::YarrCharSize charSize) |
kocienda | 66a6d36 | 2001-08-24 14:24:45 +0000 | [diff] [blame] | 288 | { |
fpizlo@apple.com | 171d06f | 2016-11-15 23:21:50 +0000 | [diff] [blame] | 289 | ConcurrentJSLocker locker(m_lock); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 290 | |
utatane.tea@gmail.com | fa8d279 | 2017-12-19 19:16:21 +0000 | [diff] [blame] | 291 | Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm->stackLimit()); |
| 292 | if (hasError(m_constructionErrorCode)) { |
oliver@apple.com | 5598c18 | 2013-01-23 22:25:07 +0000 | [diff] [blame] | 293 | RELEASE_ASSERT_NOT_REACHED(); |
mjs@apple.com | 0a66116 | 2014-09-08 02:16:47 +0000 | [diff] [blame] | 294 | #if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 295 | m_state = ParseError; |
| 296 | return; |
mjs@apple.com | 0a66116 | 2014-09-08 02:16:47 +0000 | [diff] [blame] | 297 | #endif |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 298 | } |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 299 | ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); |
abecsi@webkit.org | 59e1c41 | 2010-12-02 13:36:45 +0000 | [diff] [blame] | 300 | |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 301 | if (!hasCode()) { |
msaboff@apple.com | 2cc4150 | 2011-09-12 22:17:53 +0000 | [diff] [blame] | 302 | ASSERT(m_state == NotCompiled); |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 303 | vm->regExpCache()->addToStrongCache(this); |
msaboff@apple.com | 2cc4150 | 2011-09-12 22:17:53 +0000 | [diff] [blame] | 304 | m_state = ByteCode; |
| 305 | } |
slewis@apple.com | 6ba7d15 | 2011-07-27 21:44:49 +0000 | [diff] [blame] | 306 | |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 307 | #if ENABLE(YARR_JIT) |
utatane.tea@gmail.com | 6863b23 | 2017-12-17 19:35:38 +0000 | [diff] [blame] | 308 | if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()) { |
jlewis3@apple.com | 5d712de | 2018-01-29 17:47:30 +0000 | [diff] [blame] | 309 | Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode); |
utatane.tea@gmail.com | e20cc98 | 2018-01-24 04:51:08 +0000 | [diff] [blame] | 310 | if (!m_regExpJITCode.failureReason()) { |
oliver@apple.com | d4c3fd0 | 2011-05-25 22:49:56 +0000 | [diff] [blame] | 311 | m_state = JITCode; |
| 312 | return; |
| 313 | } |
abecsi@webkit.org | 59e1c41 | 2010-12-02 13:36:45 +0000 | [diff] [blame] | 314 | } |
msaboff@apple.com | 2cc4150 | 2011-09-12 22:17:53 +0000 | [diff] [blame] | 315 | #else |
| 316 | UNUSED_PARAM(charSize); |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 317 | #endif |
abecsi@webkit.org | 59e1c41 | 2010-12-02 13:36:45 +0000 | [diff] [blame] | 318 | |
msaboff@apple.com | aeec0ba | 2017-12-14 22:16:38 +0000 | [diff] [blame] | 319 | if (Options::dumpCompiledRegExpPatterns()) |
| 320 | dataLog("Can't JIT this regular expression: \"", m_patternString, "\"\n"); |
| 321 | |
mark.lam@apple.com | e69fb6a | 2015-07-17 02:27:22 +0000 | [diff] [blame] | 322 | m_state = ByteCode; |
msaboff@apple.com | aeec0ba | 2017-12-14 22:16:38 +0000 | [diff] [blame] | 323 | m_regExpBytecode = byteCodeCompilePattern(vm, pattern); |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 324 | } |
| 325 | |
fpizlo@apple.com | bc16ddb | 2016-09-06 01:02:22 +0000 | [diff] [blame] | 326 | int RegExp::match(VM& vm, const String& s, unsigned startOffset, Vector<int>& ovector) |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 327 | { |
jlewis3@apple.com | 5d712de | 2018-01-29 17:47:30 +0000 | [diff] [blame] | 328 | return matchInline(vm, s, startOffset, ovector); |
barraclough@apple.com | 20ab73b | 2009-04-14 07:06:41 +0000 | [diff] [blame] | 329 | } |
| 330 | |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 331 | bool RegExp::matchConcurrently( |
fpizlo@apple.com | bc16ddb | 2016-09-06 01:02:22 +0000 | [diff] [blame] | 332 | VM& vm, const String& s, unsigned startOffset, int& position, Vector<int>& ovector) |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 333 | { |
fpizlo@apple.com | 171d06f | 2016-11-15 23:21:50 +0000 | [diff] [blame] | 334 | ConcurrentJSLocker locker(m_lock); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 335 | |
| 336 | if (!hasCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16)) |
| 337 | return false; |
| 338 | |
jlewis3@apple.com | 5d712de | 2018-01-29 17:47:30 +0000 | [diff] [blame] | 339 | position = match(vm, s, startOffset, ovector); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 340 | return true; |
| 341 | } |
| 342 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 343 | void RegExp::compileMatchOnly(VM* vm, Yarr::YarrCharSize charSize) |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 344 | { |
fpizlo@apple.com | 171d06f | 2016-11-15 23:21:50 +0000 | [diff] [blame] | 345 | ConcurrentJSLocker locker(m_lock); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 346 | |
utatane.tea@gmail.com | fa8d279 | 2017-12-19 19:16:21 +0000 | [diff] [blame] | 347 | Yarr::YarrPattern pattern(m_patternString, m_flags, m_constructionErrorCode, vm->stackLimit()); |
| 348 | if (hasError(m_constructionErrorCode)) { |
oliver@apple.com | 5598c18 | 2013-01-23 22:25:07 +0000 | [diff] [blame] | 349 | RELEASE_ASSERT_NOT_REACHED(); |
mjs@apple.com | 0a66116 | 2014-09-08 02:16:47 +0000 | [diff] [blame] | 350 | #if COMPILER_QUIRK(CONSIDERS_UNREACHABLE_CODE) |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 351 | m_state = ParseError; |
| 352 | return; |
mjs@apple.com | 0a66116 | 2014-09-08 02:16:47 +0000 | [diff] [blame] | 353 | #endif |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 354 | } |
| 355 | ASSERT(m_numSubpatterns == pattern.m_numSubpatterns); |
| 356 | |
| 357 | if (!hasCode()) { |
| 358 | ASSERT(m_state == NotCompiled); |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 359 | vm->regExpCache()->addToStrongCache(this); |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 360 | m_state = ByteCode; |
| 361 | } |
| 362 | |
| 363 | #if ENABLE(YARR_JIT) |
utatane.tea@gmail.com | 6863b23 | 2017-12-17 19:35:38 +0000 | [diff] [blame] | 364 | if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()) { |
jlewis3@apple.com | 5d712de | 2018-01-29 17:47:30 +0000 | [diff] [blame] | 365 | Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly); |
utatane.tea@gmail.com | e20cc98 | 2018-01-24 04:51:08 +0000 | [diff] [blame] | 366 | if (!m_regExpJITCode.failureReason()) { |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 367 | m_state = JITCode; |
| 368 | return; |
| 369 | } |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 370 | } |
| 371 | #else |
| 372 | UNUSED_PARAM(charSize); |
| 373 | #endif |
| 374 | |
msaboff@apple.com | aeec0ba | 2017-12-14 22:16:38 +0000 | [diff] [blame] | 375 | if (Options::dumpCompiledRegExpPatterns()) |
| 376 | dataLog("Can't JIT this regular expression: \"", m_patternString, "\"\n"); |
| 377 | |
mark.lam@apple.com | e69fb6a | 2015-07-17 02:27:22 +0000 | [diff] [blame] | 378 | m_state = ByteCode; |
msaboff@apple.com | aeec0ba | 2017-12-14 22:16:38 +0000 | [diff] [blame] | 379 | m_regExpBytecode = byteCodeCompilePattern(vm, pattern); |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 380 | } |
| 381 | |
ggaren@apple.com | 9a9a4b5 | 2013-04-18 19:32:17 +0000 | [diff] [blame] | 382 | MatchResult RegExp::match(VM& vm, const String& s, unsigned startOffset) |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 383 | { |
jlewis3@apple.com | 5d712de | 2018-01-29 17:47:30 +0000 | [diff] [blame] | 384 | return matchInline(vm, s, startOffset); |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 385 | } |
| 386 | |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 387 | bool RegExp::matchConcurrently(VM& vm, const String& s, unsigned startOffset, MatchResult& result) |
| 388 | { |
fpizlo@apple.com | 171d06f | 2016-11-15 23:21:50 +0000 | [diff] [blame] | 389 | ConcurrentJSLocker locker(m_lock); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 390 | |
| 391 | if (!hasMatchOnlyCodeFor(s.is8Bit() ? Yarr::Char8 : Yarr::Char16)) |
| 392 | return false; |
| 393 | |
jlewis3@apple.com | 5d712de | 2018-01-29 17:47:30 +0000 | [diff] [blame] | 394 | result = match(vm, s, startOffset); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 395 | return true; |
| 396 | } |
| 397 | |
ggaren@apple.com | 05627c5 | 2015-08-13 20:17:02 +0000 | [diff] [blame] | 398 | void RegExp::deleteCode() |
oliver@apple.com | 5652af7 | 2011-05-26 01:12:46 +0000 | [diff] [blame] | 399 | { |
fpizlo@apple.com | 171d06f | 2016-11-15 23:21:50 +0000 | [diff] [blame] | 400 | ConcurrentJSLocker locker(m_lock); |
fpizlo@apple.com | 280ef00 | 2016-04-05 22:13:16 +0000 | [diff] [blame] | 401 | |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 402 | if (!hasCode()) |
oliver@apple.com | 1db480d | 2011-06-28 01:32:01 +0000 | [diff] [blame] | 403 | return; |
| 404 | m_state = NotCompiled; |
paroga@webkit.org | c6c0e15 | 2012-03-29 16:11:36 +0000 | [diff] [blame] | 405 | #if ENABLE(YARR_JIT) |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 406 | m_regExpJITCode.clear(); |
paroga@webkit.org | c6c0e15 | 2012-03-29 16:11:36 +0000 | [diff] [blame] | 407 | #endif |
gyuyoung.kim@samsung.com | c6ae179 | 2014-11-28 00:51:32 +0000 | [diff] [blame] | 408 | m_regExpBytecode = nullptr; |
oliver@apple.com | 5652af7 | 2011-05-26 01:12:46 +0000 | [diff] [blame] | 409 | } |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 410 | |
| 411 | #if ENABLE(YARR_JIT_DEBUG) |
benjamin@webkit.org | cff06e4 | 2012-08-30 21:23:51 +0000 | [diff] [blame] | 412 | void RegExp::matchCompareWithInterpreter(const String& s, int startOffset, int* offsetVector, int jitResult) |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 413 | { |
| 414 | int offsetVectorSize = (m_numSubpatterns + 1) * 2; |
fpizlo@apple.com | bc16ddb | 2016-09-06 01:02:22 +0000 | [diff] [blame] | 415 | Vector<int> interpreterOvector; |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 416 | interpreterOvector.resize(offsetVectorSize); |
| 417 | int* interpreterOffsetVector = interpreterOvector.data(); |
| 418 | int interpreterResult = 0; |
| 419 | int differences = 0; |
| 420 | |
| 421 | // Initialize interpreterOffsetVector with the return value (index 0) and the |
| 422 | // first subpattern start indicies (even index values) set to -1. |
| 423 | // No need to init the subpattern end indicies. |
| 424 | for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++) |
| 425 | interpreterOffsetVector[j] = -1; |
| 426 | |
barraclough@apple.com | a269c163 | 2012-03-29 20:16:03 +0000 | [diff] [blame] | 427 | interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, interpreterOffsetVector); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 428 | |
| 429 | if (jitResult != interpreterResult) |
| 430 | differences++; |
| 431 | |
| 432 | for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) |
| 433 | if ((offsetVector[j] != interpreterOffsetVector[j]) |
| 434 | || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))) |
| 435 | differences++; |
| 436 | |
| 437 | if (differences) { |
fpizlo@apple.com | 01902c8 | 2012-11-22 04:23:36 +0000 | [diff] [blame] | 438 | dataLogF("RegExp Discrepency for /%s/\n string input ", pattern().utf8().data()); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 439 | unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset); |
| 440 | |
fpizlo@apple.com | 01902c8 | 2012-11-22 04:23:36 +0000 | [diff] [blame] | 441 | dataLogF((segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 442 | |
| 443 | if (jitResult != interpreterResult) { |
fpizlo@apple.com | 01902c8 | 2012-11-22 04:23:36 +0000 | [diff] [blame] | 444 | dataLogF(" JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 445 | differences--; |
| 446 | } else { |
fpizlo@apple.com | 01902c8 | 2012-11-22 04:23:36 +0000 | [diff] [blame] | 447 | dataLogF(" Correct result = %d\n", jitResult); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 448 | } |
| 449 | |
| 450 | if (differences) { |
| 451 | for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) { |
| 452 | if (offsetVector[j] != interpreterOffsetVector[j]) |
fpizlo@apple.com | 01902c8 | 2012-11-22 04:23:36 +0000 | [diff] [blame] | 453 | dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 454 | if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])) |
fpizlo@apple.com | 01902c8 | 2012-11-22 04:23:36 +0000 | [diff] [blame] | 455 | dataLogF(" JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 456 | } |
| 457 | } |
| 458 | } |
| 459 | } |
| 460 | #endif |
| 461 | |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 462 | #if ENABLE(REGEXP_TRACING) |
| 463 | void RegExp::printTraceData() |
| 464 | { |
| 465 | char formattedPattern[41]; |
| 466 | char rawPattern[41]; |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 467 | |
| 468 | strncpy(rawPattern, pattern().utf8().data(), 40); |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 469 | rawPattern[40]= '\0'; |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 470 | |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 471 | int pattLen = strlen(rawPattern); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 472 | |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 473 | snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern); |
| 474 | |
| 475 | #if ENABLE(YARR_JIT) |
barraclough@apple.com | f280e17 | 2012-03-28 22:18:20 +0000 | [diff] [blame] | 476 | Yarr::YarrCodeBlock& codeBlock = m_regExpJITCode; |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 477 | |
cwzwarich@webkit.org | 317e781 | 2011-01-29 20:31:29 +0000 | [diff] [blame] | 478 | const size_t jitAddrSize = 20; |
msaboff@apple.com | 72ca76c | 2014-03-18 23:53:49 +0000 | [diff] [blame] | 479 | char jit8BitMatchOnlyAddr[jitAddrSize]; |
| 480 | char jit16BitMatchOnlyAddr[jitAddrSize]; |
| 481 | char jit8BitMatchAddr[jitAddrSize]; |
| 482 | char jit16BitMatchAddr[jitAddrSize]; |
| 483 | if (m_state == ByteCode) { |
| 484 | snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "fallback "); |
| 485 | snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "---- "); |
| 486 | snprintf(jit8BitMatchAddr, jitAddrSize, "fallback "); |
| 487 | snprintf(jit16BitMatchAddr, jitAddrSize, "---- "); |
| 488 | } else { |
| 489 | snprintf(jit8BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchOnlyAddr())); |
| 490 | snprintf(jit16BitMatchOnlyAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchOnlyAddr())); |
| 491 | snprintf(jit8BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get8BitMatchAddr())); |
| 492 | snprintf(jit16BitMatchAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.get16BitMatchAddr())); |
| 493 | } |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 494 | #else |
msaboff@apple.com | 72ca76c | 2014-03-18 23:53:49 +0000 | [diff] [blame] | 495 | const char* jit8BitMatchOnlyAddr = "JIT Off"; |
| 496 | const char* jit16BitMatchOnlyAddr = ""; |
| 497 | const char* jit8BitMatchAddr = "JIT Off"; |
| 498 | const char* jit16BitMatchAddr = ""; |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 499 | #endif |
msaboff@apple.com | 72ca76c | 2014-03-18 23:53:49 +0000 | [diff] [blame] | 500 | unsigned averageMatchOnlyStringLen = (unsigned)(m_rtMatchOnlyTotalSubjectStringLen / m_rtMatchOnlyCallCount); |
| 501 | unsigned averageMatchStringLen = (unsigned)(m_rtMatchTotalSubjectStringLen / m_rtMatchCallCount); |
msaboff@apple.com | fcb0c9f | 2011-01-07 00:17:23 +0000 | [diff] [blame] | 502 | |
msaboff@apple.com | 72ca76c | 2014-03-18 23:53:49 +0000 | [diff] [blame] | 503 | printf("%-40.40s %16.16s %16.16s %10d %10d %10u\n", formattedPattern, jit8BitMatchOnlyAddr, jit16BitMatchOnlyAddr, m_rtMatchOnlyCallCount, m_rtMatchOnlyFoundCount, averageMatchOnlyStringLen); |
| 504 | printf(" %16.16s %16.16s %10d %10d %10u\n", jit8BitMatchAddr, jit16BitMatchAddr, m_rtMatchCallCount, m_rtMatchFoundCount, averageMatchStringLen); |
msaboff@apple.com | 02931f0 | 2010-09-10 02:10:37 +0000 | [diff] [blame] | 505 | } |
| 506 | #endif |
rniwa@webkit.org | 7d76d9b | 2011-05-26 05:19:25 +0000 | [diff] [blame] | 507 | |
cwzwarich@webkit.org | 3f782f6 | 2008-09-08 01:28:33 +0000 | [diff] [blame] | 508 | } // namespace JSC |