blob: 1dcede6a4489823492ce6264eab753df2cfccc6e [file] [log] [blame]
darinb9481ed2006-03-20 02:57:59 +00001/*
darin@apple.comfaced262009-01-12 07:44:27 +00002 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
jmalonzo@webkit.org9654c2b2008-08-06 12:46:40 +00003 * Copyright (C) 2008 Collabora Ltd.
pvarga@webkit.org4ab82552011-02-09 12:00:56 +00004 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
darinb9481ed2006-03-20 02:57:59 +00005 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
mjs@apple.com92047332014-03-15 04:08:27 +000015 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
darinb9481ed2006-03-20 02:57:59 +000016 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
mjs@apple.com92047332014-03-15 04:08:27 +000018 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
darinb9481ed2006-03-20 02:57:59 +000019 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000025 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
darinb9481ed2006-03-20 02:57:59 +000026 */
27
28#include "config.h"
29#include "RegularExpression.h"
darinf4b05b22006-07-10 05:20:17 +000030
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000031#include "Yarr.h"
fpizlo@apple.com280ef002016-04-05 22:13:16 +000032#include "YarrInterpreter.h"
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000033#include <wtf/Assertions.h>
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000034#include <wtf/BumpPointerAllocator.h>
darinb9481ed2006-03-20 02:57:59 +000035
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000036namespace JSC { namespace Yarr {
darinf4b05b22006-07-10 05:20:17 +000037
eric@webkit.org302c99f2009-09-11 08:43:32 +000038class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
darinb9481ed2006-03-20 02:57:59 +000039public:
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000040 static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000041 {
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000042 return adoptRef(*new Private(pattern, caseSensitivity, multilineMode, unicodeMode));
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000043 }
darinb9481ed2006-03-20 02:57:59 +000044
utatane.tea@gmail.comfa8d2792017-12-19 19:16:21 +000045 int lastMatchLength { -1 };
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000046
47 unsigned m_numSubpatterns;
gyuyoung.kim@samsung.comc6ae1792014-11-28 00:51:32 +000048 std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
darinb9481ed2006-03-20 02:57:59 +000049
andersca@apple.com0f509b12008-06-04 21:12:16 +000050private:
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000051 Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
52 : m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode, unicodeMode))
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000053 {
54 }
darin@apple.comfaced262009-01-12 07:44:27 +000055
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000056 std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000057 {
msaboff@apple.com3f194652016-03-09 20:11:46 +000058 RegExpFlags flags = NoFlags;
59
60 if (caseSensitivity == TextCaseInsensitive)
61 flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
62
63 if (multilineMode == MultilineEnabled)
64 flags = static_cast<RegExpFlags>(flags | FlagMultiline);
65
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000066 if (unicodeMode == UnicodeAwareMode)
67 flags = static_cast<RegExpFlags>(flags | FlagUnicode);
68
utatane.tea@gmail.comfa8d2792017-12-19 19:16:21 +000069 JSC::Yarr::YarrPattern pattern(patternString, flags, m_constructionErrorCode);
70 if (JSC::Yarr::hasError(m_constructionErrorCode)) {
71 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", JSC::Yarr::errorMessage(m_constructionErrorCode));
aroben@apple.com71e211b2011-05-03 13:54:58 +000072 return nullptr;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000073 }
74
75 m_numSubpatterns = pattern.m_numSubpatterns;
76
77 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
78 }
79
80 BumpPointerAllocator m_regexAllocator;
utatane.tea@gmail.comfa8d2792017-12-19 19:16:21 +000081 JSC::Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError };
darinb9481ed2006-03-20 02:57:59 +000082};
83
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000084RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
85 : d(Private::create(pattern, caseSensitivity, multilineMode, unicodeMode))
darinb9481ed2006-03-20 02:57:59 +000086{
87}
88
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +000089RegularExpression::RegularExpression(const RegularExpression& re)
90 : d(re.d)
darinb9481ed2006-03-20 02:57:59 +000091{
92}
93
94RegularExpression::~RegularExpression()
95{
96}
97
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +000098RegularExpression& RegularExpression::operator=(const RegularExpression& re)
darinb9481ed2006-03-20 02:57:59 +000099{
darin@apple.comfaced262009-01-12 07:44:27 +0000100 d = re.d;
darinb9481ed2006-03-20 02:57:59 +0000101 return *this;
102}
103
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000104int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
darinb9481ed2006-03-20 02:57:59 +0000105{
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000106 if (!d->m_regExpByteCode)
darin@apple.comfaced262009-01-12 07:44:27 +0000107 return -1;
108
jmalonzo@webkit.org9654c2b2008-08-06 12:46:40 +0000109 if (str.isNull())
110 return -1;
111
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000112 int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000113 unsigned* offsetVector;
114 Vector<unsigned, 32> nonReturnedOvector;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000115
cdumez@apple.com85709512017-07-20 21:06:12 +0000116 nonReturnedOvector.grow(offsetVectorSize);
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000117 offsetVector = nonReturnedOvector.data();
118
119 ASSERT(offsetVector);
120 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
msaboff@apple.comdd6b6fe2012-02-25 01:11:14 +0000121 offsetVector[j] = JSC::Yarr::offsetNoMatch;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000122
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000123 unsigned result;
124 if (str.length() <= INT_MAX)
benjamin@webkit.orgcff06e42012-08-30 21:23:51 +0000125 result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000126 else {
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +0000127 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000128 // can be represented as ints.
129 result = JSC::Yarr::offsetNoMatch;
130 }
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000131
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000132 if (result == JSC::Yarr::offsetNoMatch) {
darinb9481ed2006-03-20 02:57:59 +0000133 d->lastMatchLength = -1;
darinb9481ed2006-03-20 02:57:59 +0000134 return -1;
135 }
darin@apple.comfaced262009-01-12 07:44:27 +0000136
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000137 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
138 d->lastMatchLength = offsetVector[1] - offsetVector[0];
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000139 if (matchLength)
darinb9481ed2006-03-20 02:57:59 +0000140 *matchLength = d->lastMatchLength;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000141 return offsetVector[0];
darinb9481ed2006-03-20 02:57:59 +0000142}
143
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000144int RegularExpression::searchRev(const String& str) const
darinb9481ed2006-03-20 02:57:59 +0000145{
darin@apple.comfaced262009-01-12 07:44:27 +0000146 // FIXME: This could be faster if it actually searched backwards.
147 // Instead, it just searches forwards, multiple times until it finds the last match.
148
darinb9481ed2006-03-20 02:57:59 +0000149 int start = 0;
150 int pos;
151 int lastPos = -1;
152 int lastMatchLength = -1;
153 do {
154 int matchLength;
155 pos = match(str, start, &matchLength);
156 if (pos >= 0) {
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000157 if (pos + matchLength > lastPos + lastMatchLength) {
darinb9481ed2006-03-20 02:57:59 +0000158 // replace last match if this one is later and not a subset of the last match
159 lastPos = pos;
160 lastMatchLength = matchLength;
161 }
162 start = pos + 1;
163 }
164 } while (pos != -1);
darinb9481ed2006-03-20 02:57:59 +0000165 d->lastMatchLength = lastMatchLength;
166 return lastPos;
167}
168
darinb9481ed2006-03-20 02:57:59 +0000169int RegularExpression::matchedLength() const
170{
171 return d->lastMatchLength;
172}
173
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000174void replace(String& string, const RegularExpression& target, const String& replacement)
175{
176 int index = 0;
177 while (index < static_cast<int>(string.length())) {
178 int matchLength;
179 index = target.match(string, index, &matchLength);
180 if (index < 0)
181 break;
182 string.replace(index, matchLength, replacement);
183 index += replacement.length();
184 if (!matchLength)
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +0000185 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000186 }
darinf4b05b22006-07-10 05:20:17 +0000187}
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000188
tkent@chromium.orgf71b54f2013-04-25 22:51:08 +0000189bool RegularExpression::isValid() const
190{
gyuyoung.kim@samsung.comc6ae1792014-11-28 00:51:32 +0000191 return d->m_regExpByteCode.get();
tkent@chromium.orgf71b54f2013-04-25 22:51:08 +0000192}
193
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +0000194} } // namespace JSC::Yarr