blob: 3dbf8b8660f2759ee4bf955ffd41d52f58a47277 [file] [log] [blame]
darinb9481ed2006-03-20 02:57:59 +00001/*
darin@apple.comfaced262009-01-12 07:44:27 +00002 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
jmalonzo@webkit.org9654c2b2008-08-06 12:46:40 +00003 * Copyright (C) 2008 Collabora Ltd.
pvarga@webkit.org4ab82552011-02-09 12:00:56 +00004 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
darinb9481ed2006-03-20 02:57:59 +00005 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
mjs@apple.com92047332014-03-15 04:08:27 +000015 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
darinb9481ed2006-03-20 02:57:59 +000016 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
mjs@apple.com92047332014-03-15 04:08:27 +000018 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
darinb9481ed2006-03-20 02:57:59 +000019 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000025 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
darinb9481ed2006-03-20 02:57:59 +000026 */
27
28#include "config.h"
29#include "RegularExpression.h"
darinf4b05b22006-07-10 05:20:17 +000030
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000031#include "Yarr.h"
ross.kirsling@sony.com3d654ba2019-03-11 06:20:53 +000032#include "YarrFlags.h"
fpizlo@apple.com280ef002016-04-05 22:13:16 +000033#include "YarrInterpreter.h"
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000034#include <wtf/Assertions.h>
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000035#include <wtf/BumpPointerAllocator.h>
darinb9481ed2006-03-20 02:57:59 +000036
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +000037namespace JSC { namespace Yarr {
darinf4b05b22006-07-10 05:20:17 +000038
eric@webkit.org302c99f2009-09-11 08:43:32 +000039class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
darinb9481ed2006-03-20 02:57:59 +000040public:
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +000041 static Ref<Private> create(StringView pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000042 {
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000043 return adoptRef(*new Private(pattern, caseSensitivity, multilineMode, unicodeMode));
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000044 }
darinb9481ed2006-03-20 02:57:59 +000045
andersca@apple.com0f509b12008-06-04 21:12:16 +000046private:
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +000047 Private(StringView pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000048 : m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode, unicodeMode))
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000049 {
50 }
darin@apple.comfaced262009-01-12 07:44:27 +000051
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +000052 std::unique_ptr<JSC::Yarr::BytecodePattern> compile(StringView patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000053 {
ross.kirsling@sony.com3d654ba2019-03-11 06:20:53 +000054 OptionSet<JSC::Yarr::Flags> flags;
msaboff@apple.com3f194652016-03-09 20:11:46 +000055
56 if (caseSensitivity == TextCaseInsensitive)
ross.kirsling@sony.com3d654ba2019-03-11 06:20:53 +000057 flags.add(Flags::IgnoreCase);
msaboff@apple.com3f194652016-03-09 20:11:46 +000058
59 if (multilineMode == MultilineEnabled)
ross.kirsling@sony.com3d654ba2019-03-11 06:20:53 +000060 flags.add(Flags::Multiline);
msaboff@apple.com3f194652016-03-09 20:11:46 +000061
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000062 if (unicodeMode == UnicodeAwareMode)
ross.kirsling@sony.com3d654ba2019-03-11 06:20:53 +000063 flags.add(Flags::Unicode);
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000064
utatane.tea@gmail.comfa8d2792017-12-19 19:16:21 +000065 JSC::Yarr::YarrPattern pattern(patternString, flags, m_constructionErrorCode);
66 if (JSC::Yarr::hasError(m_constructionErrorCode)) {
cdumez@apple.com1392b8b2022-03-24 01:40:35 +000067 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", JSC::Yarr::errorMessage(m_constructionErrorCode).characters());
aroben@apple.com71e211b2011-05-03 13:54:58 +000068 return nullptr;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000069 }
70
71 m_numSubpatterns = pattern.m_numSubpatterns;
72
ysuzuki@apple.com6137b8e2019-06-13 18:47:22 +000073 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator, m_constructionErrorCode);
pvarga@webkit.org4ab82552011-02-09 12:00:56 +000074 }
75
utatane.tea@gmail.comfa8d2792017-12-19 19:16:21 +000076 JSC::Yarr::ErrorCode m_constructionErrorCode { Yarr::ErrorCode::NoError };
rmorisset@apple.com8e329b62019-04-15 20:39:11 +000077 BumpPointerAllocator m_regexAllocator;
78
79public:
80 int lastMatchLength { -1 };
81 unsigned m_numSubpatterns;
82 std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
darinb9481ed2006-03-20 02:57:59 +000083};
84
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +000085RegularExpression::RegularExpression(StringView pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode, UnicodeMode unicodeMode)
utatane.tea@gmail.com2856b092018-03-07 17:44:06 +000086 : d(Private::create(pattern, caseSensitivity, multilineMode, unicodeMode))
darinb9481ed2006-03-20 02:57:59 +000087{
88}
89
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +000090RegularExpression::RegularExpression(const RegularExpression& re)
91 : d(re.d)
darinb9481ed2006-03-20 02:57:59 +000092{
93}
94
95RegularExpression::~RegularExpression()
96{
97}
98
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +000099RegularExpression& RegularExpression::operator=(const RegularExpression& re)
darinb9481ed2006-03-20 02:57:59 +0000100{
darin@apple.comfaced262009-01-12 07:44:27 +0000101 d = re.d;
darinb9481ed2006-03-20 02:57:59 +0000102 return *this;
103}
104
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +0000105int RegularExpression::match(StringView str, int startFrom, int* matchLength) const
darinb9481ed2006-03-20 02:57:59 +0000106{
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000107 if (!d->m_regExpByteCode)
darin@apple.comfaced262009-01-12 07:44:27 +0000108 return -1;
109
jmalonzo@webkit.org9654c2b2008-08-06 12:46:40 +0000110 if (str.isNull())
111 return -1;
112
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000113 int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000114 unsigned* offsetVector;
115 Vector<unsigned, 32> nonReturnedOvector;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000116
cdumez@apple.com85709512017-07-20 21:06:12 +0000117 nonReturnedOvector.grow(offsetVectorSize);
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000118 offsetVector = nonReturnedOvector.data();
119
120 ASSERT(offsetVector);
121 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
msaboff@apple.comdd6b6fe2012-02-25 01:11:14 +0000122 offsetVector[j] = JSC::Yarr::offsetNoMatch;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000123
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000124 unsigned result;
125 if (str.length() <= INT_MAX)
benjamin@webkit.orgcff06e42012-08-30 21:23:51 +0000126 result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000127 else {
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +0000128 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000129 // can be represented as ints.
130 result = JSC::Yarr::offsetNoMatch;
131 }
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000132
msaboff@apple.com540e7d92012-02-24 23:55:01 +0000133 if (result == JSC::Yarr::offsetNoMatch) {
darinb9481ed2006-03-20 02:57:59 +0000134 d->lastMatchLength = -1;
darinb9481ed2006-03-20 02:57:59 +0000135 return -1;
136 }
darin@apple.comfaced262009-01-12 07:44:27 +0000137
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000138 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
139 d->lastMatchLength = offsetVector[1] - offsetVector[0];
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000140 if (matchLength)
darinb9481ed2006-03-20 02:57:59 +0000141 *matchLength = d->lastMatchLength;
pvarga@webkit.org4ab82552011-02-09 12:00:56 +0000142 return offsetVector[0];
darinb9481ed2006-03-20 02:57:59 +0000143}
144
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +0000145int RegularExpression::searchRev(StringView str) const
darinb9481ed2006-03-20 02:57:59 +0000146{
darin@apple.comfaced262009-01-12 07:44:27 +0000147 // FIXME: This could be faster if it actually searched backwards.
148 // Instead, it just searches forwards, multiple times until it finds the last match.
149
darinb9481ed2006-03-20 02:57:59 +0000150 int start = 0;
151 int pos;
152 int lastPos = -1;
153 int lastMatchLength = -1;
154 do {
155 int matchLength;
156 pos = match(str, start, &matchLength);
157 if (pos >= 0) {
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000158 if (pos + matchLength > lastPos + lastMatchLength) {
darinb9481ed2006-03-20 02:57:59 +0000159 // replace last match if this one is later and not a subset of the last match
160 lastPos = pos;
161 lastMatchLength = matchLength;
162 }
163 start = pos + 1;
164 }
165 } while (pos != -1);
darinb9481ed2006-03-20 02:57:59 +0000166 d->lastMatchLength = lastMatchLength;
167 return lastPos;
168}
169
darinb9481ed2006-03-20 02:57:59 +0000170int RegularExpression::matchedLength() const
171{
172 return d->lastMatchLength;
173}
174
cdumez@apple.comb5f8ebc2022-03-28 17:25:14 +0000175void replace(String& string, const RegularExpression& target, StringView replacement)
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000176{
177 int index = 0;
178 while (index < static_cast<int>(string.length())) {
179 int matchLength;
180 index = target.match(string, index, &matchLength);
181 if (index < 0)
182 break;
cdumez@apple.com71b40b02022-04-25 15:25:06 +0000183 string = makeStringByReplacing(string, index, matchLength, replacement);
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000184 index += replacement.length();
185 if (!matchLength)
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +0000186 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000187 }
darinf4b05b22006-07-10 05:20:17 +0000188}
weinig@apple.com5ca5e8b2008-02-22 22:30:57 +0000189
tkent@chromium.orgf71b54f2013-04-25 22:51:08 +0000190bool RegularExpression::isValid() const
191{
gyuyoung.kim@samsung.comc6ae1792014-11-28 00:51:32 +0000192 return d->m_regExpByteCode.get();
tkent@chromium.orgf71b54f2013-04-25 22:51:08 +0000193}
194
joepeck@webkit.org40a3ebb2014-01-24 06:07:24 +0000195} } // namespace JSC::Yarr