[JSC] SamplingProfiler should recognize RegExp execution
https://bugs.webkit.org/show_bug.cgi?id=201702

Reviewed by Saam Barati.

JSTests:

* stress/sampling-profiler-regexp.js: Added.
(platformSupportsSamplingProfiler.getText):
(platformSupportsSamplingProfiler.test):
(platformSupportsSamplingProfiler.baz):
(platformSupportsSamplingProfiler):
* stress/sampling-profiler/samplingProfiler.js: Extend samplingProfiler to show better error information when VERBOSE = true.
(doesTreeHaveStackTrace):

Source/JavaScriptCore:

This patch extends SamplingProfiler to recognize RegExp execution. We record
the currently executing RegExp in the VM via MatchingContextHolder so that SamplingProfiler
can detect it. We use MatchingContextHolder even when the RegExp runs in interpreter mode
so that SamplingProfiler can still catch non-JIT RegExp execution.

The following is an example of the output.

    Sampling rate: 1000.000000 microseconds. Total samples: 1830
    Top functions as <numSamples  'functionName#hash:sourceID'>
       466    'Exec#<nil>:4'
       225    '/(^|[^\\])\"\\\/Qngr\((-?[0-9]+)\)\\\/\"/g#<nil>:-1'
       173    'runBlock1#<nil>:4'
        88    '/NQ_VQ/g#<nil>:-1'
        83    '/d2/g#<nil>:-1'
        78    '/d1/g#<nil>:-1'
        70    '/\s?;\s?/#<nil>:-1'
        68    'replace#<nil>:3'
        50    '/(((\w+):\/\/)([^\/:]*)(:(\d+))?)?([^#?]*)(\?([^#]*))?(#(.*))?/#<nil>:-1'
        49    'runBlock0#<nil>:4'
        46    '#<nil>:-1'
        24    '/^\s*|\s*$/g#<nil>:-1'

    Sampling rate: 1000.000000 microseconds. Total samples: 1830

    Tier breakdown:
    -----------------------------------
    LLInt:                    13  (0.710383%)
    Baseline:                 54  (2.950820%)
    DFG:                     187  (10.218579%)
    FTL:                     612  (33.442623%)
    js builtin:               73  (3.989071%)
    Wasm:                      0  (0.000000%)
    Host:                      0  (0.000000%)
    RegExp:                  907  (49.562842%)
    C/C++:                     0  (0.000000%)
    Unknown Executable:       57  (3.114754%)

    Hottest bytecodes as <numSamples   'functionName#hash:JITType:bytecodeIndex'>
       378    'Exec#<nil>:FTL:bc#27'
       225    '/(^|[^\\])\"\\\/Qngr\((-?[0-9]+)\)\\\/\"/g#<nil>:RegExp:<nil>'
        88    '/NQ_VQ/g#<nil>:RegExp:<nil>'
        83    '/d2/g#<nil>:RegExp:<nil>'
        78    '/d1/g#<nil>:RegExp:<nil>'
        70    '/\s?;\s?/#<nil>:RegExp:<nil>'
        62    'replace#<nil>:FTL:bc#63'
        53    'Exec#<nil>:DFG:bc#27'
        50    '/(((\w+):\/\/)([^\/:]*)(:(\d+))?)?([^#?]*)(\?([^#]*))?(#(.*))?/#<nil>:RegExp:<nil>'
        46    '#<nil>:None:<nil>'
        42    'runBlock1#<nil>:FTL:bc#1795'
        29    'runBlock1#<nil>:FTL:bc#1849'
        29    'runBlock1#<nil>:FTL:bc#1741'
        24    '/^\s*|\s*$/g#<nil>:RegExp:<nil>'
        17    'runBlock1#<nil>:DFG:bc#1741'
        17    'runBlock1#<nil>:DFG:bc#1795'
        16    '/\b\w+\b/g#<nil>:RegExp:<nil>'
        14    '/TNQP=([^;]*)/i#<nil>:RegExp:<nil>'
        13    '/%\w?$/#<nil>:RegExp:<nil>'
        12    '/^ba/#<nil>:RegExp:<nil>'
        11    '/^\s*(\S*(\s+\S+)*)\s*$/#<nil>:RegExp:<nil>'
        10    'runBlock0#<nil>:FTL:bc#400'
        10    'runBlock1#<nil>:DFG:bc#1849'
        10    'Exec#<nil>:DFG:bc#16'
         9    '/(?:ZFVR.(\d+\.\d+))|(?:(?:Sversbk|TenaCnenqvfb|Vprjrnfry).(\d+\.\d+))|(?:Bcren.(\d+\.\d+))|(?:NccyrJroXvg.(\d+(?:\.\d+)?))/#<nil>:RegExp:<nil>'
         8    '/##yv18##/gi#<nil>:RegExp:<nil>'
         8    '/([a-zA-Z]|\s)+/#<nil>:RegExp:<nil>'
         7    '/JroXvg\/(\S+)/#<nil>:RegExp:<nil>'
         7    'runBlock0#<nil>:FTL:bc#2671'
         7    '/^([#.]?)((?:[\w\u0128-\uffff*_-]|\\.)*)/#<nil>:RegExp:<nil>'
         6    '/##oe##/gi#<nil>:RegExp:<nil>'
         6    'runBlock1#<nil>:FTL:bc#124'
         6    '/[+, ]/#<nil>:RegExp:<nil>'
         6    'runBlock1#<nil>:FTL:bc#1903'
         6    'replace#<nil>:DFG:bc#63'
         5    'runBlock0#<nil>:FTL:bc#3378'
         5    '/'/g#<nil>:RegExp:<nil>'
         5    '/##yv20##/gi#<nil>:RegExp:<nil>'
         4    '/\?[\w\W]*(sevraqvq|punaaryvq|tebhcvq)=([^\&\?#]*)/i#<nil>:RegExp:<nil>'
         4    'load#<nil>:None:<nil>'

* CMakeLists.txt:
* JavaScriptCore.xcodeproj/project.pbxproj:
* runtime/RegExp.cpp:
(JSC::RegExp::matchCompareWithInterpreter):
(JSC::RegExp::dumpToStream):
(JSC::appendLineTerminatorEscape<LChar>):
(JSC::appendLineTerminatorEscape<UChar>):
(JSC::escapePattern):
(JSC::RegExp::escapedPattern const): Moved from RegExpPrototype.cpp to implement RegExp::toSourceString and RegExp::escapedPattern.
(JSC::RegExp::toSourceString const):
(JSC::regexpToSourceString): Deleted.
* runtime/RegExp.h:
* runtime/RegExpInlines.h:
(JSC::RegExp::matchInline):
* runtime/RegExpPrototype.cpp:
(JSC::JSC_DEFINE_HOST_FUNCTION):
(JSC::appendLineTerminatorEscape<LChar>): Deleted.
(JSC::appendLineTerminatorEscape<UChar>): Deleted.
(JSC::regExpProtoGetterSourceInternal): Deleted.
* runtime/SamplingProfiler.cpp:
(JSC::SamplingProfiler::takeSample):
(JSC::SamplingProfiler::processUnverifiedStackTraces):
(JSC::SamplingProfiler::StackFrame::nameFromCallee):
(JSC::SamplingProfiler::StackFrame::displayName):
(JSC::SamplingProfiler::StackFrame::displayNameForJSONTests):
(JSC::SamplingProfiler::StackFrame::functionStartLine):
(JSC::SamplingProfiler::StackFrame::functionStartColumn):
(JSC::SamplingProfiler::StackFrame::sourceID):
(JSC::SamplingProfiler::StackFrame::url):
(JSC::SamplingProfiler::reportTopBytecodes):
(WTF::printInternal):
* runtime/SamplingProfiler.h:
* runtime/VM.h:
* yarr/YarrJIT.cpp:
(JSC::Yarr::MatchingContextHolder::MatchingContextHolder): Deleted.
(JSC::Yarr::MatchingContextHolder::~MatchingContextHolder): Deleted.
* yarr/YarrJIT.h:
(JSC::Yarr::MatchingContextHolder::offsetOfStackLimit): Deleted.
(JSC::Yarr::MatchingContextHolder::offsetOfPatternContextBuffer): Deleted.
(JSC::Yarr::MatchingContextHolder::offsetOfPatternContextBufferSize): Deleted.
* yarr/YarrMatchingContextHolder.h: Added.
(JSC::Yarr::MatchingContextHolder::offsetOfStackLimit):
(JSC::Yarr::MatchingContextHolder::offsetOfPatternContextBuffer):
(JSC::Yarr::MatchingContextHolder::offsetOfPatternContextBufferSize):
(JSC::Yarr::MatchingContextHolder::MatchingContextHolder):
(JSC::Yarr::MatchingContextHolder::~MatchingContextHolder):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@279976 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/Source/JavaScriptCore/runtime/SamplingProfiler.cpp b/Source/JavaScriptCore/runtime/SamplingProfiler.cpp
index 73db61c..b27e8e3 100644
--- a/Source/JavaScriptCore/runtime/SamplingProfiler.cpp
+++ b/Source/JavaScriptCore/runtime/SamplingProfiler.cpp
@@ -366,6 +366,7 @@
             CallFrame* callFrame;
             void* machinePC;
             bool topFrameIsLLInt = false;
+            RegExp* regExp = nullptr;
             void* llintPC;
             {
                 PlatformRegisters registers;
@@ -385,16 +386,16 @@
             // FIXME: Lets have a way of detecting when we're parsing code.
             // https://bugs.webkit.org/show_bug.cgi?id=152761
             if (ExecutableAllocator::singleton().isValidExecutableMemory(executableAllocatorLocker, machinePC)) {
-                if (m_vm.isExecutingInRegExpJIT) {
-                    // FIXME: We're executing a regexp. Lets gather more intersting data.
-                    // https://bugs.webkit.org/show_bug.cgi?id=152729
+                regExp = m_vm.m_executingRegExp;
+                if (regExp)
                     callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame.
-                }
             } else if (LLInt::isLLIntPC(machinePC)) {
                 topFrameIsLLInt = true;
                 // We're okay to take a normal stack trace when the PC
                 // is in LLInt code.
             } else {
+                // RegExp evaluation is a leaf. So if a RegExp is currently executing, we can say that RegExp evaluation is the top user-visible frame.
+                regExp = m_vm.m_executingRegExp;
                 // We resort to topCallFrame to see if we can get anything
                 // useful. We usually get here when we're executing C code.
                 callFrame = m_vm.topCallFrame;
@@ -434,7 +435,7 @@
                     stackTrace.uncheckedAppend(frame);
                 }
 
-                m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) });
+                m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, regExp, WTFMove(stackTrace) });
 
                 if (didRunOutOfVectorSpace)
                     m_currentFrames.grow(m_currentFrames.size() * 1.25);
@@ -592,7 +593,14 @@
         // Prepend the top-most inlined frame if needed and gather
         // location information about where the top frame is executing.
         size_t startIndex = 0;
-        if (unprocessedStackTrace.frames.size() && !!unprocessedStackTrace.frames[0].verifiedCodeBlock) {
+        if (unprocessedStackTrace.regExp) {
+            // If the stack trace is annotated with a RegExp, the top frame must be that RegExp, since RegExp evaluation is a leaf function.
+            appendEmptyFrame();
+            stackTrace.frames.last().regExp = unprocessedStackTrace.regExp;
+            stackTrace.frames.last().frameType = FrameType::RegExp;
+            stackTrace.frames.last().semanticLocation.isRegExp = true;
+            m_liveCellPointers.add(unprocessedStackTrace.regExp);
+        } else if (!unprocessedStackTrace.frames.isEmpty() && !!unprocessedStackTrace.frames[0].verifiedCodeBlock) {
             CodeBlock* topCodeBlock = unprocessedStackTrace.frames[0].verifiedCodeBlock;
             if (unprocessedStackTrace.topFrameIsLLInt) {
                 // We reuse LLInt CodeBlocks for the baseline JIT, so we need to check for both jit types.
@@ -735,8 +743,11 @@
 
 String SamplingProfiler::StackFrame::nameFromCallee(VM& vm)
 {
-    if (!callee)
+    if (!callee) {
+        if (regExp)
+            return regExp->toSourceString();
         return String();
+    }
 
     DeferTermination deferScope(vm);
     auto scope = DECLARE_CATCH_SCOPE(vm);
@@ -782,12 +793,16 @@
         }
 #endif
         return "(unknown C PC)"_s;
+
     case FrameType::Unknown:
         return "(unknown)"_s;
 
     case FrameType::Host:
         return "(host)"_s;
 
+    case FrameType::RegExp:
+        return "(regexp)"_s;
+
     case FrameType::Wasm:
 #if ENABLE(WEBASSEMBLY)
         if (wasmIndexOrName)
@@ -826,6 +841,9 @@
     case FrameType::C:
         return "(unknown)"_s;
 
+    case FrameType::RegExp:
+        return "(regexp)"_s;
+
     case FrameType::Host:
         return "(host)"_s;
 
@@ -866,6 +884,7 @@
     switch (frameType) {
     case FrameType::Unknown:
     case FrameType::Host:
+    case FrameType::RegExp:
     case FrameType::C:
     case FrameType::Wasm:
         return -1;
@@ -884,6 +903,7 @@
     switch (frameType) {
     case FrameType::Unknown:
     case FrameType::Host:
+    case FrameType::RegExp:
     case FrameType::C:
     case FrameType::Wasm:
         return std::numeric_limits<unsigned>::max();
@@ -903,6 +923,7 @@
     switch (frameType) {
     case FrameType::Unknown:
     case FrameType::Host:
+    case FrameType::RegExp:
     case FrameType::C:
     case FrameType::Wasm:
         return internalSourceID;
@@ -922,6 +943,7 @@
     switch (frameType) {
     case FrameType::Unknown:
     case FrameType::Host:
+    case FrameType::RegExp:
     case FrameType::C:
     case FrameType::Wasm:
         return emptyString();
@@ -1115,6 +1137,7 @@
     String builtin = "js builtin"_s;
     String wasm = "Wasm"_s;
     String host = "Host"_s;
+    String regexp = "RegExp"_s;
     String cpp = "C/C++"_s;
     String unknownFrame = "Unknown Frame"_s;
     String unknownExecutable = "Unknown Executable"_s;
@@ -1127,6 +1150,7 @@
         func(builtin);
         func(wasm);
         func(host);
+        func(regexp);
         func(cpp);
         func(unknownFrame);
         func(unknownExecutable);
@@ -1143,21 +1167,23 @@
             if (location.hasBytecodeIndex())
                 bytecodeIndex = toString(location.bytecodeIndex);
             else
-                bytecodeIndex = "<nil>";
+                bytecodeIndex = "<nil>"_s;
 
             if (location.hasCodeBlockHash()) {
                 StringPrintStream stream;
                 location.codeBlockHash.dump(stream);
                 codeBlockHash = stream.toString();
             } else
-                codeBlockHash = "<nil>";
+                codeBlockHash = "<nil>"_s;
 
             if (wasmCompilationMode)
                 jitType = Wasm::makeString(wasmCompilationMode.value());
+            else if (location.isRegExp)
+                jitType = "RegExp"_s;
             else
                 jitType = JITCode::typeName(location.jitType);
 
-            return makeString("#", codeBlockHash, ":", jitType, ":", bytecodeIndex);
+            return makeString('#', codeBlockHash, ':', jitType, ':', bytecodeIndex);
         };
 
         StackFrame& frame = stackTrace.frames.first();
@@ -1207,6 +1233,9 @@
             case SamplingProfiler::FrameType::Host:
                 tierName = host;
                 break;
+            case SamplingProfiler::FrameType::RegExp:
+                tierName = regexp;
+                break;
             case SamplingProfiler::FrameType::C:
                 tierName = cpp;
                 break;
@@ -1291,6 +1320,9 @@
     case SamplingProfiler::FrameType::Host:
         out.print("Host");
         break;
+    case SamplingProfiler::FrameType::RegExp:
+        out.print("RegExp");
+        break;
     case SamplingProfiler::FrameType::C:
     case SamplingProfiler::FrameType::Unknown:
         out.print("Unknown");