// PerformanceTests/ARES-6/Basic/lexer.js - WebKit - Git at Google

 /*
  * Copyright (C) 2016 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 "use strict";

 // Loosely based on ECMA 55 sections 4-8, but loosened to allow for modern conventions, like
 // multi-character variable names. But this doesn't go too far - in particular, this doesn't do
 // unicode, because that would require more thought.
 //
 // lex(string) is a generator. It splits `string` on "\n" and, for each source line, yields:
 //   - one "userLineNumber" token (every line must start with a decimal line number),
 //   - zero or more "remark", "keyword", "identifier", "number", "string" or "operator" tokens,
 //   - one "newLine" token.
 // Every token is {kind, string, sourceLineNumber, userLineNumber}; "number" and "string" tokens
 // also carry a decoded `value`. Throws Error when a line does not start with a line number
 // (this includes blank lines) or when the remaining text matches no token pattern.
 function* lex(string)
 {
     // None of these patterns carries a g/y flag, so they hold no match state and can be built
     // once per call rather than once per source line.
     const isWhitespace = /^\s+/;
     const isLineNumber = /^[0-9]+/;
     const isIdentifier = /^[a-z_]([a-z0-9_]*)/i;
     // The optional exponent applies to every mantissa form ("12", "12.5" and ".5"), so "1e3" is
     // a single number rather than the number "1" followed by the identifier "e3".
     const isNumber = /^(([0-9]+(\.([0-9]*))?)|(\.[0-9]+))(e([+-]?)([0-9]+))?/i;
     const isString = /^\"([^\"]|(\"\"))*\"/;
     // Alternatives are tried in order and nothing anchors the end of the match, so "goto" and
     // "gosub" lex as "go" followed by "to"/"sub".
     const isKeyword = /^((base)|(data)|(def)|(dim)|(end)|(for)|(go)|(gosub)|(goto)|(if)|(input)|(let)|(next)|(on)|(option)|(print)|(randomize)|(read)|(restore)|(return)|(step)|(stop)|(sub)|(then)|(to))/i;
     const isOperator = /^(-|\+|\*|\/|\^|\(|\)|(<[>=]?)|(>=?)|=|,|\$|;)/;
     // Case-insensitive, like the keywords.
     const isRem = /^rem\s.*/i;

     // Tried top to bottom; the first pattern that matches wins. "remark" must come before
     // "identifier", otherwise the identifier pattern consumes "rem" and the comment text is
     // lexed as ordinary tokens. "keyword" must come before "identifier" for the same reason.
     const tokenRules = [
         ["remark", isRem],
         ["keyword", isKeyword],
         ["identifier", isIdentifier],
         ["number", isNumber],
         ["string", isString],
         ["operator", isOperator],
     ];

     let sourceLineNumber = 0;
     for (let line of string.split("\n")) {
         ++sourceLineNumber;

         line = line.replace(isWhitespace, "");

         const lineNumberMatch = isLineNumber.exec(line);
         if (!lineNumberMatch)
             throw new Error(`At line ${sourceLineNumber}: Expect line number: ${line}`);
         const userLineNumber = +lineNumberMatch[0];

         // Builds a token for `text`, which must be the prefix of `line`, and drops that prefix.
         const consume = (kind, text) => {
             line = line.slice(text.length);
             return {kind, string: text, sourceLineNumber, userLineNumber};
         };

         yield consume("userLineNumber", lineNumberMatch[0]);
         line = line.replace(isWhitespace, "");

         while (line.length) {
             let token = null;
             for (const [kind, pattern] of tokenRules) {
                 const found = pattern.exec(line);
                 if (found) {
                     token = consume(kind, found[0]);
                     break;
                 }
             }
             if (!token)
                 throw new Error(`At line ${sourceLineNumber}: Cannot lex token: ${line}`);

             if (token.kind === "number")
                 token.value = +token.string;
             else if (token.kind === "string") {
                 // Drop the surrounding quotes and collapse each doubled quote into one.
                 token.value = token.string.slice(1, -1).replace(/\"\"/g, "\"");
             }

             yield token;
             line = line.replace(isWhitespace, "");
         }

         // Note: this is necessary for the parser, which may look-ahead without checking if we're
         // done. Fortunately, it won't look-ahead past a newLine.
         yield {kind: "newLine", string: "\n", sourceLineNumber, userLineNumber};
     }
 }
	/*
	* Copyright (C) 2016 Apple Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/
	"use strict";

	// Loosely based on ECMA 55 sections 4-8, but loosened to allow for modern conventions, like
	// multi-character variable names. But this doesn't go too far - in particular, this doesn't do
	// unicode, because that would require more thought.
	//
	// lex(string) is a generator. It splits `string` on "\n" and, for each source line, yields:
	//   - one "userLineNumber" token (every line must start with a decimal line number),
	//   - zero or more "remark", "keyword", "identifier", "number", "string" or "operator" tokens,
	//   - one "newLine" token.
	// Every token is {kind, string, sourceLineNumber, userLineNumber}; "number" and "string" tokens
	// also carry a decoded `value`. Throws Error when a line does not start with a line number
	// (this includes blank lines) or when the remaining text matches no token pattern.
	function* lex(string)
	{
	    // None of these patterns carries a g/y flag, so they hold no match state and can be built
	    // once per call rather than once per source line.
	    const isWhitespace = /^\s+/;
	    const isLineNumber = /^[0-9]+/;
	    const isIdentifier = /^[a-z_]([a-z0-9_]*)/i;
	    // The optional exponent applies to every mantissa form ("12", "12.5" and ".5"), so "1e3" is
	    // a single number rather than the number "1" followed by the identifier "e3".
	    const isNumber = /^(([0-9]+(\.([0-9]*))?)|(\.[0-9]+))(e([+-]?)([0-9]+))?/i;
	    const isString = /^\"([^\"]|(\"\"))*\"/;
	    // The "|" separators must be unescaped: "\|" matches a literal pipe character, which
	    // turns the whole alternation into one long literal that never occurs in a program.
	    // Alternatives are tried in order and nothing anchors the end of the match, so "goto" and
	    // "gosub" lex as "go" followed by "to"/"sub".
	    const isKeyword = /^((base)|(data)|(def)|(dim)|(end)|(for)|(go)|(gosub)|(goto)|(if)|(input)|(let)|(next)|(on)|(option)|(print)|(randomize)|(read)|(restore)|(return)|(step)|(stop)|(sub)|(then)|(to))/i;
	    const isOperator = /^(-|\+|\*|\/|\^|\(|\)|(<[>=]?)|(>=?)|=|,|\$|;)/;
	    // Case-insensitive, like the keywords.
	    const isRem = /^rem\s.*/i;

	    // Tried top to bottom; the first pattern that matches wins. "remark" must come before
	    // "identifier", otherwise the identifier pattern consumes "rem" and the comment text is
	    // lexed as ordinary tokens. "keyword" must come before "identifier" for the same reason.
	    const tokenRules = [
	        ["remark", isRem],
	        ["keyword", isKeyword],
	        ["identifier", isIdentifier],
	        ["number", isNumber],
	        ["string", isString],
	        ["operator", isOperator],
	    ];

	    let sourceLineNumber = 0;
	    for (let line of string.split("\n")) {
	        ++sourceLineNumber;

	        line = line.replace(isWhitespace, "");

	        const lineNumberMatch = isLineNumber.exec(line);
	        if (!lineNumberMatch)
	            throw new Error(`At line ${sourceLineNumber}: Expect line number: ${line}`);
	        const userLineNumber = +lineNumberMatch[0];

	        // Builds a token for `text`, which must be the prefix of `line`, and drops that prefix.
	        const consume = (kind, text) => {
	            line = line.slice(text.length);
	            return {kind, string: text, sourceLineNumber, userLineNumber};
	        };

	        yield consume("userLineNumber", lineNumberMatch[0]);
	        line = line.replace(isWhitespace, "");

	        while (line.length) {
	            let token = null;
	            for (const [kind, pattern] of tokenRules) {
	                const found = pattern.exec(line);
	                if (found) {
	                    token = consume(kind, found[0]);
	                    break;
	                }
	            }
	            if (!token)
	                throw new Error(`At line ${sourceLineNumber}: Cannot lex token: ${line}`);

	            if (token.kind === "number")
	                token.value = +token.string;
	            else if (token.kind === "string") {
	                // Drop the surrounding quotes and collapse each doubled quote into one.
	                token.value = token.string.slice(1, -1).replace(/\"\"/g, "\"");
	            }

	            yield token;
	            line = line.replace(isWhitespace, "");
	        }

	        // Note: this is necessary for the parser, which may look-ahead without checking if we're
	        // done. Fortunately, it won't look-ahead past a newLine.
	        yield {kind: "newLine", string: "\n", sourceLineNumber, userLineNumber};
	    }
	}