| /* |
| * Copyright (C) 2016 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| "use strict"; |
| |
// Loosely based on ECMA-55 sections 4-8, but relaxed to allow for modern conventions, like
// multi-character variable names. This doesn't go too far, though - in particular, it doesn't
// handle Unicode, because that would require more thought.
/**
 * Lexes the text of a BASIC program into a stream of tokens, one source line at a time.
 *
 * Every source line must begin (after optional whitespace) with a numeric user line number. For
 * each line the generator yields a "userLineNumber" token, then the tokens of the statement, and
 * finally a "newLine" token.
 *
 * Each token has the shape {kind, string, sourceLineNumber, userLineNumber}, where kind is one of
 * "userLineNumber", "keyword", "identifier", "number", "string", "operator", "remark" or
 * "newLine". Tokens of kind "number" and "string" additionally carry a decoded `value`.
 *
 * Keywords are matched case-insensitively as prefixes, without a word boundary, and the first
 * alternative that matches wins; for example "goto" lexes as the keyword "go" followed by the
 * keyword "to". That behavior is preserved here because consumers of the token stream may rely
 * on it.
 *
 * @param {string} string The complete program text; source lines are separated by "\n".
 * @yields {Object} The next token.
 * @throws {Error} If a line does not start with a line number (this includes blank lines), or if
 *     the remaining text of a line is not matched by any token rule.
 */
function* lex(string)
{
    // All patterns are anchored at the start of the not-yet-consumed part of the line, and none of
    // them can match the empty string.
    const isWhitespace = /^\s+/;
    const isLineNumber = /^[0-9]+/;
    const isIdentifier = /^[a-z_]([a-z0-9_]*)/i;
    // The optional exponent applies to both mantissa forms, so "1e3", "1.5e3" and ".5e3" are each
    // a single number token.
    const isNumber = /^(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?/i;
    const isString = /^\"([^\"]|(\"\"))*\"/;
    const isKeyword = /^((base)|(data)|(def)|(dim)|(end)|(for)|(go)|(gosub)|(goto)|(if)|(input)|(let)|(next)|(on)|(option)|(print)|(randomize)|(read)|(restore)|(return)|(step)|(stop)|(sub)|(then)|(to))/i;
    const isOperator = /^(-|\+|\*|\/|\^|\(|\)|(<[>=]?)|(>=?)|=|,|\$|;)/;
    // "rem" followed by whitespace and arbitrary text, or a bare "rem" that ends the line.
    // Case-insensitive, like the keywords.
    const isRem = /^rem(\s.*|$)/i;

    // Token rules, tried in order; the first one that matches wins.
    const tokenRules = [
        ["keyword", isKeyword],
        ["identifier", isIdentifier],
        ["number", isNumber],
        ["string", isString],
        ["operator", isOperator],
    ];

    let sourceLineNumber = 0;
    for (let line of string.split("\n")) {
        ++sourceLineNumber;
        let userLineNumber;

        // If pattern matches at the start of the remaining line, removes the matched text from the
        // line and returns it. Otherwise returns null and leaves the line untouched.
        const take = (pattern) => {
            const match = pattern.exec(line);
            if (!match)
                return null;
            line = line.slice(match[0].length);
            return match[0];
        };

        const makeToken = (kind, text) => ({kind, string: text, sourceLineNumber, userLineNumber});

        take(isWhitespace);

        const lineNumberText = take(isLineNumber);
        if (lineNumberText === null)
            throw new Error(`At line ${sourceLineNumber}: Expect line number: ${line}`);
        userLineNumber = +lineNumberText;
        yield makeToken("userLineNumber", lineNumberText);

        take(isWhitespace);

        // A remark is a whole statement, so it is only recognized directly after the line number.
        // It has to be checked before the regular rules because "rem" also looks like an
        // identifier, and remark text may contain characters that no other rule can lex.
        const remarkText = take(isRem);
        if (remarkText !== null) {
            yield makeToken("remark", remarkText);
            take(isWhitespace);
        }

        while (line.length) {
            let token = null;
            for (const [kind, pattern] of tokenRules) {
                const text = take(pattern);
                if (text === null)
                    continue;
                token = makeToken(kind, text);
                break;
            }
            if (!token)
                throw new Error(`At line ${sourceLineNumber}: Cannot lex token: ${line}`);

            if (token.kind === "number")
                token.value = +token.string;
            else if (token.kind === "string") {
                // Drop the surrounding quotes and collapse each doubled quote into a single one.
                token.value = token.string.slice(1, -1).replace(/""/g, "\"");
            }

            yield token;
            take(isWhitespace);
        }

        // Note: this is necessary for the parser, which may look-ahead without checking if we're
        // done. Fortunately, it won't look-ahead past a newLine.
        yield makeToken("newLine", "\n");
    }
}