/* NSC -- new Scala compiler
 * Copyright 2005-2009 LAMP/EPFL
 * @author  Martin Odersky
 */
// $Id: Scanners.scala 17285 2009-03-11 13:51:56Z rytz $

package scala.tools.nsc
package ast.parser

import scala.tools.nsc.util._
import SourceFile.{LF, FF, CR, SU}
import Tokens._
import scala.annotation.switch
import scala.collection.mutable.{ListBuffer, ArrayBuffer}

trait Scanners {
  val global : Global
  import global._

  /** Offset into source character array */
  type Offset = Int

  /** An undefined offset */
  val NoOffset: Offset = -1

  trait TokenData {

    /** the next token */
    var token: Int = EMPTY

    /** the offset of the first character of the current token */
    var offset: Offset = 0

    /** the offset of the character following the token preceding this one */
    var lastOffset: Offset = 0

    /** the name of an identifier */
    var name: Name = null

    /** the string value of a literal */
    var strVal: String = null

    /** the base of a number */
    var base: Int = 0

    def copyFrom(td: TokenData) = {
      this.token = td.token
      this.offset = td.offset
      this.lastOffset = td.lastOffset
      this.name = td.name
      this.strVal = td.strVal
      this.base = td.base
    }
  }

  abstract class Scanner extends CharArrayReader with TokenData {

    def flush = { charOffset = offset; nextChar(); this }

    def resume(lastCode: Int) = {
      token = lastCode
      assert(next.token == EMPTY)
      nextToken()
    }

    // things to fill in, in addition to buf, decodeUni
    def warning(off: Offset, msg: String): Unit
    def error (off: Offset, msg: String): Unit
    def incompleteInputError(off: Offset, msg: String): Unit
    def deprecationWarning(off: Offset, msg: String): Unit

    /** the last error offset */
    var errOffset: Offset = NoOffset

    /** A character buffer for literals */
    val cbuf = new StringBuilder

    /** append Unicode character to "cbuf" buffer */
    protected def putChar(c: Char) {
      // assert(cbuf.size < 10000, cbuf)
      cbuf.append(c)
    }

    /** Clear buffer and set name and token */
    private def finishNamed() {
      name = newTermName(cbuf.toString)
      token = name2token(name)
      cbuf.clear()
    }

    /** Clear buffer and set string */
    private def setStrVal() {
      strVal = cbuf.toString
      cbuf.clear()
    }

    /** Should doc comments be built? */
    def buildDocs: Boolean = onlyPresentation

    /** buffer for the documentation comment */
    var docBuffer: StringBuilder = null

    /** Return current docBuffer and set docBuffer to null */
    def flushDoc = {
      val ret = if (docBuffer != null) docBuffer.toString else null
      docBuffer = null
      ret
    }

    /** add the given character to the documentation buffer */
    protected def putDocChar(c: Char) {
      if (docBuffer ne null) docBuffer.append(c)
    }

    private class TokenData0 extends TokenData

    /** we need one token lookahead and one token history */
    val next : TokenData = new TokenData0
    val prev : TokenData = new TokenData0

    /** a stack of tokens which indicates whether line-ends can be statement separators */
    var sepRegions: List[Int] = List()

// Get next token ------------------------------------------------------------

    /** read next token and return last offset */
    def skipToken(): Offset = {
      val off = offset
      nextToken()
      off
    }
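    // Illustrative note: `next` and `prev` above give the scanner one token of
    // lookahead and one of history. For example, scanning `case class C` first yields
    // CASE; nextToken() then peeks ahead, sees CLASS, and fuses the pair into a single
    // CASECLASS token. If the peeked token is anything else, it is parked in `next`
    // and delivered on the following call.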
    /** Produce next token, filling TokenData fields of Scanner.
     */
    def nextToken() {
      val lastToken = token
      // Adapt sepRegions according to last token
      (lastToken: @switch) match {
        case LPAREN =>
          sepRegions = RPAREN :: sepRegions
        case LBRACKET =>
          sepRegions = RBRACKET :: sepRegions
        case LBRACE =>
          sepRegions = RBRACE :: sepRegions
        case CASE =>
          sepRegions = ARROW :: sepRegions
        case RBRACE =>
          sepRegions = sepRegions dropWhile (_ != RBRACE)
          if (!sepRegions.isEmpty) sepRegions = sepRegions.tail
        case RBRACKET | RPAREN | ARROW =>
          if (!sepRegions.isEmpty && sepRegions.head == lastToken)
            sepRegions = sepRegions.tail
        case _ =>
      }

      // Read a token or copy it from `next` tokenData
      if (next.token == EMPTY) {
        lastOffset = charOffset - 1
        fetchToken()
      } else {
        this copyFrom next
        next.token = EMPTY
      }

      /** Insert NEWLINE or NEWLINES if
       *  - we are after a newline
       *  - we are within a { ... } or on toplevel (wrt sepRegions)
       *  - the current token can start a statement and the one before can end it
       *  insert NEWLINES if we are past a blank line, NEWLINE otherwise
       */
      if (!applyBracePatch() && afterLineEnd() && inLastOfStat(lastToken) &&
          inFirstOfStat(token) &&
          (sepRegions.isEmpty || sepRegions.head == RBRACE)) {
        next copyFrom this
        offset = if (lineStartOffset <= offset) lineStartOffset else lastLineStartOffset
        token = if (pastBlankLine()) NEWLINES else NEWLINE
      }

      // Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE
      if (token == CASE) {
        prev copyFrom this
        val nextLastOffset = charOffset - 1
        fetchToken()
        if (token == CLASS) {
          token = CASECLASS
        } else if (token == OBJECT) {
          token = CASEOBJECT
        } else {
          lastOffset = nextLastOffset
          next copyFrom this
          this copyFrom prev
        }
      } else if (token == SEMI) {
        prev copyFrom this
        fetchToken()
        if (token != ELSE) {
          next copyFrom this
          this copyFrom prev
        }
      }
      // print("["+this+"]")
    }

    /** Is current token first one after a newline? */
    private def afterLineEnd(): Boolean =
      lastOffset < lineStartOffset &&
      (lineStartOffset <= offset ||
       lastOffset < lastLineStartOffset && lastLineStartOffset <= offset)

    /** Is there a blank line between the current token and the last one?
     *  @pre  afterLineEnd().
     */
    private def pastBlankLine(): Boolean = {
      var idx = lastOffset
      var ch = buf(idx)
      val end = offset
      while (idx < end) {
        if (ch == LF || ch == FF) {
          do {
            idx += 1; ch = buf(idx)
            if (ch == LF || ch == FF) {
              // println("blank line found at "+lastOffset+":"+(lastOffset to idx).map(buf(_)).toList)
              return true
            }
          } while (idx < end && ch <= ' ')
        }
        idx += 1; ch = buf(idx)
      }
      false
    }
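    // For illustration of the rules above: given
    //   val x = 1
    //   val y = 2
    // a NEWLINE token is inserted between the two statements, while
    //   val x = 1
    //
    //   val y = 2
    // yields NEWLINES instead, because pastBlankLine() sees an empty line in between.
    // No separator is inserted inside parentheses or brackets, since sepRegions then
    // has RPAREN or RBRACKET (not RBRACE) on top.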
    /** read next token, filling TokenData fields of Scanner.
     */
    protected final def fetchToken() {
      offset = charOffset - 1
      (ch: @switch) match {
        case ' ' | '\t' | CR | LF | FF =>
          nextChar()
          fetchToken()
        case 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' |
             'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' |
             'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '$' | '_' |
             'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' |
             'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' |
             'u' | 'v' | 'w' | 'x' | 'y' | // scala-mode: need to understand multi-line case patterns
             'z' =>
          putChar(ch)
          nextChar()
          getIdentRest()  // scala-mode: wrong indent for multi-line case blocks
        case '<' => // is XMLSTART?
          val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
          nextChar()
          last match {
            case ' '|'\t'|'\n'|'{'|'('|'>' if xml.Parsing.isNameStart(ch) || ch == '!' || ch == '?' =>
              token = XMLSTART
            case _ =>
              // Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
              putChar('<')
              getOperatorRest()
          }
        case '~' | '!' | '@' | '#' | '%' |
             '^' | '*' | '+' | '-' | /*'<' | */
             '>' | '?' | ':' | '=' | '&' |
             '|' | '\\' =>
          putChar(ch)
          nextChar()
          getOperatorRest()
        case '/' =>
          nextChar()
          if (skipComment()) {
            fetchToken()
          } else {
            putChar('/')
            getOperatorRest()
          }
        case '0' =>
          putChar(ch)
          nextChar()
          if (ch == 'x' || ch == 'X') {
            nextChar()
            base = 16
          } else {
            base = 8
          }
          getNumber()
        case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
          base = 10
          getNumber()
        case '`' =>
          nextChar()
          if (getStringLit('`')) {
            finishNamed()
            if (name.length == 0) syntaxError("empty quoted identifier")
            token = BACKQUOTED_IDENT
          } else syntaxError("unclosed quoted identifier")
        case '\"' =>
          nextChar()
          if (ch == '\"') {
            nextChar()
            if (ch == '\"') {
              nextChar()
              val saved = lineStartOffset
              getMultiLineStringLit()
              if (lineStartOffset != saved) // ignore linestarts within a multi-line string
                lastLineStartOffset = saved
            } else {
              token = STRINGLIT
              strVal = ""
            }
          } else if (getStringLit('\"')) {
            setStrVal()
            token = STRINGLIT
          } else {
            syntaxError("unclosed string literal")
          }
        case '\'' =>
          nextChar()
          if (isIdentifierStart(ch))
            charLitOr(getIdentRest)
          else if (isSpecial(ch))
            charLitOr(getOperatorRest)
          else {
            getLitChar()
            if (ch == '\'') {
              nextChar()
              token = CHARLIT
              setStrVal()
            } else {
              syntaxError("unclosed character literal")
            }
          }
        case '.' =>
          nextChar()
          if ('0' <= ch && ch <= '9') {
            putChar('.'); getFraction()
          } else {
            token = DOT
          }
        case ';' =>
          nextChar(); token = SEMI
        case ',' =>
          nextChar(); token = COMMA
        case '(' =>
          nextChar(); token = LPAREN
        case '{' =>
          nextChar(); token = LBRACE
        case ')' =>
          nextChar(); token = RPAREN
        case '}' =>
          nextChar(); token = RBRACE
        case '[' =>
          nextChar(); token = LBRACKET
        case ']' =>
          nextChar(); token = RBRACKET
        case SU =>
          if (charOffset >= buf.length) token = EOF
          else {
            syntaxError("illegal character")
            nextChar()
          }
        case _ =>
          if (ch == '\u21D2') {
            nextChar(); token = ARROW
          } else if (ch == '\u2190') {
            nextChar(); token = LARROW
          } else if (Character.isUnicodeIdentifierStart(ch)) {
            putChar(ch)
            nextChar()
            getIdentRest()
          } else if (isSpecial(ch)) {
            putChar(ch)
            getOperatorRest()
          } else {
            syntaxError("illegal character")
            nextChar()
          }
      }
    }

    private def skipComment(): Boolean = {
      if (ch == '/') {
        do {
          nextChar()
        } while ((ch != CR) && (ch != LF) && (ch != SU))
        true
      } else if (ch == '*') {
        docBuffer = null
        var openComments = 1
        nextChar()
        if (ch == '*' && buildDocs)
          docBuffer = new StringBuilder("/**")
        while (openComments > 0) {
          do {
            do {
              if (ch == '/') {
                nextChar(); putDocChar(ch)
                if (ch == '*') {
                  nextChar(); putDocChar(ch)
                  openComments += 1
                }
              }
              if (ch != '*' && ch != SU) {
                nextChar(); putDocChar(ch)
              }
            } while (ch != '*' && ch != SU)
            while (ch == '*') {
              nextChar(); putDocChar(ch)
            }
          } while (ch != '/' && ch != SU)
          if (ch == '/') nextChar()
          else incompleteInputError("unclosed comment")
          openComments -= 1
        }
        true
      } else {
        false
      }
    }

    /** Can token start a statement? */
    def inFirstOfStat(token: Int) = token match {
      case EOF | CATCH | ELSE | EXTENDS | FINALLY | FORSOME | MATCH | WITH | YIELD |
           COMMA | SEMI | NEWLINE | NEWLINES | DOT | COLON | EQUALS | ARROW | LARROW |
           SUBTYPE | VIEWBOUND | SUPERTYPE | HASH | RPAREN | RBRACKET | RBRACE | LBRACKET =>
        false
      case _ =>
        true
    }
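    // For illustration: ELSE appears in the exclusion list above, so in
    //   if (cond) a
    //   else b
    // no NEWLINE is inserted before `else` even though it starts a fresh line; the
    // same holds for lines beginning with `.`, `,`, or a closing bracket or brace.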
    /** Can token end a statement? */
    def inLastOfStat(token: Int) = token match {
      case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT | STRINGLIT | SYMBOLLIT |
           IDENTIFIER | BACKQUOTED_IDENT | THIS | NULL | TRUE | FALSE | RETURN | USCORE |
           TYPE | XMLSTART | RPAREN | RBRACKET | RBRACE =>
        true
      case _ =>
        false
    }

// Identifiers ---------------------------------------------------------------

    private def getIdentRest(): Unit = (ch: @switch) match {
      case 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' |
           'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' |
           'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '$' |
           'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' |
           'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' |
           'u' | 'v' | 'w' | 'x' | 'y' | 'z' |
           '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
        putChar(ch)
        nextChar()
        getIdentRest()
      case '_' =>
        putChar(ch)
        nextChar()
        getIdentOrOperatorRest()
      case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
        finishNamed()
      case _ =>
        if (Character.isUnicodeIdentifierPart(ch)) {
          putChar(ch)
          nextChar()
          getIdentRest()
        } else {
          finishNamed()
        }
    }

    private def getOperatorRest(): Unit = (ch: @switch) match {
      case '~' | '!' | '@' | '#' | '%' |
           '^' | '*' | '+' | '-' | '<' |
           '>' | '?' | ':' | '=' | '&' |
           '|' | '\\' =>
        putChar(ch); nextChar(); getOperatorRest()
      case '/' =>
        nextChar()
        if (skipComment()) finishNamed()
        else { putChar('/'); getOperatorRest() }
      case _ =>
        if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() }
        else finishNamed()
    }

    private def getIdentOrOperatorRest() {
      if (isIdentifierPart(ch))
        getIdentRest()
      else ch match {
        case '~' | '!' | '@' | '#' | '%' |
             '^' | '*' | '+' | '-' | '<' |
             '>' | '?' | ':' | '=' | '&' |
             '|' | '\\' | '/' =>
          getOperatorRest()
        case _ =>
          if (isSpecial(ch)) getOperatorRest()
          else finishNamed()
      }
    }

    private def getStringLit(delimiter: Char): Boolean = {
      while (ch != delimiter && (isUnicodeEscape || ch != CR && ch != LF && ch != SU)) {
        getLitChar()
      }
      if (ch == delimiter) { nextChar(); true }
      else false
    }

    private def getMultiLineStringLit() {
      if (ch == '\"') {
        nextChar()
        if (ch == '\"') {
          nextChar()
          if (ch == '\"') {
            nextChar()
            token = STRINGLIT
            setStrVal()
          } else {
            putChar('\"')
            putChar('\"')
            getMultiLineStringLit()
          }
        } else {
          putChar('\"')
          getMultiLineStringLit()
        }
      } else if (ch == SU) {
        incompleteInputError("unclosed multi-line string literal")
      } else {
        putChar(ch)
        nextChar()
        getMultiLineStringLit()
      }
    }

// Literals -----------------------------------------------------------------

    /** read next character in character or string literal:
     */
    protected def getLitChar() =
      if (ch == '\\') {
        nextChar()
        if ('0' <= ch && ch <= '7') {
          val leadch: Char = ch
          var oct: Int = digit2int(ch, 8)
          nextChar()
          if ('0' <= ch && ch <= '7') {
            oct = oct * 8 + digit2int(ch, 8)
            nextChar()
            if (leadch <= '3' && '0' <= ch && ch <= '7') {
              oct = oct * 8 + digit2int(ch, 8)
              nextChar()
            }
          }
          putChar(oct.toChar)
        } else {
          ch match {
            case 'b'  => putChar('\b')
            case 't'  => putChar('\t')
            case 'n'  => putChar('\n')
            case 'f'  => putChar('\f')
            case 'r'  => putChar('\r')
            case '\"' => putChar('\"')
            case '\'' => putChar('\'')
            case '\\' => putChar('\\')
            case _    =>
              syntaxError(charOffset - 1, "invalid escape character")
              putChar(ch)
          }
          nextChar()
        }
      } else {
        putChar(ch)
        nextChar()
      }
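    // For illustration: inside a character or string literal, getLitChar() turns the
    // escape `\101` into 'A' (octal 101 = decimal 65) and `\n` into a line feed; an
    // unknown escape such as `\q` reports "invalid escape character" and keeps the
    // character as written.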
    /** read fractional part and exponent of floating point number
     *  if one is present.
     */
    protected def getFraction() {
      token = DOUBLELIT
      while ('0' <= ch && ch <= '9') {
        putChar(ch)
        nextChar()
      }
      if (ch == 'e' || ch == 'E') {
        val lookahead = lookaheadReader
        lookahead.nextChar()
        if (lookahead.ch == '+' || lookahead.ch == '-') {
          lookahead.nextChar()
        }
        if ('0' <= lookahead.ch && lookahead.ch <= '9') {
          putChar(ch)
          nextChar()
          if (ch == '+' || ch == '-') {
            putChar(ch)
            nextChar()
          }
          while ('0' <= ch && ch <= '9') {
            putChar(ch)
            nextChar()
          }
        }
        token = DOUBLELIT
      }
      if (ch == 'd' || ch == 'D') {
        putChar(ch)
        nextChar()
        token = DOUBLELIT
      } else if (ch == 'f' || ch == 'F') {
        putChar(ch)
        nextChar()
        token = FLOATLIT
      }
      checkNoLetter()
      setStrVal()
    }

    /** Convert current strVal to char value */
    def charVal: Char = if (strVal.length > 0) strVal.charAt(0) else 0

    /** Convert current strVal, base to long value
     *  This is tricky because of max negative value.
     */
    def intVal(negated: Boolean): Long = {
      if (token == CHARLIT && !negated) {
        charVal
      } else {
        var value: Long = 0
        val divider = if (base == 10) 1 else 2
        val limit: Long =
          if (token == LONGLIT) Math.MAX_LONG else Math.MAX_INT
        var i = 0
        val len = strVal.length
        while (i < len) {
          val d = digit2int(strVal charAt i, base)
          if (d < 0) {
            syntaxError("malformed integer number")
            return 0
          }
          if (value < 0 ||
              limit / (base / divider) < value ||
              limit - (d / divider) < value * (base / divider) &&
              !(negated && limit == value * base - 1 + d)) {
            syntaxError("integer number too large")
            return 0
          }
          value = value * base + d
          i += 1
        }
        if (negated) -value else value
      }
    }

    def intVal: Long = intVal(false)

    /** Convert current strVal, base to double value */
    def floatVal(negated: Boolean): Double = {
      val limit: Double =
        if (token == DOUBLELIT) Math.MAX_DOUBLE else Math.MAX_FLOAT
      try {
        val value: Double = java.lang.Double.valueOf(strVal).doubleValue()
        if (value > limit)
          syntaxError("floating point number too large")
        if (negated) -value else value
      } catch {
        case _: NumberFormatException =>
          syntaxError("malformed floating point number")
          0.0
      }
    }

    def floatVal: Double = floatVal(false)

    def checkNoLetter() {
      if (isIdentifierPart(ch) && ch >= ' ')
        syntaxError("Invalid literal number")
    }
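    // For illustration: when a digit sequence is followed by '.', getNumber() below
    // only commits to a fraction if the character after the dot could continue a
    // number, so `1.5` scans as a DOUBLELIT while `1.toString` scans as INTLIT, DOT,
    // IDENTIFIER. A leading 0 selects base 8 and a 0x/0X prefix selects base 16
    // (see the '0' case of fetchToken).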
    /** Read a number into strVal and set base
     */
    protected def getNumber() {
      val base1 = if (base < 10) 10 else base
      // read 8,9's even if format is octal, produce a malformed number error afterwards.
      while (digit2int(ch, base1) >= 0) {
        putChar(ch)
        nextChar()
      }
      token = INTLIT
      if (base <= 10 && ch == '.') {
        val lookahead = lookaheadReader
        lookahead.nextChar()
        lookahead.ch match {
          case '0' | '1' | '2' | '3' | '4' |
               '5' | '6' | '7' | '8' | '9' |
               'd' | 'D' | 'e' | 'E' | 'f' | 'F' =>
            putChar(ch)
            nextChar()
            return getFraction()
          case _ =>
            if (!isIdentifierStart(lookahead.ch)) {
              putChar(ch)
              nextChar()
              return getFraction()
            }
        }
      }
      if (base <= 10 &&
          (ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F' || ch == 'd' || ch == 'D')) {
        return getFraction()
      }
      setStrVal()
      if (ch == 'l' || ch == 'L') {
        nextChar()
        token = LONGLIT
      } else checkNoLetter()
    }

    /** Parse character literal if current character is followed by \',
     *  or follow with given op and return a symbol literal token
     */
    def charLitOr(op: () => Unit) {
      putChar(ch)
      nextChar()
      if (ch == '\'') {
        nextChar()
        token = CHARLIT
        setStrVal()
      } else {
        op()
        token = SYMBOLLIT
        strVal = name.toString
      }
    }

// Errors -----------------------------------------------------------------

    /** generate an error at the given offset */
    def syntaxError(off: Offset, msg: String) {
      error(off, msg)
      token = ERROR
      errOffset = off
    }

    /** generate an error at the current token offset */
    def syntaxError(msg: String): Unit = syntaxError(offset, msg)

    /** signal an error where the input ended in the middle of a token */
    def incompleteInputError(msg: String) {
      incompleteInputError(offset, msg)
      token = EOF
      errOffset = offset
    }

    override def toString() = token match {
      case IDENTIFIER | BACKQUOTED_IDENT =>
        "id(" + name + ")"
      case CHARLIT =>
        "char(" + intVal + ")"
      case INTLIT =>
        "int(" + intVal + ")"
      case LONGLIT =>
        "long(" + intVal + ")"
      case FLOATLIT =>
        "float(" + floatVal + ")"
      case DOUBLELIT =>
        "double(" + floatVal + ")"
      case STRINGLIT =>
        "string(" + strVal + ")"
      case SEMI =>
        ";"
      case NEWLINE =>
        ";"
      case NEWLINES =>
        ";;"
      case COMMA =>
        ","
      case _ =>
        token2string(token)
    }

// ------------- brace counting and healing ------------------------------

    /** overridden in UnitScanners:
     *  apply brace patch if one exists for this offset
     *  return true if subsequent end of line handling should be suppressed.
     */
    def applyBracePatch(): Boolean = false

    /** overridden in UnitScanners */
    def parenBalance(token: Int) = 0

    /** overridden in UnitScanners */
    def healBraces(): List[BracePatch] = List()

    /** Initialization method: read first char, then first token */
    def init() {
      nextChar()
      nextToken()
    }

  } // end Scanner

// ------------- character classification --------------------------------

  def isIdentifierStart(c: Char): Boolean =
    ('A' <= c && c <= 'Z') ||
    ('a' <= c && c <= 'z') ||
    (c == '_') || (c == '$') ||
    Character.isUnicodeIdentifierStart(c)

  def isIdentifierPart(c: Char) =
    isIdentifierStart(c) ||
    ('0' <= c && c <= '9') ||
    Character.isUnicodeIdentifierPart(c)

  def isSpecial(c: Char) = {
    val chtp = Character.getType(c)
    chtp == Character.MATH_SYMBOL || chtp == Character.OTHER_SYMBOL
  }
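  // For illustration: isSpecial accepts characters whose Unicode category is
  // MATH_SYMBOL or OTHER_SYMBOL, which is why symbols such as `√` or `±` are scanned
  // as operator characters even though they are not in the ASCII operator set below.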
  def isOperatorPart(c : Char) : Boolean = (c: @switch) match {
    case '~' | '!' | '@' | '#' | '%' |
         '^' | '*' | '+' | '-' | '<' |
         '>' | '?' | ':' | '=' | '&' |
         '|' | '/' | '\\' => true
    case c => isSpecial(c)
  }

// ------------- keyword configuration -----------------------------------

  /** Keyword array; maps from name indices to tokens */
  private var keyCode: Array[Byte] = _

  /** The highest name index of a keyword token */
  private var maxKey = 0

  /** An array of all keyword token names */
  private var keyName = new Array[Name](128)

  /** The highest keyword token plus one */
  private var tokenCount = 0

  /** Enter keyword with given name and token id */
  protected def enterKeyword(n: Name, tokenId: Int) {
    while (tokenId >= keyName.length) {
      val newTokName = new Array[Name](keyName.length * 2)
      compat.Platform.arraycopy(keyName, 0, newTokName, 0, newTokName.length)
      keyName = newTokName
    }
    keyName(tokenId) = n
    if (n.start > maxKey) maxKey = n.start
    if (tokenId >= tokenCount) tokenCount = tokenId + 1
  }

  /** Enter all keywords */
  protected def enterKeywords() {
    enterKeyword(nme.ABSTRACTkw, ABSTRACT)
    enterKeyword(nme.CASEkw, CASE)
    enterKeyword(nme.CATCHkw, CATCH)
    enterKeyword(nme.CLASSkw, CLASS)
    enterKeyword(nme.DEFkw, DEF)
    enterKeyword(nme.DOkw, DO)
    enterKeyword(nme.ELSEkw, ELSE)
    enterKeyword(nme.EXTENDSkw, EXTENDS)
    enterKeyword(nme.FALSEkw, FALSE)
    enterKeyword(nme.FINALkw, FINAL)
    enterKeyword(nme.FINALLYkw, FINALLY)
    enterKeyword(nme.FORkw, FOR)
    enterKeyword(nme.FORSOMEkw, FORSOME)
    enterKeyword(nme.IFkw, IF)
    enterKeyword(nme.IMPLICITkw, IMPLICIT)
    enterKeyword(nme.IMPORTkw, IMPORT)
    enterKeyword(nme.LAZYkw, LAZY)
    enterKeyword(nme.MATCHkw, MATCH)
    enterKeyword(nme.NEWkw, NEW)
    enterKeyword(nme.NULLkw, NULL)
    enterKeyword(nme.OBJECTkw, OBJECT)
    enterKeyword(nme.OVERRIDEkw, OVERRIDE)
    enterKeyword(nme.PACKAGEkw, PACKAGE)
    enterKeyword(nme.PRIVATEkw, PRIVATE)
    enterKeyword(nme.PROTECTEDkw, PROTECTED)
    enterKeyword(nme.RETURNkw, RETURN)
    enterKeyword(nme.SEALEDkw, SEALED)
    enterKeyword(nme.SUPERkw, SUPER)
    enterKeyword(nme.THISkw, THIS)
    enterKeyword(nme.THROWkw, THROW)
    enterKeyword(nme.TRAITkw, TRAIT)
    enterKeyword(nme.TRUEkw, TRUE)
    enterKeyword(nme.TRYkw, TRY)
    enterKeyword(nme.TYPEkw, TYPE)
    enterKeyword(nme.VALkw, VAL)
    enterKeyword(nme.VARkw, VAR)
    enterKeyword(nme.WHILEkw, WHILE)
    enterKeyword(nme.WITHkw, WITH)
    enterKeyword(nme.YIELDkw, YIELD)
    enterKeyword(nme.DOTkw, DOT)
    enterKeyword(nme.USCOREkw, USCORE)
    enterKeyword(nme.COLONkw, COLON)
    enterKeyword(nme.EQUALSkw, EQUALS)
    enterKeyword(nme.ARROWkw, ARROW)
    enterKeyword(nme.LARROWkw, LARROW)
    enterKeyword(nme.SUBTYPEkw, SUBTYPE)
    enterKeyword(nme.VIEWBOUNDkw, VIEWBOUND)
    enterKeyword(nme.SUPERTYPEkw, SUPERTYPE)
    enterKeyword(nme.HASHkw, HASH)
    enterKeyword(nme.ATkw, AT)
  }

  { // initialization
    enterKeywords()
    // Build keyword array
    keyCode = Array.fill(maxKey + 1)(IDENTIFIER)
    for (j <- 0 until tokenCount if keyName(j) ne null)
      keyCode(keyName(j).start) = j.toByte
  }

  /** Convert name to token */
  def name2token(name: Name): Int =
    if (name.start <= maxKey) keyCode(name.start) else IDENTIFIER
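  // For illustration: keyword lookup is a plain array access. Each keyword name is
  // interned once in the global name table, enterKeywords() records its token code at
  // index `name.start`, and name2token then maps any scanned identifier to its keyword
  // token (or to IDENTIFIER) without a hash lookup.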
// Token representation ----------------------------------------------------

  /** Returns the string representation of given token. */
  def token2string(token: Int): String = (token: @switch) match {
    case IDENTIFIER | BACKQUOTED_IDENT => "identifier"
    case CHARLIT => "character literal"
    case INTLIT => "integer literal"
    case LONGLIT => "long literal"
    case FLOATLIT => "float literal"
    case DOUBLELIT => "double literal"
    case STRINGLIT => "string literal"
    case SYMBOLLIT => "symbol literal"
    case LPAREN => "'('"
    case RPAREN => "')'"
    case LBRACE => "'{'"
    case RBRACE => "'}'"
    case LBRACKET => "'['"
    case RBRACKET => "']'"
    case EOF => "eof"
    case ERROR => "something"
    case SEMI => "';'"
    case NEWLINE => "';'"
    case NEWLINES => "';'"
    case COMMA => "','"
    case CASECLASS => "case class"
    case CASEOBJECT => "case object"
    case XMLSTART => "$XMLSTART$<"
    case _ =>
      if (token <= maxKey) "'" + keyName(token) + "'"
      else "'<" + token + ">'"
  }

  /** A scanner over a given compilation unit */
  class UnitScanner(unit: CompilationUnit, patches: List[BracePatch]) extends Scanner {
    def this(unit: CompilationUnit) = this(unit, List())

    val buf = unit.source.asInstanceOf[BatchSourceFile].content
    val decodeUni = !settings.nouescape.value

    def warning(off: Offset, msg: String) = unit.warning(unit.position(off), msg)
    def error (off: Offset, msg: String) = unit.error(unit.position(off), msg)
    def incompleteInputError(off: Offset, msg: String) = unit.incompleteInputError(unit.position(off), msg)
    def deprecationWarning(off: Offset, msg: String) = unit.deprecationWarning(unit.position(off), msg)

    private var bracePatches: List[BracePatch] = patches

    lazy val parensAnalyzer = new ParensAnalyzer(unit, List())

    override def parenBalance(token: Int) = parensAnalyzer.balance(token)

    override def healBraces(): List[BracePatch] = {
      var patches: List[BracePatch] = List()
      if (!parensAnalyzer.tabSeen) {
        var bal = parensAnalyzer.balance(RBRACE)
        while (bal < 0) {
          patches = new ParensAnalyzer(unit, patches).insertRBrace()
          bal += 1
        }
        while (bal > 0) {
          patches = new ParensAnalyzer(unit, patches).deleteRBrace()
          bal -= 1
        }
      }
      patches
    }

    /** Insert or delete a brace, if a patch exists for this offset */
    override def applyBracePatch(): Boolean = {
      if (bracePatches.isEmpty || bracePatches.head.off != offset) false
      else {
        val patch = bracePatches.head
        bracePatches = bracePatches.tail
        // println("applying brace patch "+offset)//DEBUG
        if (patch.inserted) {
          next copyFrom this
          error(offset, "Missing closing brace `}' assumed here")
          token = RBRACE
          true
        } else {
          error(offset, "Unmatched closing brace '}' ignored here")
          fetchToken()
          false
        }
      }
    }
  }
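  // Illustrative usage sketch (assumptions: a Global instance `global` and one of its
  // CompilationUnits `unit` are in scope; this is not taken from the surrounding code):
  //
  //   val scanner = new global.syntaxAnalyzer.UnitScanner(unit)
  //   scanner.init()                     // reads the first character and first token
  //   while (scanner.token != EOF) {
  //     println(scanner)                 // e.g. id(foo), int(42), ";"
  //     scanner.nextToken()
  //   }
  //
  // When the parser finds unbalanced braces it may call healBraces(), which uses the
  // ParensAnalyzer below to guess brace patches from indentation and then rescans the
  // unit with those patches applied via applyBracePatch().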
  class ParensAnalyzer(unit: CompilationUnit, patches: List[BracePatch]) extends UnitScanner(unit, patches) {
    var balance = collection.mutable.Map(RPAREN -> 0, RBRACKET -> 0, RBRACE -> 0)

    init()

    /** The offset of the first token on this line, or next following line if blank */
    val lineStart = new ArrayBuffer[Int]

    /** The list of matching top-level brace pairs (each of which may contain nested brace pairs). */
    val bracePairs: List[BracePair] = {

      var lineCount = 1
      var lastOffset = 0
      var indent = 0
      val oldBalance = collection.mutable.Map[Int, Int]()

      def markBalance() = for ((k, v) <- balance) oldBalance(k) = v
      markBalance()

      def scan(bpbuf: ListBuffer[BracePair]): (Int, Int) = {
        if (token != NEWLINE && token != NEWLINES) {
          while (lastOffset < offset) {
            if (buf(lastOffset) == LF) lineCount += 1
            lastOffset += 1
          }
          while (lineCount > lineStart.length) {
            lineStart += offset
            // reset indentation unless there are new opening brackets or
            // braces since last indent line and at the same time there
            // are no new braces.
            if (balance(RPAREN) >= oldBalance(RPAREN) &&
                balance(RBRACKET) >= oldBalance(RBRACKET) ||
                balance(RBRACE) != oldBalance(RBRACE)) {
              indent = column(offset)
              markBalance()
            }
          }
        }

        token match {
          case LPAREN =>
            balance(RPAREN) -= 1; nextToken(); scan(bpbuf)
          case LBRACKET =>
            balance(RBRACKET) -= 1; nextToken(); scan(bpbuf)
          case RPAREN =>
            balance(RPAREN) += 1; nextToken(); scan(bpbuf)
          case RBRACKET =>
            balance(RBRACKET) += 1; nextToken(); scan(bpbuf)
          case LBRACE =>
            balance(RBRACE) -= 1
            val lc = lineCount
            val loff = offset
            val lindent = indent
            val bpbuf1 = new ListBuffer[BracePair]
            nextToken()
            val (roff, rindent) = scan(bpbuf1)
            if (lc != lineCount)
              bpbuf += BracePair(loff, lindent, roff, rindent, bpbuf1.toList)
            scan(bpbuf)
          case RBRACE =>
            balance(RBRACE) += 1
            val off = offset; nextToken(); (off, indent)
          case EOF =>
            (-1, -1)
          case _ =>
            nextToken(); scan(bpbuf)
        }
      }

      val bpbuf = new ListBuffer[BracePair]
      while (token != EOF) {
        val (roff, rindent) = scan(bpbuf)
        if (roff != -1) {
          val current = BracePair(-1, -1, roff, rindent, bpbuf.toList)
          bpbuf.clear()
          bpbuf += current
        }
      }

      def printBP(bp: BracePair, indent: Int) {
        println(" "*indent+line(bp.loff)+":"+bp.lindent+" to "+line(bp.roff)+":"+bp.rindent)
        if (bp.nested.nonEmpty)
          for (bp1 <- bp.nested) {
            printBP(bp1, indent + 2)
          }
      }
      // println("lineStart = "+lineStart)//DEBUG
      // println("bracepairs = ")
      // for (bp <- bpbuf.toList) printBP(bp, 0)
      bpbuf.toList
    }

    var tabSeen = false

    def line(offset: Int): Int = {
      def findLine(lo: Int, hi: Int): Int = {
        val mid = (lo + hi) / 2
        if (offset < lineStart(mid)) findLine(lo, mid - 1)
        else if (mid + 1 < lineStart.length && offset >= lineStart(mid + 1)) findLine(mid + 1, hi)
        else mid
      }
      if (offset <= 0) 0
      else findLine(0, lineStart.length - 1)
    }

    def column(offset: Int): Int = {
      var col = 0
      var i = offset - 1
      while (i >= 0 && buf(i) != CR && buf(i) != LF) {
        if (buf(i) == '\t') tabSeen = true
        col += 1
        i -= 1
      }
      col
    }

    def insertPatch(patches: List[BracePatch], patch: BracePatch): List[BracePatch] = patches match {
      case List() => List(patch)
      case bp :: bps => if (patch.off < bp.off) patch :: patches
                        else bp :: insertPatch(bps, patch)
    }

    def leftColumn(offset: Int) =
      if (offset == -1) -1 else column(lineStart(line(offset)))

    def rightColumn(offset: Int, default: Int) =
      if (offset == -1) -1
      else {
        val rlin = line(offset)
        if (lineStart(rlin) == offset) column(offset)
        else if (rlin + 1 < lineStart.length) column(lineStart(rlin + 1))
        else default
      }

    def insertRBrace(): List[BracePatch] = {
      def insert(bps: List[BracePair]): List[BracePatch] = bps match {
        case List() => patches
        case (bp @ BracePair(loff, lindent, roff, rindent, nested)) :: bps1 =>
          if (lindent <= rindent) insert(bps1)
          else {
            // println("patch inside "+bp+"/"+line(loff)+"/"+lineStart(line(loff))+"/"+lindent"/"+rindent)//DEBUG
            val patches1 = insert(nested)
            if (patches1 ne patches) patches1
            else {
              var lin = line(loff) + 1
              while (lin < lineStart.length && column(lineStart(lin)) > lindent)
                lin += 1
              if (lin < lineStart.length) {
                val patches1 = insertPatch(patches, BracePatch(lineStart(lin), true))
                //println("patch for "+bp+"/"+imbalanceMeasure+"/"+new ParensAnalyzer(unit, patches1).imbalanceMeasure)
                /*if (improves(patches1))*/
                patches1
                /*else insert(bps1)*/
                // (this test did not seem to work very well in practice)
              } else patches
            }
          }
      }
      insert(bracePairs)
    }
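    // For illustration: a missing '}' typically shows up as a BracePair whose opening
    // line is indented more deeply than the line of the '}' that ended up matching it.
    // In that case insertRBrace() walks forward from the opening line and proposes
    // inserting a '}' at the first line whose indentation falls back to the opening
    // level; deleteRBrace() below handles the opposite imbalance.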
    def deleteRBrace(): List[BracePatch] = {
      def delete(bps: List[BracePair]): List[BracePatch] = bps match {
        case List() => patches
        case BracePair(loff, lindent, roff, rindent, nested) :: bps1 =>
          if (lindent >= rindent) delete(bps1)
          else {
            val patches1 = delete(nested)
            if (patches1 ne patches) patches1
            else insertPatch(patches, BracePatch(roff, false))
          }
      }
      delete(bracePairs)
    }

    def imbalanceMeasure: Int = {
      def measureList(bps: List[BracePair]): Int = (bps map measure).sum
      def measure(bp: BracePair): Int =
        (if (bp.lindent != bp.rindent) 1 else 0) + measureList(bp.nested)
      measureList(bracePairs)
    }

    def improves(patches1: List[BracePatch]): Boolean =
      imbalanceMeasure > new ParensAnalyzer(unit, patches1).imbalanceMeasure

    override def error(offset: Int, msg: String) {}
  }
}