Logo Search packages:      
Sourcecode: jruby0.9 version File versions  Download package

RubyYaccLexer.java

/***** BEGIN LICENSE BLOCK *****
 * Version: CPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Common Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/cpl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2002 Benoit Cerrina <b.cerrina@wanadoo.fr>
 * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
 * Copyright (C) 2002-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
 * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
 * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
 * Copyright (C) 2004-2005 David Corbin <dcorbin@users.sourceforge.net>
 * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
 * Copyright (C) 2006 Thomas Corbat <tcorbat@hsr.ch>
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the CPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the CPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby.lexer.yacc;

import java.io.IOException;

import java.math.BigInteger;

import org.jruby.ast.BackRefNode;
import org.jruby.ast.BignumNode;
import org.jruby.ast.CommentNode;
import org.jruby.ast.FixnumNode;
import org.jruby.ast.FloatNode;
import org.jruby.ast.NthRefNode;
import org.jruby.common.IRubyWarnings;
import org.jruby.parser.BlockStaticScope;
import org.jruby.parser.ParserSupport;
import org.jruby.parser.StaticScope;
import org.jruby.parser.Tokens;
import org.jruby.util.IdUtil;

/**
 * This is a port of the MRI lexer to Java; it is compatible with Ruby 1.8.1.
 */
public class RubyYaccLexer {
    // Last token read via yylex().
    private int token;
    
    // Value of last token which had a value associated with it.
    // Package-private (no access modifier).
    Object yaccValue;

    // Stream of data that yylex() examines.
    private LexerSource src;
    
    // Used for tiny smidgen of grammar in lexer (see setParserSupport()).
    // Also receives the comments collected by the lexer.
    private ParserSupport parserSupport = null;

    // What handles warnings
    private IRubyWarnings warnings;

    // Additional context surrounding tokens that both the lexer and
    // grammar use.
    private LexState lex_state;
    
    // Temporary buffer to build up a potential token.  Consumer takes responsibility to reset
    // this before use.
    private StringBuffer tokenBuffer = new StringBuffer(60);

    // State stacks handed out through getConditionState()/getCmdArgumentState() and
    // cleared together by resetStacks().
    private StackState conditionState = new StackState();
    private StackState cmdArgumentState = new StackState();
    // Non-null while a string-like literal is being lexed; yylex() delegates to it until it
    // reports tSTRING_END or tREGEXP_END.
    private StrTerm lex_strterm;
    // Set by reset() and after a newline token is returned; yylex() snapshots and clears it
    // at the start of each call.
    private boolean commandStart;

    // Give a name to a value.  Enebo: This should be used more.
    // Compared against both characters read from the source and the token returned by yylex().
    static final int EOF = 0;

    // ruby constants for strings (should this be moved somewhere else?)
    // Bit flags that are OR-ed together and handed to StringTerm/HeredocTerm.
    static final int STR_FUNC_ESCAPE=0x01;
    static final int STR_FUNC_EXPAND=0x02;
    static final int STR_FUNC_REGEXP=0x04;
    static final int STR_FUNC_QWORDS=0x08;
    static final int STR_FUNC_SYMBOL=0x10;
    // Set for here documents introduced with a leading '-' (see hereDocumentIdentifier()).
    static final int STR_FUNC_INDENT=0x20;

    // Preset flag combinations, one per kind of string-like literal.
    private final int str_squote = 0;
    private final int str_dquote = STR_FUNC_EXPAND;
    private final int str_xquote = STR_FUNC_EXPAND;
    private final int str_regexp = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND;
    private final int str_ssym   = STR_FUNC_SYMBOL;
    private final int str_dsym   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND;
    
    /**
     * Creates a lexer in its initial state.  reset() leaves the source unset, so
     * setSource() has to be called before any token is requested.
     */
    public RubyYaccLexer() {
        reset();
    }
    
    /**
     * Puts the lexer back into its initial state: clears the current token and its value,
     * drops the source and the lex state, resets both state stacks, forgets any pending
     * string terminator and flags the next token as a command start.
     */
    public void reset() {
        // Everything that was cleared before the stacks in the original stays before them.
        lex_state = null;
        src = null;
        yaccValue = null;
        token = 0;

        resetStacks();

        commandStart = true;
        lex_strterm = null;
    }
    
    /**
     * How the parser advances to the next token.
     * 
     * @return true if not at end of file (EOF).
     */
00122     public boolean advance() throws IOException {
        return (token = yylex()) != EOF;
    }
    
    /**
     * Last token read from the lexer at the end of a call to yylex()
     * 
     * @return last token read
     */
00131     public int token() {
        return token;
    }

    /**
     * Exposes the lexer's shared scratch buffer.  The same instance is returned on every
     * call; whoever uses it is expected to clear it first.
     *
     * @return the shared token buffer
     */
    public StringBuffer getTokenBuffer() {
        return this.tokenBuffer;
    }
    
    /**
     * Value of last token (if it is a token which has a value).
     * 
     * @return value of last value-laden token
     */
00144     public Object value() {
        return yaccValue;
    }

    /**
     * Hands out the position factory owned by the current source.
     *
     * @return the source's position factory
     */
    public ISourcePositionFactory getPositionFactory() {
        ISourcePositionFactory factory = src.getPositionFactory();
        return factory;
    }
    
    /**
     * Get position information for Token/Node that follows node represented by startPosition 
     * and current lexer location.
     * 
     * @param startPosition previous node/token
     * @param inclusive include previous node into position information of current node
     * @return a new position
     */
00160     public ISourcePosition getPosition(ISourcePosition startPosition, boolean inclusive) {
      return src.getPosition(startPosition, inclusive); 
    }
    
    /**
     * Convenience form of {@link #getPosition(ISourcePosition, boolean)} with no start
     * position and a non-inclusive range.
     *
     * @return a new position
     */
    public ISourcePosition getPosition() {
        return getPosition(null, false);
    }

    /**
     * Parse must pass its support object for some check at bottom of
     * yylex().  Ruby does it this way as well (i.e. a little parsing
     * logic in the lexer).
     * 
     * @param parserSupport
     */
00175     public void setParserSupport(ParserSupport parserSupport) {
        this.parserSupport = parserSupport;
    }

    /**
     * Allow the parser to set the source for its lexer.
     * 
     * @param source where the lexer gets raw data
     */
00184     public void setSource(LexerSource source) {
        this.src = source;
    }

    /**
     * @return the terminator of the string-like literal currently being lexed, or null when
     *         no such literal is open
     */
    public StrTerm getStrTerm() {
        return this.lex_strterm;
    }
    
    /**
     * Replaces the string terminator that yylex() delegates to.
     *
     * @param strterm the new terminator; null means no string-like literal is open
     */
    public void setStrTerm(StrTerm strterm) {
        lex_strterm = strterm;
    }

    /**
     * Resets both state stacks (command-argument and condition).
     */
    public void resetStacks() {
        // The two stacks are independent objects, so the order of the resets is irrelevant.
        cmdArgumentState.reset();
        conditionState.reset();
    }
    
    /**
     * Sets the object that handles the warnings this lexer emits.
     *
     * @param warnings receiver of lexer warnings
     */
    public void setWarnings(IRubyWarnings warnings) {
        this.warnings = warnings;
    }


    /**
     * Overrides the lexer's current lex state.
     *
     * @param state the lex state to continue with
     */
    public void setState(LexState state) {
        lex_state = state;
    }

    /**
     * @return the command-argument state stack (the live instance, not a copy)
     */
    public StackState getCmdArgumentState() {
        return this.cmdArgumentState;
    }

    /**
     * @return the condition state stack (the live instance, not a copy)
     */
    public StackState getConditionState() {
        return this.conditionState;
    }
    
    /**
     * Sets the value associated with the current token; it is what value() returns next.
     *
     * @param yaccValue the new token value
     */
    public void setValue(Object yaccValue) {
        this.yaccValue = yaccValue;
    }

    /**
     * Peeks at the next character of the source without consuming it.
     *
     * @return true if the next character can be part of an identifier (letter, digit or
     *         underscore); false otherwise, including at end of input
     * @throws IOException if reading from the source fails
     */
    private boolean isNext_identchar() throws IOException {
        char c = src.read();
        src.unread(c);

        // Identifier characters are letters, digits and '_' (same rule as isIdentifierChar());
        // the previous test for '-' treated a minus sign as part of an identifier.
        return c != EOF && (Character.isLetterOrDigit(c) || c == '_');
    }
    
    /**
     * Builds the AST node for an integer literal.
     *
     * @param value digits of the literal, without any radix prefix
     * @param radix radix the digits are written in
     * @return a FixnumNode if the value fits into a long, otherwise a BignumNode
     * @throws NumberFormatException if value is not a valid number in the given radix
     */
    private Object getInteger(String value, int radix) {
        // Parse before asking for a position so getPosition() runs exactly once on either
        // path.  Previously the Fixnum attempt had already called getPosition() when
        // parseLong() threw, so the Bignum node was built from a second call.
        long parsed;
        try {
            parsed = Long.parseLong(value, radix);
        } catch (NumberFormatException e) {
            // Too large for a long: fall back to arbitrary precision.
            return new BignumNode(getPosition(), new BigInteger(value, radix));
        }
        return new FixnumNode(getPosition(), parsed);
    }

    /**
       * Do the next characters from the source match provided String in a case insensitive manner.  
     * If so, then consume those characters and that string.  Otherwise, consume none of them and 
     * return null.
       * 
       * @param s to be matched against
     * @return string if string matches, null otherwise
     */ 
00245     private String isNextNoCase(String s) throws IOException {
      StringBuffer buf = new StringBuffer();
      
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            char r = src.read();
            buf.append(r);
            
            if (Character.toLowerCase(c) != r &&
                Character.toUpperCase(c) != r) {
                  src.unreadMany(buf);
                return null;
            }
        }

        return buf.toString();
    }

      /**
       * @param c the character to test
       * @return true if character is a hex value (0-9a-f)
       */
00267     static final boolean isHexChar(char c) {
        return Character.isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
    }

    /**
       * @param c the character to test
     * @return true if character is an octal value (0-7)
       */
00275     static final boolean isOctChar(char c) {
        return '0' <= c && c <= '7';
    }
    
    /**
     * @param c is character to be compared
     * @return whether c is an identifier or not
     */
00283     private static final boolean isIdentifierChar(char c) {
        return Character.isLetterOrDigit(c) || c == '_';
    }
    
    /**
     * What type/kind of quote are we dealing with?
     * 
     * @param c first character the the quote construct
     * @return a token that specifies the quote type
     */
00293     private int parseQuote(char c) throws IOException {
        char begin, end;
        boolean shortHand;
        
        // Short-hand (e.g. %{,%.,%!,... versus %Q{).
        if (!Character.isLetterOrDigit(c)) {
            begin = c;
            c = 'Q';
            shortHand = true;
        // Long-hand (e.g. %Q{}).
        } else {
            shortHand = false;
            begin = src.read();
            if (Character.isLetterOrDigit(begin) /* no mb || ismbchar(term)*/) {
                throw new SyntaxException(getPosition(), "unknown type of %string");
            }
        }
        if (c == EOF || begin == EOF) {
            throw new SyntaxException(getPosition(), "unterminated quoted string meets end of file");
        }
        
        // Figure end-char.  '\0' is special to indicate begin=end and that no nesting?
        if (begin == '(') end = ')';
        else if (begin == '[') end = ']';
        else if (begin == '{') end = '}';
        else if (begin == '<') end = '>';
        else { end = begin; begin = '\0'; };

        switch (c) {
        case 'Q':
            lex_strterm = new StringTerm(str_dquote, end, begin);
            yaccValue = new Token("%"+ (shortHand ? (""+end) : ("" + c + begin)), getPosition());
            return Tokens.tSTRING_BEG;

        case 'q':
            lex_strterm = new StringTerm(str_squote, end, begin);
            yaccValue = new Token("%"+c+begin, getPosition());
            return Tokens.tSTRING_BEG;

        case 'W':
            lex_strterm = new StringTerm(str_dquote | STR_FUNC_QWORDS, end, begin);
            do {c = src.read();} while (Character.isWhitespace(c));
            src.unread(c);
            yaccValue = new Token("%"+c+begin, getPosition());
            return Tokens.tWORDS_BEG;

        case 'w':
            lex_strterm = new StringTerm(str_squote | STR_FUNC_QWORDS, end, begin);
            do {c = src.read();} while (Character.isWhitespace(c));
            src.unread(c);
            yaccValue = new Token("%"+c+begin, getPosition());
            return Tokens.tQWORDS_BEG;

        case 'x':
            lex_strterm = new StringTerm(str_xquote, end, begin);
            yaccValue = new Token("%"+c+begin, getPosition());
            return Tokens.tXSTRING_BEG;

        case 'r':
            lex_strterm = new StringTerm(str_regexp, end, begin);
            yaccValue = new Token("%"+c+begin, getPosition());
            return Tokens.tREGEXP_BEG;

        case 's':
            lex_strterm = new StringTerm(str_ssym, end, begin);
            lex_state = LexState.EXPR_FNAME;
            yaccValue = new Token("%"+c+begin, getPosition());
            return Tokens.tSYMBEG;

        default:
            throw new SyntaxException(getPosition(), "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '" + c + "'.");
        }
    }
    
    /**
     * Reads the identifier that follows a here-document opener and, if one is found,
     * installs a HeredocTerm built from the identifier, the collected string flags and the
     * remainder of the current line.  A leading '-' adds STR_FUNC_INDENT to the flags.  The
     * identifier is either quoted with ', " or ` or a bare run of identifier characters.
     *
     * @return tXSTRING_BEG for a back-quoted identifier, tSTRING_BEG for any other
     *         identifier, or 0 (after pushing the consumed characters back) when no
     *         here-document identifier follows
     * @throws IOException if reading from the source fails
     */
    private int hereDocumentIdentifier() throws IOException {
        int flags = 0;
        char ch = src.read();
        boolean indented = ch == '-';
        if (indented) {
            ch = src.read();
            flags = STR_FUNC_INDENT;
        }

        int terminator;
        switch (ch) {
        case '\'':
        case '"':
        case '`':
            // Quoted identifier: collect everything up to the matching quote.
            flags |= (ch == '\'') ? str_squote : (ch == '"') ? str_dquote : str_xquote;
            tokenBuffer.setLength(0);
            terminator = ch;
            for (ch = src.read(); ch != EOF && ch != terminator; ch = src.read()) {
                tokenBuffer.append(ch);
            }
            if (ch == EOF) {
                throw new SyntaxException(getPosition(), "unterminated here document identifier");
            }
            break;

        default:
            if (!isIdentifierChar(ch)) {
                // Not a here document after all: put back what was consumed.
                src.unread(ch);
                if (indented) {
                    src.unread('-');
                }
                return 0;
            }
            // Bare identifier: handled like a double-quoted one.
            tokenBuffer.setLength(0);
            terminator = '"';
            flags |= str_dquote;
            tokenBuffer.append(ch);
            while ((ch = src.read()) != EOF && isIdentifierChar(ch)) {
                tokenBuffer.append(ch);
            }
            src.unread(ch);
            break;
        }

        String restOfLine = src.readLine() + '\n';
        lex_strterm = new HeredocTerm(tokenBuffer.toString(), flags, restOfLine);

        boolean backQuoted = terminator == '`';
        yaccValue = new Token(backQuoted ? "`" : "\"", getPosition());
        if (backQuoted) {
            return Tokens.tXSTRING_BEG;
        }

        // Hacky: Advance position to eat newline here....
        getPosition();
        return Tokens.tSTRING_BEG;
    }
    
    /**
     * Emits the "ambiguous first argument" warning at the current position.
     */
    private void arg_ambiguous() {
        final String message = "Ambiguous first argument; make sure.";
        warnings.warning(getPosition(), message);
    }

    /**
     * Read a comment up to end of line.  When found each comment will get stored away into
     * the parser result so that any interested party can use them as they seem fit.  One idea
     * is that IDE authors can do distance based heuristics to associate these comments to the
     * AST node they think they belong to.
     * 
     * @param c last character read from lexer source
     * @return newline or eof value 
     */
00439     protected int readComment(char c) throws IOException {
        ISourcePosition startPosition = src.getPosition();
        tokenBuffer.setLength(0);
        tokenBuffer.append(c);

        // FIXME: Consider making a better LexerSource.readLine
        while ((c = src.read()) != '\n') {
            tokenBuffer.append(c);
            if (c == EOF) {
                break;
            }
        }
        src.unread(c);
        
        // Store away each comment to parser result so IDEs can do whatever they want with them.
        ISourcePosition position = startPosition.union(getPosition());
        parserSupport.getResult().addComment(new CommentNode(position, tokenBuffer.toString()));
        
        return c;
    }
    
    /*
     * Not normally used, but is left in here since it can be useful in debugging
     * grammar and lexing problems.
    private void printToken(int token) {
        //System.out.print("LOC: " + support.getPosition() + " ~ ");
        
        switch (token) {
            case Tokens.yyErrorCode: System.err.print("yyErrorCode,"); break;
            case Tokens.kCLASS: System.err.print("kClass,"); break;
            case Tokens.kMODULE: System.err.print("kModule,"); break;
            case Tokens.kDEF: System.err.print("kDEF,"); break;
            case Tokens.kUNDEF: System.err.print("kUNDEF,"); break;
            case Tokens.kBEGIN: System.err.print("kBEGIN,"); break;
            case Tokens.kRESCUE: System.err.print("kRESCUE,"); break;
            case Tokens.kENSURE: System.err.print("kENSURE,"); break;
            case Tokens.kEND: System.err.print("kEND,"); break;
            case Tokens.kIF: System.err.print("kIF,"); break;
            case Tokens.kUNLESS: System.err.print("kUNLESS,"); break;
            case Tokens.kTHEN: System.err.print("kTHEN,"); break;
            case Tokens.kELSIF: System.err.print("kELSIF,"); break;
            case Tokens.kELSE: System.err.print("kELSE,"); break;
            case Tokens.kCASE: System.err.print("kCASE,"); break;
            case Tokens.kWHEN: System.err.print("kWHEN,"); break;
            case Tokens.kWHILE: System.err.print("kWHILE,"); break;
            case Tokens.kUNTIL: System.err.print("kUNTIL,"); break;
            case Tokens.kFOR: System.err.print("kFOR,"); break;
            case Tokens.kBREAK: System.err.print("kBREAK,"); break;
            case Tokens.kNEXT: System.err.print("kNEXT,"); break;
            case Tokens.kREDO: System.err.print("kREDO,"); break;
            case Tokens.kRETRY: System.err.print("kRETRY,"); break;
            case Tokens.kIN: System.err.print("kIN,"); break;
            case Tokens.kDO: System.err.print("kDO,"); break;
            case Tokens.kDO_COND: System.err.print("kDO_COND,"); break;
            case Tokens.kDO_BLOCK: System.err.print("kDO_BLOCK,"); break;
            case Tokens.kRETURN: System.err.print("kRETURN,"); break;
            case Tokens.kYIELD: System.err.print("kYIELD,"); break;
            case Tokens.kSUPER: System.err.print("kSUPER,"); break;
            case Tokens.kSELF: System.err.print("kSELF,"); break;
            case Tokens.kNIL: System.err.print("kNIL,"); break;
            case Tokens.kTRUE: System.err.print("kTRUE,"); break;
            case Tokens.kFALSE: System.err.print("kFALSE,"); break;
            case Tokens.kAND: System.err.print("kAND,"); break;
            case Tokens.kOR: System.err.print("kOR,"); break;
            case Tokens.kNOT: System.err.print("kNOT,"); break;
            case Tokens.kIF_MOD: System.err.print("kIF_MOD,"); break;
            case Tokens.kUNLESS_MOD: System.err.print("kUNLESS_MOD,"); break;
            case Tokens.kWHILE_MOD: System.err.print("kWHILE_MOD,"); break;
            case Tokens.kUNTIL_MOD: System.err.print("kUNTIL_MOD,"); break;
            case Tokens.kRESCUE_MOD: System.err.print("kRESCUE_MOD,"); break;
            case Tokens.kALIAS: System.err.print("kALIAS,"); break;
            case Tokens.kDEFINED: System.err.print("kDEFINED,"); break;
            case Tokens.klBEGIN: System.err.print("klBEGIN,"); break;
            case Tokens.klEND: System.err.print("klEND,"); break;
            case Tokens.k__LINE__: System.err.print("k__LINE__,"); break;
            case Tokens.k__FILE__: System.err.print("k__FILE__,"); break;
            case Tokens.tIDENTIFIER: System.err.print("tIDENTIFIER["+ value() + "],"); break;
            case Tokens.tFID: System.err.print("tFID[" + value() + "],"); break;
            case Tokens.tGVAR: System.err.print("tGVAR[" + value() + "],"); break;
            case Tokens.tIVAR: System.err.print("tIVAR[" + value() +"],"); break;
            case Tokens.tCONSTANT: System.err.print("tCONSTANT["+ value() +"],"); break;
            case Tokens.tCVAR: System.err.print("tCVAR,"); break;
            case Tokens.tINTEGER: System.err.print("tINTEGER,"); break;
            case Tokens.tFLOAT: System.err.print("tFLOAT,"); break;
            case Tokens.tSTRING_CONTENT: System.err.print("tSTRING_CONTENT[" + yaccValue + "],"); break;
            case Tokens.tSTRING_BEG: System.err.print("tSTRING_BEG,"); break;
            case Tokens.tSTRING_END: System.err.print("tSTRING_END,"); break;
            case Tokens.tSTRING_DBEG: System.err.print("STRING_DBEG,"); break;
            case Tokens.tSTRING_DVAR: System.err.print("tSTRING_DVAR,"); break;
            case Tokens.tXSTRING_BEG: System.err.print("tXSTRING_BEG,"); break;
            case Tokens.tREGEXP_BEG: System.err.print("tREGEXP_BEG,"); break;
            case Tokens.tREGEXP_END: System.err.print("tREGEXP_END,"); break;
            case Tokens.tWORDS_BEG: System.err.print("tWORDS_BEG,"); break;
            case Tokens.tQWORDS_BEG: System.err.print("tQWORDS_BEG,"); break;
            case Tokens.tBACK_REF: System.err.print("tBACK_REF,"); break;
            case Tokens.tNTH_REF: System.err.print("tNTH_REF,"); break;
            case Tokens.tUPLUS: System.err.print("tUPLUS"); break;
            case Tokens.tUMINUS: System.err.print("tUMINUS,"); break;
            case Tokens.tPOW: System.err.print("tPOW,"); break;
            case Tokens.tCMP: System.err.print("tCMP,"); break;
            case Tokens.tEQ: System.err.print("tEQ,"); break;
            case Tokens.tEQQ: System.err.print("tEQQ,"); break;
            case Tokens.tNEQ: System.err.print("tNEQ,"); break;
            case Tokens.tGEQ: System.err.print("tGEQ,"); break;
            case Tokens.tLEQ: System.err.print("tLEQ,"); break;
            case Tokens.tANDOP: System.err.print("tANDOP,"); break;
            case Tokens.tOROP: System.err.print("tOROP,"); break;
            case Tokens.tMATCH: System.err.print("tMATCH,"); break;
            case Tokens.tNMATCH: System.err.print("tNMATCH,"); break;
            case Tokens.tDOT2: System.err.print("tDOT2,"); break;
            case Tokens.tDOT3: System.err.print("tDOT3,"); break;
            case Tokens.tAREF: System.err.print("tAREF,"); break;
            case Tokens.tASET: System.err.print("tASET,"); break;
            case Tokens.tLSHFT: System.err.print("tLSHFT,"); break;
            case Tokens.tRSHFT: System.err.print("tRSHFT,"); break;
            case Tokens.tCOLON2: System.err.print("tCOLON2,"); break;
            case Tokens.tCOLON3: System.err.print("tCOLON3,"); break;
            case Tokens.tOP_ASGN: System.err.print("tOP_ASGN,"); break;
            case Tokens.tASSOC: System.err.print("tASSOC,"); break;
            case Tokens.tLPAREN: System.err.print("tLPAREN,"); break;
            case Tokens.tLPAREN_ARG: System.err.print("tLPAREN_ARG,"); break;
            case Tokens.tLBRACK: System.err.print("tLBRACK,"); break;
            case Tokens.tLBRACE: System.err.print("tLBRACE,"); break;
            case Tokens.tSTAR: System.err.print("tSTAR,"); break;
            case Tokens.tAMPER: System.err.print("tAMPER,"); break;
            case Tokens.tSYMBEG: System.err.print("tSYMBEG,"); break;
            case '\n': System.err.println("NL"); break;
            default: System.err.print("'" + (int)token + "',"); break;
        }
    }

    // DEBUGGING HELP 
    private int yylex() {
        int token = yylex2();
        
        printToken(token);
        
        return token;
    }
    */

    /**
     *  Returns the next token. Also sets yyVal is needed.
     *
     *@return    Description of the Returned Value
     */
00585     private int yylex() throws IOException {
        char c;
        boolean spaceSeen = false;
        boolean commandState;
        
        if (lex_strterm != null) {
                  int tok = lex_strterm.parseString(this, src);
                  if (tok == Tokens.tSTRING_END || tok == Tokens.tREGEXP_END) {
                      lex_strterm = null;
                      lex_state = LexState.EXPR_END;
                  }
                  return tok;
        }

        commandState = commandStart;
        commandStart = false;

        LexState last_state = lex_state;
        
        retry: for(;;) {
            c = src.read();
            switch(c) {
            case '\004':            /* ^D */
            case '\032':            /* ^Z */
            case 0:                 /* end of script. */
                return 0;
           
                /* white spaces */
            case ' ': case '\t': case '\f': case '\r':
            case '\13': /* '\v' */
                getPosition();
                spaceSeen = true;
                continue retry;
            case '#':         /* it's a comment */
                if (readComment(c) == 0) return 0;
                    
                /* fall through */
            case '\n':
                  // Replace a string of newlines with a single one
                while((c = src.read()) == '\n') {
                    
                }
                src.unread( c );
                getPosition();

                if (lex_state == LexState.EXPR_BEG ||
                    lex_state == LexState.EXPR_FNAME ||
                    lex_state == LexState.EXPR_DOT ||
                    lex_state == LexState.EXPR_CLASS) {
                    continue retry;
                } 

                commandStart = true;
                lex_state = LexState.EXPR_BEG;
                return '\n';
                
            case '*':
                if ((c = src.read()) == '*') {
                    if ((c = src.read()) == '=') {
                        lex_state = LexState.EXPR_BEG;
                        yaccValue = new Token("**", getPosition());
                        return Tokens.tOP_ASGN;
                    }
                    src.unread(c);
                    yaccValue = new Token("**", getPosition());
                    c = Tokens.tPOW;
                } else {
                    if (c == '=') {
                        lex_state = LexState.EXPR_BEG;
                        yaccValue = new Token("*", getPosition());
                        return Tokens.tOP_ASGN;
                    }
                    src.unread(c);
                    if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)) {
                        warnings.warning(getPosition(), "`*' interpreted as argument prefix");
                        c = Tokens.tSTAR;
                    } else if (lex_state == LexState.EXPR_BEG || 
                            lex_state == LexState.EXPR_MID) {
                        c = Tokens.tSTAR;
                    } else {
                        c = Tokens.tSTAR2;
                    }
                    yaccValue = new Token("*", getPosition());
                }
                if (lex_state == LexState.EXPR_FNAME ||
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                return c;

            case '!':
                lex_state = LexState.EXPR_BEG;
                if ((c = src.read()) == '=') {
                 yaccValue = new Token("!=",getPosition());
                 return Tokens.tNEQ;
                }
                if (c == '~') {
                    yaccValue = new Token("!~",getPosition());
                    return Tokens.tNMATCH;
                }
                src.unread(c);
                yaccValue = new Token("!",getPosition());
                return Tokens.tBANG;

            case '=':
                // documentation nodes
                if (src.wasBeginOfLine()) {
                    String equalLabel;
                    if ((equalLabel = isNextNoCase("begin")) != null) {
                        tokenBuffer.setLength(0);
                        tokenBuffer.append(equalLabel);
                        c = src.read();
                        
                        if (Character.isWhitespace(c)) {
                            // In case last next was the newline.
                            src.unread(c);
                            for (;;) {
                                c = src.read();
                                tokenBuffer.append(c);

                                // If a line is followed by a blank line put
                                // it back.
                                while (c == '\n') {
                                    c = src.read();
                                    tokenBuffer.append(c);
                                }
                                if (c == EOF) {
                                    throw new SyntaxException(getPosition(), "embedded document meets end of file");
                                }
                                if (c != '=') continue;
                                if (src.wasBeginOfLine() && (equalLabel = isNextNoCase("end")) != null) {
                                    tokenBuffer.append(equalLabel);
                                    tokenBuffer.append(src.readLine());
                                    src.unread('\n');
                                    break;
                                }
                            }
                            
                            parserSupport.getResult().addComment(new CommentNode(getPosition(), tokenBuffer.toString()));
                            continue retry;
                        }
                                    src.unread(c);
                    }
                }

                if (lex_state == LexState.EXPR_FNAME || lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else { 
                    lex_state = LexState.EXPR_BEG;
                }

                c = src.read();
                if (c == '=') {
                    c = src.read();
                    if (c == '=') {
                        yaccValue = new Token("===", getPosition());
                        return Tokens.tEQQ;
                    }
                    src.unread(c);
                    yaccValue = new Token("==", getPosition());
                    return Tokens.tEQ;
                }
                if (c == '~') {
                    yaccValue = new Token("=~", getPosition());
                    return Tokens.tMATCH;
                } else if (c == '>') {
                    yaccValue = new Token("=>", getPosition());
                    return Tokens.tASSOC;
                }
                src.unread(c);
                yaccValue = new Token("=", getPosition());
                return '=';
                
            case '<':
                c = src.read();
                if (c == '<' &&
                        lex_state != LexState.EXPR_END &&
                        lex_state != LexState.EXPR_DOT &&
                        lex_state != LexState.EXPR_ENDARG && 
                        lex_state != LexState.EXPR_CLASS &&
                        (!lex_state.isArgument() || spaceSeen)) {
                    int tok = hereDocumentIdentifier();
                    if (tok != 0) return tok;
                }
                if (lex_state == LexState.EXPR_FNAME ||
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                if (c == '=') {
                    if ((c = src.read()) == '>') {
                        yaccValue = new Token("<=>", getPosition());
                        return Tokens.tCMP;
                    }
                    src.unread(c);
                    yaccValue = new Token("<=", getPosition());
                    return Tokens.tLEQ;
                }
                if (c == '<') {
                    if ((c = src.read()) == '=') {
                        lex_state = LexState.EXPR_BEG;
                        yaccValue = new Token("<<", getPosition());
                        return Tokens.tOP_ASGN;
                    }
                    src.unread(c);
                    yaccValue = new Token("<<", getPosition());
                    return Tokens.tLSHFT;
                }
                yaccValue = new Token("<", getPosition());
                src.unread(c);
                return Tokens.tLT;
                
            case '>':
                if (lex_state == LexState.EXPR_FNAME ||
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }

                if ((c = src.read()) == '=') {
                    yaccValue = new Token(">=", getPosition());
                    return Tokens.tGEQ;
                }
                if (c == '>') {
                    if ((c = src.read()) == '=') {
                        lex_state = LexState.EXPR_BEG;
                        yaccValue = new Token(">>", getPosition());
                        return Tokens.tOP_ASGN;
                    }
                    src.unread(c);
                    yaccValue = new Token(">>", getPosition());
                    return Tokens.tRSHFT;
                }
                src.unread(c);
                yaccValue = new Token(">", getPosition());
                return Tokens.tGT;

            case '"':
                lex_strterm = new StringTerm(str_dquote, '"', '\0');
                yaccValue = new Token("\"", getPosition());
                return Tokens.tSTRING_BEG;

            case '`':
                yaccValue = new Token("`", getPosition());
                if (lex_state == LexState.EXPR_FNAME) {
                    lex_state = LexState.EXPR_END;
                    return Tokens.tBACK_REF2;
                }
                if (lex_state == LexState.EXPR_DOT) {
                    if (commandState) {
                        lex_state = LexState.EXPR_CMDARG;
                    } else {
                        lex_state = LexState.EXPR_ARG;
                    }
                    return Tokens.tBACK_REF2;
                }
                lex_strterm = new StringTerm(str_xquote, '`', '\0');
                return Tokens.tXSTRING_BEG;

            case '\'':
                lex_strterm = new StringTerm(str_squote, '\'', '\0');
                yaccValue = new Token("'", getPosition());
                return Tokens.tSTRING_BEG;

            case '?':
                if (lex_state == LexState.EXPR_END || 
                    lex_state == LexState.EXPR_ENDARG) {
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("?",getPosition());
                    return '?';
                }
                c = src.read();
                if (c == EOF) {
                    throw new SyntaxException(getPosition(), "incomplete character syntax");
                }
                if (Character.isWhitespace(c)){
                    if (!lex_state.isArgument()){
                        int c2 = 0;
                        switch (c) {
                        case ' ':
                            c2 = 's';
                            break;
                        case '\n':
                            c2 = 'n';
                            break;
                        case '\t':
                            c2 = 't';
                            break;
                            /* What is \v in C?
                        case '\v':
                            c2 = 'v';
                            break;
                            */
                        case '\r':
                            c2 = 'r';
                            break;
                        case '\f':
                            c2 = 'f';
                            break;
                        }
                        if (c2 != 0) {
                            warnings.warn(getPosition(), "invalid character syntax; use ?\\" + c2);
                        }
                    }
                    src.unread(c);
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("?", getPosition());
                    return '?';
                /*} else if (ismbchar(c)) { // ruby - we don't support them either?
                    rb_warn("multibyte character literal not supported yet; use ?\\" + c);
                    support.unread(c);
                    lexState = LexState.EXPR_BEG;
                    return '?';*/
                } else if ((Character.isLetterOrDigit(c) || c == '_') &&
                        !src.peek('\n') && isNext_identchar()) {
                    src.unread(c);
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("?", getPosition());
                    return '?';
                } else if (c == '\\') {
                    c = src.readEscape();
                }
                c &= 0xff;
                lex_state = LexState.EXPR_END;
                yaccValue = new FixnumNode(getPosition(), c);
                return Tokens.tINTEGER;

            case '&':
                if ((c = src.read()) == '&') {
                    lex_state = LexState.EXPR_BEG;
                    if ((c = src.read()) == '=') {
                        yaccValue = new Token("&&", getPosition());
                        lex_state = LexState.EXPR_BEG;
                        return Tokens.tOP_ASGN;
                    }
                    src.unread(c);
                    yaccValue = new Token("&&", getPosition());
                    return Tokens.tANDOP;
                }
                else if (c == '=') {
                    yaccValue = new Token("&", getPosition());
                    lex_state = LexState.EXPR_BEG;
                    return Tokens.tOP_ASGN;
                }
                src.unread(c);
                //tmpPosition is required because getPosition()'s side effects.
                //if the warning is generated, the getPosition() on line 954 (this line + 18) will create
                //a wrong position if the "inclusive" flag is not set.
                ISourcePosition tmpPosition = getPosition();
                if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)){
                    warnings.warning(tmpPosition, "`&' interpreted as argument prefix");
                    c = Tokens.tAMPER;
                } else if (lex_state == LexState.EXPR_BEG || 
                        lex_state == LexState.EXPR_MID) {
                    c = Tokens.tAMPER;
                } else {
                    c = Tokens.tAMPER2;
                }
                
                if (lex_state == LexState.EXPR_FNAME ||
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                yaccValue = new Token("&", tmpPosition);
                return c;
                
            case '|':
                if ((c = src.read()) == '|') {
                    lex_state = LexState.EXPR_BEG;
                    if ((c = src.read()) == '=') {
                        lex_state = LexState.EXPR_BEG;
                        yaccValue = new Token("||", getPosition());
                        return Tokens.tOP_ASGN;
                    }
                    src.unread(c);
                    yaccValue = new Token("||", getPosition());
                    return Tokens.tOROP;
                }
                if (c == '=') {
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("|", getPosition());
                    return Tokens.tOP_ASGN;
                }
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                src.unread(c);
                yaccValue = new Token("|", getPosition());
                return Tokens.tPIPE;

            case '+':
                c = src.read();
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                    if (c == '@') {
                        yaccValue = new Token("+@", getPosition());
                        return Tokens.tUPLUS;
                    }
                    src.unread(c);
                    yaccValue = new Token("+", getPosition());
                    return Tokens.tPLUS;
                }
                if (c == '=') {
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("+", getPosition());
                    return Tokens.tOP_ASGN;
                }
                if (lex_state == LexState.EXPR_BEG ||
                    lex_state == LexState.EXPR_MID ||
                        (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c))) {
                    if (lex_state.isArgument()) arg_ambiguous();
                    lex_state = LexState.EXPR_BEG;
                    src.unread(c);
                    if (Character.isDigit(c)) {
                        c = '+';
                        return parseNumber(c);
                    }
                    yaccValue = new Token("+", getPosition());
                    return Tokens.tUPLUS;
                }
                lex_state = LexState.EXPR_BEG;
                src.unread(c);
                yaccValue = new Token("+", getPosition());
                return Tokens.tPLUS;

            case '-':
                c = src.read();
                if (lex_state == LexState.EXPR_FNAME || lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                    if (c == '@') {
                        yaccValue = new Token("-@", getPosition());
                        return Tokens.tUMINUS;
                    }
                    src.unread(c);
                    yaccValue = new Token("-", getPosition());
                    return Tokens.tMINUS;
                }
                if (c == '=') {
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("-", getPosition());
                    return Tokens.tOP_ASGN;
                }
                if (lex_state == LexState.EXPR_BEG || lex_state == LexState.EXPR_MID ||
                        (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c))) {
                    if (lex_state.isArgument()) arg_ambiguous();
                    lex_state = LexState.EXPR_BEG;
                    src.unread(c);
                    yaccValue = new Token("-", getPosition());
                    if (Character.isDigit(c)) {
                        return Tokens.tUMINUS_NUM;
                    }
                    return Tokens.tUMINUS;
                }
                lex_state = LexState.EXPR_BEG;
                src.unread(c);
                yaccValue = new Token("-", getPosition());
                return Tokens.tMINUS;
                
            case '.':
                lex_state = LexState.EXPR_BEG;
                if ((c = src.read()) == '.') {
                    if ((c = src.read()) == '.') {
                        yaccValue = new Token("...", getPosition());
                        return Tokens.tDOT3;
                    }
                    src.unread(c);
                    yaccValue = new Token("..", getPosition());
                    return Tokens.tDOT2;
                }
                src.unread(c);
                if (Character.isDigit(c)) {
                    throw new SyntaxException(getPosition(), "no .<digit> floating literal anymore; put 0 before dot"); 
                }
                lex_state = LexState.EXPR_DOT;
                yaccValue = new Token(".", getPosition());
                return Tokens.tDOT;
            case '0' : case '1' : case '2' : case '3' : case '4' :
            case '5' : case '6' : case '7' : case '8' : case '9' :
                return parseNumber(c);
                
            case ')':
                conditionState.restart();
                cmdArgumentState.restart();
                lex_state = LexState.EXPR_END;
                yaccValue = new Token(")", getPosition());
                return Tokens.tRPAREN;
            case ']':
                conditionState.restart();
                cmdArgumentState.restart();
                lex_state = LexState.EXPR_END;
                yaccValue = new Token(")", getPosition());
                return Tokens.tRBRACK;
            case '}':
                conditionState.restart();
                cmdArgumentState.restart();
                lex_state = LexState.EXPR_END;
                yaccValue = new Token("}",getPosition());
                return Tokens.tRCURLY;

            case ':':
                c = src.read();
                if (c == ':') {
                    if (lex_state == LexState.EXPR_BEG ||
                        lex_state == LexState.EXPR_MID ||
                        lex_state == LexState.EXPR_CLASS || 
                        (lex_state.isArgument() && spaceSeen)) {
                        lex_state = LexState.EXPR_BEG;
                        yaccValue = new Token("::", getPosition());
                        return Tokens.tCOLON3;
                    }
                    lex_state = LexState.EXPR_DOT;
                    yaccValue = new Token(":",getPosition());
                    return Tokens.tCOLON2;
                }
                if (lex_state == LexState.EXPR_END || 
                    lex_state == LexState.EXPR_ENDARG || Character.isWhitespace(c)) {
                    src.unread(c);
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token(":",getPosition());
                    return ':';
                }
                switch (c) {
                case '\'':
                    lex_strterm = new StringTerm(str_ssym, c, '\0');
                    break;
                case '"':
                    lex_strterm = new StringTerm(str_dsym, c, '\0');
                    break;
                default:
                    src.unread(c);
                    break;
                }
                lex_state = LexState.EXPR_FNAME;
                yaccValue = new Token(":", getPosition());
                return Tokens.tSYMBEG;

            case '/':
                if (lex_state == LexState.EXPR_BEG || 
                    lex_state == LexState.EXPR_MID) {
                    lex_strterm = new StringTerm(str_regexp, '/', '\0');
                    yaccValue = new Token("/",getPosition());
                    return Tokens.tREGEXP_BEG;
                }
                
                if ((c = src.read()) == '=') {
                    yaccValue = new Token("/", getPosition());
                    lex_state = LexState.EXPR_BEG;
                    return Tokens.tOP_ASGN;
                }
                src.unread(c);
                if (lex_state.isArgument() && spaceSeen) {
                    if (!Character.isWhitespace(c)) {
                        arg_ambiguous();
                        lex_strterm = new StringTerm(str_regexp, '/', '\0');
                        yaccValue = new Token("/",getPosition());
                        return Tokens.tREGEXP_BEG;
                    }
                }
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                yaccValue = new Token("/", getPosition());
                return Tokens.tDIVIDE;

            case '^':
                if ((c = src.read()) == '=') {
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("^", getPosition());
                    return Tokens.tOP_ASGN;
                }
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                src.unread(c);
                yaccValue = new Token("^", getPosition());
                return Tokens.tCARET;

            case ';':
                commandStart = true;
            case ',':
                lex_state = LexState.EXPR_BEG;
                yaccValue = new Token(",", getPosition());
                return c;

            case '~':
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    if ((c = src.read()) != '@') {
                        src.unread(c);
                    }
                }
                if (lex_state == LexState.EXPR_FNAME || 
                        lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                yaccValue = new Token("~", getPosition());
                return Tokens.tTILDE;
            case '(':
                  c = Tokens.tLPAREN2;
                commandStart = true;
                if (lex_state == LexState.EXPR_BEG || 
                    lex_state == LexState.EXPR_MID) {
                    c = Tokens.tLPAREN;
                } else if (spaceSeen) {
                    if (lex_state == LexState.EXPR_CMDARG) {
                        c = Tokens.tLPAREN_ARG;
                    } else if (lex_state == LexState.EXPR_ARG) {
                        warnings.warn(getPosition(), "don't put space before argument parentheses");
                        c = Tokens.tLPAREN2;
                    }
                }
                conditionState.stop();
                cmdArgumentState.stop();
                lex_state = LexState.EXPR_BEG;
                yaccValue = new Token("(", getPosition());
                return c;

            case '[':
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                    if ((c = src.read()) == ']') {
                        if (src.peek('=')) {
                            c = src.read();
                            yaccValue = new Token("[]=", getPosition());
                            return Tokens.tASET;
                        }
                        yaccValue = new Token("[]", getPosition());
                        return Tokens.tAREF;
                    }
                    src.unread(c);
                    yaccValue = new Token("[", getPosition());
                    return '[';
                } else if (lex_state == LexState.EXPR_BEG || 
                           lex_state == LexState.EXPR_MID) {
                    c = Tokens.tLBRACK;
                } else if (lex_state.isArgument() && spaceSeen) {
                    c = Tokens.tLBRACK;
                }
                lex_state = LexState.EXPR_BEG;
                conditionState.stop();
                cmdArgumentState.stop();
                yaccValue = new Token("[", getPosition());
                return c;
                
            case '{':
                  c = Tokens.tLCURLY;
                  
                if (lex_state.isArgument() || lex_state == LexState.EXPR_END) {
                    c = Tokens.tLCURLY;          /* block (primary) */
                } else if (lex_state == LexState.EXPR_ENDARG) {
                    c = Tokens.tLBRACE_ARG;  /* block (expr) */
                } else {
                    c = Tokens.tLBRACE;      /* hash */
                }
                conditionState.stop();
                cmdArgumentState.stop();
                lex_state = LexState.EXPR_BEG;
                yaccValue = new Token("{", getPosition());
                return c;

            case '\\':
                c = src.read();
                if (c == '\n') {
                    spaceSeen = true;
                    continue retry; /* skip \\n */
                }
                src.unread(c);
                yaccValue = new Token("\\", getPosition());
                return '\\';

            case '%':
                if (lex_state == LexState.EXPR_BEG || 
                    lex_state == LexState.EXPR_MID) {
                    return parseQuote(src.read());
                }
                if ((c = src.read()) == '=') {
                    lex_state = LexState.EXPR_BEG;
                    yaccValue = new Token("%", getPosition());
                    return Tokens.tOP_ASGN;
                }
                if (lex_state.isArgument() && spaceSeen && !Character.isWhitespace(c)) {
                    return parseQuote(c);
                }
                if (lex_state == LexState.EXPR_FNAME || 
                    lex_state == LexState.EXPR_DOT) {
                    lex_state = LexState.EXPR_ARG;
                } else {
                    lex_state = LexState.EXPR_BEG;
                }
                src.unread(c);
                yaccValue = new Token("%", getPosition());
                return Tokens.tPERCENT;

            case '$':
                lex_state = LexState.EXPR_END;
                tokenBuffer.setLength(0);
                c = src.read();
                switch (c) {
                case '_':           /* $_: last read line string */
                    c = src.read();
                    if (isIdentifierChar(c)) {
                        tokenBuffer.append('$');
                        tokenBuffer.append('_');
                        break;
                    }
                    src.unread(c);
                    c = '_';
                    /* fall through */
                case '~':       /* $~: match-data */
                case '*':           /* $*: argv */
                case '$':           /* $$: pid */
                case '?':           /* $?: last status */
                case '!':           /* $!: error string */
                case '@':           /* $@: error position */
                case '/':           /* $/: input record separator */
                case '\\':          /* $\: output record separator */
                case ';':           /* $;: field separator */
                case ',':           /* $,: output field separator */
                case '.':           /* $.: last read line number */
                case '=':           /* $=: ignorecase */
                case ':':           /* $:: load path */
                case '<':           /* $<: reading filename */
                case '>':           /* $>: default output handle */
                case '\"':          /* $": already loaded files */
                    tokenBuffer.append('$');
                    tokenBuffer.append(c);
                    yaccValue = new Token(tokenBuffer.toString(), getPosition());
                    return Tokens.tGVAR;

                case '-':
                    tokenBuffer.append('$');
                    tokenBuffer.append(c);
                    c = src.read();
                    if (isIdentifierChar(c)) {
                        tokenBuffer.append(c);
                    } else {
                        src.unread(c);
                    }
                    yaccValue = new Token(tokenBuffer.toString(), getPosition());
                    /* xxx shouldn't check if valid option variable */
                    return Tokens.tGVAR;

                case '&':           /* $&: last match */
                case '`':           /* $`: string before last match */
                case '\'':          /* $': string after last match */
                case '+':           /* $+: string matches last paren. */
                    yaccValue = new BackRefNode(getPosition(), c);
                    return Tokens.tBACK_REF;

                case '1': case '2': case '3':
                case '4': case '5': case '6':
                case '7': case '8': case '9':
                    tokenBuffer.append('$');
                    do {
                        tokenBuffer.append(c);
                        c = src.read();
                    } while (Character.isDigit(c));
                    src.unread(c);
                    if(last_state == LexState.EXPR_FNAME) {
                        yaccValue = new Token(tokenBuffer.toString(), getPosition());
                        return Tokens.tGVAR;
                    } else {
                        yaccValue = new NthRefNode(getPosition(), Integer.parseInt(tokenBuffer.substring(1)));
                        return Tokens.tNTH_REF;
                    }
                default:
                    if (!isIdentifierChar(c)) {
                        src.unread(c);
                        yaccValue = new Token("$", getPosition());
                        return '$';
                    }
                case '0':
                    tokenBuffer.append('$');
                }
                break;

            case '@':
                c = src.read();
                tokenBuffer.setLength(0);
                tokenBuffer.append('@');
                if (c == '@') {
                    tokenBuffer.append('@');
                    c = src.read();
                }
                if (Character.isDigit(c)) {
                    if (tokenBuffer.length() == 1) {
                        throw new SyntaxException(getPosition(), "`@" + c + "' is not allowed as an instance variable name");
                    }
                    throw new SyntaxException(getPosition(), "`@@" + c + "' is not allowed as a class variable name");
                }
                if (!isIdentifierChar(c)) {
                    src.unread(c);
                    yaccValue = new Token("@", getPosition());
                    return '@';
                }
                break;

            case '_':
                if (src.wasBeginOfLine() && src.matchString("_END__\n", false)) {
                  parserSupport.getResult().setEndSeen(true);
                    return 0;
                }
                tokenBuffer.setLength(0);
                break;

            default:
                if (!isIdentifierChar(c)) {
                    throw new SyntaxException(getPosition(), "Invalid char `\\" + Integer.parseInt(""+c, 8) + "' in expression");
                }
            
                tokenBuffer.setLength(0);
                break;
            }
    
            do {
                tokenBuffer.append(c);
                /* no special multibyte character handling is needed in Java
                 * if (ismbchar(c)) {
                    int i, len = mbclen(c)-1;

                    for (i = 0; i < len; i++) {
                        c = src.read();
                        tokenBuffer.append(c);
                    }
                }*/
                c = src.read();
            } while (isIdentifierChar(c));
            
            char peek = src.read();
            if ((c == '!' || c == '?') && 
                isIdentifierChar(tokenBuffer.charAt(0)) && peek != '=') {
                src.unread(peek);
                tokenBuffer.append(c);
            } else {
                  src.unread(peek);
                  src.unread(c);
            }
            
            int result = 0;

            switch (tokenBuffer.charAt(0)) {
                case '$':
                    lex_state = LexState.EXPR_END;
                    result = Tokens.tGVAR;
                    break;
                case '@':
                    lex_state = LexState.EXPR_END;
                    if (tokenBuffer.charAt(1) == '@') {
                        result = Tokens.tCVAR;
                    } else {
                        result = Tokens.tIVAR;
                    }
                    break;

                default:
                  char last = tokenBuffer.charAt(tokenBuffer.length() - 1);
                    if (last == '!' || last == '?') {
                        result = Tokens.tFID;
                    } else {
                        if (lex_state == LexState.EXPR_FNAME) {
                            if ((c = src.read()) == '=') { 
                              char c2 = src.read();
                              
                              if (c2 != '~' && c2 != '>' &&
                                    (c2 != '=' || (c2 == '\n' && src.peek('>')))) {
                                    result = Tokens.tIDENTIFIER;
                                    tokenBuffer.append(c);
                                    src.unread(c2);
                              } else { 
                                    src.unread(c2);
                                    src.unread(c);
                              }
                              } else {
                              src.unread(c);
                            }
                        }
                        if (result == 0 && Character.isUpperCase(tokenBuffer.charAt(0))) {
                            result = Tokens.tCONSTANT;
                        } else {
                            result = Tokens.tIDENTIFIER;
                        }
                    }

                    if (lex_state != LexState.EXPR_DOT) {
                        /* See if it is a reserved word.  */
                        Keyword keyword = Keyword.getKeyword(tokenBuffer.toString(), tokenBuffer.length());
                        if (keyword != null) {
                            // enum lex_state
                            LexState state = lex_state;

                            lex_state = keyword.state;
                            if (state.isExprFName()) {
                                yaccValue = new Token(keyword.name, getPosition());
                            } else {
                                yaccValue = new Token(tokenBuffer.toString(), getPosition());
                            }
                            if (keyword.id0 == Tokens.kDO) {
                                if (conditionState.isInState()) {
                                    return Tokens.kDO_COND;
                                }
                                if (cmdArgumentState.isInState() && state != LexState.EXPR_CMDARG) {
                                    return Tokens.kDO_BLOCK;
                                }
                                if (state == LexState.EXPR_ENDARG) {
                                    return Tokens.kDO_BLOCK;
                                }
                                return Tokens.kDO;
                            }

                            if (state == LexState.EXPR_BEG) {
                                return keyword.id0;
                            }
                                          if (keyword.id0 != keyword.id1) {
                                                lex_state = LexState.EXPR_BEG;
                                          }
                                          return keyword.id1;
                        }
                    }

                    if (lex_state == LexState.EXPR_BEG ||
                            lex_state == LexState.EXPR_MID ||
                            lex_state == LexState.EXPR_DOT ||
                            lex_state == LexState.EXPR_ARG ||
                            lex_state == LexState.EXPR_CMDARG) {
                        if (commandState) {
                            lex_state = LexState.EXPR_CMDARG;
                        } else {
                            lex_state = LexState.EXPR_ARG;
                        }
                    } else {
                        lex_state = LexState.EXPR_END;
                    }
            }
            
            String tempVal = tokenBuffer.toString();

            // Lame: parsing logic made it into lexer in ruby...So we
            // are emulating
            // FIXME:  I believe this is much simpler now...
            StaticScope scope = parserSupport.getCurrentScope();
            if (IdUtil.getVarType(tempVal) == IdUtil.LOCAL_VAR &&
                    (scope instanceof BlockStaticScope && (scope.isDefined(tempVal) >= 0)) ||
                    (scope.getLocalScope().isDefined(tempVal) >= 0)) {
                lex_state = LexState.EXPR_END;
            }

            yaccValue = new Token(tempVal, getPosition());

            return result;
        }
    }

    /**
     *  Parse a number from the input stream.
     *
     *@param c The first character of the number.
     *@return A int constant wich represents a token.
     */
01562     private int parseNumber(char c) throws IOException {
        lex_state = LexState.EXPR_END;

        tokenBuffer.setLength(0);

        if (c == '-') {
            tokenBuffer.append(c);
            c = src.read();
        } else if (c == '+') {
            // We don't append '+' since Java number parser gets confused
            c = src.read();
        }
        
        char nondigit = '\0';

        if (c == '0') {
            int startLen = tokenBuffer.length();

            switch (c = src.read()) {
                case 'x' :
                case 'X' : //  hexadecimal
                    c = src.read();
                    if (isHexChar(c)) {
                        for (;; c = src.read()) {
                            if (c == '_') {
                                if (nondigit != '\0') {
                                    break;
                                }
                                                nondigit = c;
                            } else if (isHexChar(c)) {
                                nondigit = '\0';
                                tokenBuffer.append(c);
                            } else {
                                break;
                            }
                        }
                    }
                    src.unread(c);

                    if (tokenBuffer.length() == startLen) {
                        throw new SyntaxException(getPosition(), "Hexadecimal number without hex-digits.");
                    } else if (nondigit != '\0') {
                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                    }
                    yaccValue = getInteger(tokenBuffer.toString(), 16);
                    return Tokens.tINTEGER;
                case 'b' :
                case 'B' : // binary
                    c = src.read();
                    if (c == '0' || c == '1') {
                        for (;; c = src.read()) {
                            if (c == '_') {
                                if (nondigit != '\0') {
                                    break;
                                }
                                                nondigit = c;
                            } else if (c == '0' || c == '1') {
                                nondigit = '\0';
                                tokenBuffer.append(c);
                            } else {
                                break;
                            }
                        }
                    }
                    src.unread(c);

                    if (tokenBuffer.length() == startLen) {
                        throw new SyntaxException(getPosition(), "Binary number without digits.");
                    } else if (nondigit != '\0') {
                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                    }
                    yaccValue = getInteger(tokenBuffer.toString(), 2);
                    return Tokens.tINTEGER;
                case 'd' :
                case 'D' : // decimal
                    c = src.read();
                    if (Character.isDigit(c)) {
                        for (;; c = src.read()) {
                            if (c == '_') {
                                if (nondigit != '\0') {
                                    break;
                                }
                                                nondigit = c;
                            } else if (Character.isDigit(c)) {
                                nondigit = '\0';
                                tokenBuffer.append(c);
                            } else {
                                break;
                            }
                        }
                    }
                    src.unread(c);

                    if (tokenBuffer.length() == startLen) {
                        throw new SyntaxException(getPosition(), "Binary number without digits.");
                    } else if (nondigit != '\0') {
                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                    }
                    yaccValue = getInteger(tokenBuffer.toString(), 2);
                    return Tokens.tINTEGER;
                case '0' : case '1' : case '2' : case '3' : case '4' : //Octal
                case '5' : case '6' : case '7' : case '_' : 
                    for (;; c = src.read()) {
                        if (c == '_') {
                            if (nondigit != '\0') {
                                break;
                            }
                                          nondigit = c;
                        } else if (c >= '0' && c <= '7') {
                            nondigit = '\0';
                            tokenBuffer.append(c);
                        } else {
                            break;
                        }
                    }
                    if (tokenBuffer.length() > startLen) {
                        src.unread(c);

                        if (nondigit != '\0') {
                            throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                        }

                        yaccValue = getInteger(tokenBuffer.toString(), 8);
                        return Tokens.tINTEGER;
                    }
                case '8' :
                case '9' :
                    throw new SyntaxException(getPosition(), "Illegal octal digit.");
                case '.' :
                case 'e' :
                case 'E' :
                  tokenBuffer.append('0');
                    break;
                default :
                    src.unread(c);
                    yaccValue = new FixnumNode(getPosition(), 0);
                    return Tokens.tINTEGER;
            }
        }

        boolean seen_point = false;
        boolean seen_e = false;

        for (;; c = src.read()) {
            switch (c) {
                case '0' :
                case '1' :
                case '2' :
                case '3' :
                case '4' :
                case '5' :
                case '6' :
                case '7' :
                case '8' :
                case '9' :
                    nondigit = '\0';
                    tokenBuffer.append(c);
                    break;
                case '.' :
                    if (nondigit != '\0') {
                        src.unread(c);
                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                    } else if (seen_point || seen_e) {
                        src.unread(c);
                        return getNumberToken(tokenBuffer.toString(), true, nondigit);
                    } else {
                        char c2;
                        if (!Character.isDigit(c2 = src.read())) {
                            src.unread(c2);
                              src.unread('.');
                            if (c == '_') { 
                                    // Enebo:  c can never be antrhign but '.'
                                    // Why did I put this here?
                            } else {
                                yaccValue = getInteger(tokenBuffer.toString(), 10);
                                return Tokens.tINTEGER;
                            }
                        } else {
                            tokenBuffer.append('.');
                            tokenBuffer.append(c2);
                            seen_point = true;
                            nondigit = '\0';
                        }
                    }
                    break;
                case 'e' :
                case 'E' :
                    if (nondigit != '\0') {
                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                    } else if (seen_e) {
                        src.unread(c);
                        return getNumberToken(tokenBuffer.toString(), true, nondigit);
                    } else {
                        tokenBuffer.append(c);
                        seen_e = true;
                        nondigit = c;
                        c = src.read();
                        if (c == '-' || c == '+') {
                            tokenBuffer.append(c);
                            nondigit = c;
                        } else {
                            src.unread(c);
                        }
                    }
                    break;
                case '_' : //  '_' in number just ignored
                    if (nondigit != '\0') {
                        throw new SyntaxException(getPosition(), "Trailing '_' in number.");
                    }
                    nondigit = c;
                    break;
                default :
                    src.unread(c);
                return getNumberToken(tokenBuffer.toString(), seen_e || seen_point, nondigit);
            }
        }
    }

    /**
     * Turns the collected text of a decimal literal into a token and stores
     * the corresponding value in {@code yaccValue}.
     *
     * @param number   the literal's text as accumulated by the caller.
     * @param isFloat  true to produce a float token, false for an integer.
     * @param nondigit '\0' when the literal ended on a digit; any other value
     *                 means a non-digit character was still pending and the
     *                 literal is rejected.
     * @return Tokens.tFLOAT when isFloat is set, otherwise Tokens.tINTEGER.
     * @throws SyntaxException when nondigit is not '\0'.
     */
    private int getNumberToken(String number, boolean isFloat, char nondigit) {
        if (nondigit != '\0') {
            throw new SyntaxException(getPosition(), "Trailing '_' in number.");
        }

        if (!isFloat) {
            yaccValue = getInteger(number, 10);
            return Tokens.tINTEGER;
        }

        double value;
        try {
            value = Double.parseDouble(number);
        } catch (NumberFormatException e) {
            // Text the Java parser rejects is warned about and mapped to an
            // infinity whose sign follows the literal's leading '-'.
            warnings.warn(getPosition(), "Float " + number + " out of range.");

            if (number.startsWith("-")) {
                value = Double.NEGATIVE_INFINITY;
            } else {
                value = Double.POSITIVE_INFINITY;
            }
        }
        yaccValue = new FloatNode(getPosition(), value);
        return Tokens.tFLOAT;
    }
}

Generated by  Doxygen 1.6.0   Back to index