// TokenMarker.java :  » Scripting » seco » seco » notebook » syntax » Java Open Source
//
// Java Open Source » Scripting » seco
// seco » seco » notebook » syntax » TokenMarker.java
/*
 * This file is part of the Scriba source distribution. This is free, open-source 
 * software. For full licensing information, please see the LicensingInformation file
 * at the root level of the distribution.
 *
 * Copyright (c) 2006-2007 Kobrix Software, Inc.
 */
/*
 * TokenMarker.java - Tokenizes lines of text
 * :tabSize=8:indentSize=8:noTabs=false:
 * :folding=explicit:collapseFolds=1:
 *
 * Copyright (C) 1998, 2003 Slava Pestov
 * Copyright (C) 1999, 2000 mike dillon
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

package seco.notebook.syntax;

import gnu.regexp.*;
import javax.swing.text.Segment;

import seco.notebook.syntax.util.CharIndexedSegment;

import java.util.*;


/**
 * A token marker splits lines of text into tokens. Each token carries
 * a length field and an identification tag that can be mapped to a color
 * or font style for painting that token.
 *
 * @author Slava Pestov, mike dillon
 * @version $Id: TokenMarker.java,v 1.4 2006/07/21 16:37:44 bizi Exp $
 *
 * @see seco.notebook.syntax.Token
 * @see seco.notebook.syntax.TokenHandler
 */
public class TokenMarker
{
  // All registered rule sets, keyed by the name returned from
  // ParserRuleSet.getSetName() (see addRuleSet()).
  private Hashtable ruleSets;
  // The rule set registered under the name "MAIN"; parsing starts here
  // when markTokens() is given no previous line context.
  private ParserRuleSet mainRuleSet;

  // Instead of passing these around to each method, we just store them
  // as instance variables. Note that this is not thread-safe.

  // Receiver of the tokens produced for the line currently being parsed.
  private TokenHandler tokenHandler;
  // The line currently being parsed.
  private Segment line;
  // Parser state for the current position; replaced or re-pointed as
  // spans and delegates are entered and left.
  private LineContext context;
  // Cached result of context.rules.getKeywords(); refreshed every time
  // the context changes.
  private KeywordMap keywords;
  // Scratch segment holding the literal start/end sequence that
  // handleRule() most recently tried to match.
  private Segment pattern = new Segment();
  // Index into line.array of the first character not yet reported as a token.
  private int lastOffset;
  // Despite the name, this is the exclusive end index into line.array
  // (line.offset + line.count), not a character count.
  private int lineLength;
  // Index into line.array of the character currently being examined.
  private int pos;
  // Toggled by IS_ESCAPE rules; reset after each ordinary character.
  private boolean escaped;

  // Index just past the leading whitespace of the line; compared against
  // by rules restricted to AT_WHITESPACE_END.
  private int whitespaceEnd;
  // Set once a rule match or a non-whitespace character has been seen,
  // after which whitespaceEnd no longer advances.
  private boolean seenWhitespaceEnd;
  
  /**
   * Creates a token marker with no rule sets registered yet. Rule sets are
   * supplied afterwards through {@link #addRuleSet(ParserRuleSet)}.
   */
  public TokenMarker()
  {
    // 64 is the initial capacity of the name -> rule set table
    this.ruleSets = new Hashtable(64);
  } 

  /**
   * Registers a rule set under its own name, replacing any rule set that
   * was previously registered under that name. A rule set named
   * <code>MAIN</code> additionally becomes the main rule set.
   *
   * @param rules the rule set to register
   */
  public void addRuleSet(ParserRuleSet rules)
  {
    ruleSets.put(rules.getSetName(), rules);

    // Hashtable.put() has already rejected a null name at this point
    if ("MAIN".equals(rules.getSetName()))
    {
      this.mainRuleSet = rules;
    }
  } 

  /**
   * Returns the rule set that was registered under the name
   * <code>MAIN</code>, or <code>null</code> if none has been added yet.
   */
  public ParserRuleSet getMainRuleSet()
  {
    return this.mainRuleSet;
  } 

  /**
   * Looks up a registered rule set by name.
   *
   * @param setName the name the rule set was registered under
   * @return the matching rule set, or <code>null</code> if there is none
   */
  public ParserRuleSet getRuleSet(String setName)
  {
    Object registered = ruleSets.get(setName);
    return (ParserRuleSet) registered;
  } 

  /**
   * Returns every registered rule set as a newly allocated array.
   *
   * @since jEdit 4.2pre3
   */
  public ParserRuleSet[] getRuleSets()
  {
    Collection registered = ruleSets.values();
    ParserRuleSet[] target = new ParserRuleSet[ruleSets.size()];
    return (ParserRuleSet[]) registered.toArray(target);
  } 

  //{{{ markTokens() method
  /**
   * Splits one line of text into tokens and reports them to a handler.
   * Do not call this method directly; call Buffer.markTokens() instead.
   *
   * <p>The line is scanned one character at a time. At each position the
   * end of an enclosing delegate is checked first, then every rule indexed
   * under the current character, and finally the character is treated as
   * whitespace, as a word separator, or as part of a pending word. The
   * working state lives in instance fields, so this method is not
   * thread-safe.
   *
   * @param prevContext the context returned for the previous line, or
   * <code>null</code> to start in the main rule set
   * @param tokenHandler receives every token of the line, then a
   * zero-length <code>Token.END</code> token, then the final line context
   * @param line the text to tokenize; characters from
   * <code>line.offset</code> up to (excluding)
   * <code>line.offset + line.count</code> are scanned
   * @return the interned context describing the parser state at the end of
   * this line
   */
  public LineContext markTokens(LineContext prevContext,
    TokenHandler tokenHandler, Segment line)
  {
    // Set up some instance variables
    // this is to avoid having to pass around lots and lots of
    // parameters.
    this.tokenHandler = tokenHandler;
    this.line = line;

    lastOffset = line.offset;
    // exclusive end index into line.array, not a length
    lineLength = line.count + line.offset;

    // Work on a fresh context object; the fields of prevContext are
    // copied by reference (the parent is shared, not cloned).
    context = new LineContext();

    if(prevContext == null)
      context.rules = getMainRuleSet();
    else
    {
      context.parent = prevContext.parent;
      context.inRule = prevContext.inRule;
      context.rules = prevContext.rules;
      context.spanEndSubst = prevContext.spanEndSubst;
    }

    keywords = context.rules.getKeywords();
    escaped = false;

    seenWhitespaceEnd = false;
    whitespaceEnd = line.offset;
    

    //Main parser loop
    ParserRule rule;
    // a negative value disables the column limit checked below
    int terminateChar = context.rules.getTerminateChar();
    boolean terminated = false;

main_loop:  for(pos = line.offset; pos < lineLength; pos++)
    {
      //check if we have to stop parsing
      // terminateChar is a column relative to the start of the line. The
      // first time it is reached, the rest of the line is handed to the
      // rule set returned by getStandardRuleSet() for the current default
      // token, with the context so far as its parent.
      if(terminateChar >= 0 && pos - line.offset >= terminateChar
        && !terminated)
      {
        terminated = true;
        context = new LineContext(ParserRuleSet
          .getStandardRuleSet(context.rules
          .getDefault()),context);
        keywords = context.rules.getKeywords();
      } 

      //{{{ check for end of delegate
      // When running inside a delegate, the rule that opened it is the
      // parent's inRule; see whether that rule ends here.
      if(context.parent != null)
      {
        rule = context.parent.inRule;
        if(rule != null)
        {
          if(checkDelegateEnd(rule))
          {
            // a match ends the run of leading whitespace
            seenWhitespaceEnd = true;
            continue main_loop;
          }
        }
      } //}}}

      //{{{ check every rule
      char ch = line.array[pos];

      // getRules(ch) yields the head of a linked list (via rule.next) of
      // the candidate rules for this character
      rule = context.rules.getRules(ch);
      while(rule != null)
      {
        // stop checking rules if there was a match
        if (handleRule(rule,false))
        {
          seenWhitespaceEnd = true;
          continue main_loop;
        }

        rule = rule.next;
      } //}}}

      //{{{ check if current character is a word separator
      if(Character.isWhitespace(ch))
      {
        // still inside the leading whitespace: move its end past this char
        if(!seenWhitespaceEnd)
          whitespaceEnd = pos + 1;

        // let a rule that is still open see this position as its end
        if(context.inRule != null)
          handleRule(context.inRule,true);

        handleNoWordBreak();

        // false: text that is neither a number nor a keyword stays
        // pending and is reported by the block just below
        markKeyword(false);

        if(lastOffset != pos)
        {
          tokenHandler.handleToken(line,
            context.rules.getDefault(),
            lastOffset - line.offset,
            pos - lastOffset,
            context);
        }

        // the whitespace character itself is a one-character default token
        tokenHandler.handleToken(line,
          context.rules.getDefault(),
          pos - line.offset,1,context);
        lastOffset = pos + 1;

        escaped = false;
      }
      else
      {
        // word-break processing is only done when the rule set has
        // keywords or at least one rule
        if(keywords != null || context.rules.getRuleCount() != 0)
        {
          String noWordSep = context.rules.getNoWordSep();

          // neither a letter/digit nor listed in noWordSep: this
          // character separates words
          if(!Character.isLetterOrDigit(ch)
            && noWordSep.indexOf(ch) == -1)
          {
            if(context.inRule != null)
              handleRule(context.inRule,true);

            handleNoWordBreak();

            // true: everything before the separator is reported now, so
            // afterwards lastOffset is at the separator itself
            markKeyword(true);

            // the separator is a one-character default token
            tokenHandler.handleToken(line,
              context.rules.getDefault(),
              lastOffset - line.offset,1,
              context);
            lastOffset = pos + 1;
          }
        }

        seenWhitespaceEnd = true;
        escaped = false;
      } //}}}
    } //}}}

    //{{{ Mark all remaining characters
    // pretend to stand just past the last character so that the flushing
    // calls below cover the rest of the line
    pos = lineLength;

    if(context.inRule != null)
      handleRule(context.inRule,true);

    handleNoWordBreak();
    markKeyword(true);
    //}}}

    //Unwind any NO_LINE_BREAK parent delegates
    // If the terminate column was reached, every parent is popped
    // regardless of its rule's flags.
unwind:    while(context.parent != null)
    {
      rule = context.parent.inRule;
      if((rule != null && (rule.action
        & ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
        || terminated)
      {
        context = context.parent;
        keywords = context.rules.getKeywords();
        // the span that opened the popped delegate is no longer open
        context.inRule = null;
      }
      else
        break unwind;
    }

    // zero-length marker token at the end of the line
    tokenHandler.handleToken(line,Token.END,
      pos - line.offset,0,context);

    // hand out the shared instance for contexts that compare equal
    context = context.intern();
    tokenHandler.setLineContext(context);
    return context;
  } 

  
  
  /**
   * Tests whether the span that opened the current delegate context ends
   * at the current position, and leaves the delegate if it does.
   *
   * <p>The end sequence is matched with the parent context temporarily
   * installed. If it matches and the parser was not in the escaped state
   * beforehand, pending text is flushed, the parser returns to a copy of
   * the parent context, the end sequence is reported as a token and
   * <code>pos</code> is moved to its last character. Otherwise, unless the
   * rule has the NO_ESCAPE flag, the escape rule of the parent rule set is
   * tried at this position.
   *
   * @param rule the rule that opened the delegate (the parent's inRule)
   * @return true if the current position was consumed, either by the end
   * of the span or by the parent's escape rule
   */
  private boolean checkDelegateEnd(ParserRule rule)
  {
    // a rule without an end sequence can never be closed here
    if(rule.end == null)
    {
      return false;
    }

    // Match the end sequence as seen from the parent, then switch back to
    // the delegate whatever the outcome.
    final LineContext delegateContext = context;
    context = context.parent;
    keywords = context.rules.getKeywords();
    final boolean wasEscaped = escaped;
    final boolean endMatched = handleRule(rule,true);
    context = delegateContext;
    keywords = context.rules.getKeywords();

    if(!endMatched || wasEscaped)
    {
      // check escape rule of parent
      if((rule.action & ParserRule.NO_ESCAPE) != 0)
      {
        return false;
      }

      LineContext enclosing = context.parent;
      if(enclosing == null || enclosing.rules == null)
      {
        return false;
      }

      ParserRule escapeRule = enclosing.rules.getEscapeRule();
      return escapeRule != null && handleRule(escapeRule,false);
    }

    // close whatever is still open inside the delegate and flush the
    // text that precedes the end sequence
    if(context.inRule != null)
    {
      handleRule(context.inRule,true);
    }
    markKeyword(true);

    // continue on a copy of the parent so the original stays untouched
    context = (LineContext)context.parent.clone();

    // the end sequence gets the span's token unless the rule excludes
    // its delimiters from the match
    byte endToken;
    if((context.inRule.action & ParserRule.EXCLUDE_MATCH)
      == ParserRule.EXCLUDE_MATCH)
    {
      endToken = context.rules.getDefault();
    }
    else
    {
      endToken = context.inRule.token;
    }
    tokenHandler.handleToken(line,endToken,
      pos - line.offset,pattern.count,context);

    keywords = context.rules.getKeywords();
    context.inRule = null;
    lastOffset = pos + pattern.count;

    // move pos to last character of match sequence
    pos += (pattern.count - 1);

    return true;
  } 

  /**
   * Checks if the rule matches the line at the current position
   * and handles the rule if it does match.
   *
   * <p>With <code>end == false</code> the rule's start sequence (a literal
   * or, for REGEXP rules, a regular expression) is tried at
   * <code>pos</code>. With <code>end == true</code> the rule's end
   * sequence is tried instead; for MARK_FOLLOWING rules no sequence is
   * matched at all in that case. On success tokens are reported, the
   * context is updated and <code>pos</code> / <code>lastOffset</code> are
   * advanced. The method calls itself to close a rule that is still open
   * in the current context.
   *
   * @param checkRule the rule to test
   * @param end false to test the start of the rule, true to test its end
   * @return true if the rule applied at this position, false if it did not
   */
  private boolean handleRule(ParserRule checkRule, boolean end)
  {
    //Some rules can only match in certain locations
    if(!end)
    {
      // cheap rejection: the rule's hash character must equal the current
      // character, ignoring case
      if(line!= null && line.array != null &&
              Character.toUpperCase(checkRule.hashChar)
        != Character.toUpperCase(line.array[pos]))
          return false;
    }

    // MARK_PREVIOUS rules are positioned at the start of the pending text
    // rather than at the current character
    int offset = ((checkRule.action & ParserRule.MARK_PREVIOUS) != 0) ?
      lastOffset : pos;
    int posMatch = (end ? checkRule.endPosMatch : checkRule.startPosMatch);

    // only the first restriction present in posMatch is checked
    if((posMatch & ParserRule.AT_LINE_START)
      == ParserRule.AT_LINE_START)
    {
      if(offset != line.offset)
        return false;
    }
    else if((posMatch & ParserRule.AT_WHITESPACE_END)
      == ParserRule.AT_WHITESPACE_END)
    {
      if(offset != whitespaceEnd)
        return false;
    }
    else if((posMatch & ParserRule.AT_WORD_START)
      == ParserRule.AT_WORD_START)
    {
      if(offset != lastOffset)
        return false;
    } 

    int matchedChars = 1;
    // both stay null unless the regexp branch below is taken
    CharIndexedSegment charIndexed = null;
    REMatch match = null;

    //See if the rule's start or end sequence matches here
    // (skipped when closing a MARK_FOLLOWING rule)
    if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
    {
      // the end cannot be a regular expression
      if((checkRule.action & ParserRule.REGEXP) == 0 || end)
      {
        if(end)
        {
          // an end sequence built from the start match takes precedence
          // over the rule's own end sequence
          if(context != null && context.spanEndSubst != null)
            pattern.array = context.spanEndSubst;
          else
            pattern.array = checkRule.end;
        }
        else
          pattern.array = checkRule.start;
        pattern.offset = 0;
        if(pattern.array == null) return false;
        pattern.count = pattern.array.length;
        matchedChars = pattern.count;

        if(!SyntaxUtilities.regionMatches(context.rules
          .getIgnoreCase(),line,pos,pattern.array))
        {
          return false;
        }
      }
      else
      {
        // note that all regexps start with \A so they only
        // match the start of the string
        int matchStart = pos - line.offset;
        charIndexed = new CharIndexedSegment(line,matchStart);
        match = checkRule.startRegexp.getMatch(
          charIndexed,0,RE.REG_ANCHORINDEX);
        if(match == null)
          return false;
        else if(match.getStartIndex() != 0)
          throw new InternalError("Can't happen");
        else
        {
          matchedChars = match.getEndIndex();
          /* workaround for hang if match was
           * zero-width. not sure if there is
           * a better way to handle this */
          if(matchedChars == 0)
            matchedChars = 1;
        }
      }
    } 

    //Check for an escape sequence
    // NOTE(review): the two branches below advance by pattern.count, which
    // is only set by the literal branch above; for a REGEXP rule it still
    // holds the value from an earlier match - confirm this is intended.
    if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
    {
      if(context.inRule != null)
        handleRule(context.inRule,true);

      escaped = !escaped;
      pos += pattern.count - 1;
    }
    else if(escaped)
    {
      // the matched sequence was escaped: skip over it without handling it
      escaped = false;
      pos += pattern.count - 1;
    }
    //Handle start of rule
    else if(!end)
    {
      // close a rule that is still open before starting a new one
      if(context.inRule != null)
        handleRule(context.inRule,true);

      // for MARK_PREVIOUS the pending text belongs to this rule, so it
      // must not be flushed as default text
      markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS)
        != ParserRule.MARK_PREVIOUS);

      switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
      {
      //{{{ SEQ
      case ParserRule.SEQ:
        context.spanEndSubst = null;

        if((checkRule.action & ParserRule.REGEXP) != 0)
        {
          handleTokenWithSpaces(tokenHandler,
            checkRule.token,
            pos - line.offset,
            matchedChars,
            context);
        }
        else
        {
          tokenHandler.handleToken(line,
            checkRule.token,
            pos - line.offset,
            matchedChars,context);
        }

        // a DELEGATE attribute on a SEQ changes the
        // ruleset from the end of the SEQ onwards
        if(checkRule.delegate != null)
        {
          // same nesting level: the new context gets (a copy of) the
          // current parent, not the current context
          context = new LineContext(
            checkRule.delegate,
            context.parent);
          keywords = context.rules.getKeywords();
        }
        break;
      //SPAN, EOL_SPAN
      case ParserRule.SPAN:
      case ParserRule.EOL_SPAN:
        context.inRule = checkRule;

        byte tokenType = ((checkRule.action & ParserRule.EXCLUDE_MATCH)
          == ParserRule.EXCLUDE_MATCH
          ? context.rules.getDefault() : checkRule.token);

        if((checkRule.action & ParserRule.REGEXP) != 0)
        {
          handleTokenWithSpaces(tokenHandler,
            tokenType,
            pos - line.offset,
            matchedChars,
            context);
        }
        else
        {
          tokenHandler.handleToken(line,tokenType,
            pos - line.offset,
            matchedChars,context);
        }

        char[] spanEndSubst = null;
        /* substitute result of matching the rule start
         * into the end string.
         *
         * eg, in shell script mode, <<\s*(\w+) is
         * matched into \<$1\> to construct rules for
         * highlighting read-ins like this <<EOF
         * ...
         * EOF
         */
        // charIndexed is non-null only when the start was a regexp match
        if(charIndexed != null && checkRule.end != null)
        {
          spanEndSubst = substitute(match,
            checkRule.end);
        }

        context.spanEndSubst = spanEndSubst;
        // enter the delegate; the LineContext constructor stores a copy
        // of the current context as the new context's parent
        context = new LineContext(
          checkRule.delegate,
          context);
        keywords = context.rules.getKeywords();

        break;
      //{{{ MARK_FOLLOWING
      case ParserRule.MARK_FOLLOWING:
        tokenHandler.handleToken(line,(checkRule.action
          & ParserRule.EXCLUDE_MATCH)
          == ParserRule.EXCLUDE_MATCH ?
          context.rules.getDefault()
          : checkRule.token,pos - line.offset,
          pattern.count,context);

        context.spanEndSubst = null;
        // remember the rule; the text that follows is reported when the
        // rule is closed (see the last branch of this method)
        context.inRule = checkRule;
        break;
      //}}}
      //{{{ MARK_PREVIOUS
      case ParserRule.MARK_PREVIOUS:
        context.spanEndSubst = null;

        if ((checkRule.action & ParserRule.EXCLUDE_MATCH)
          == ParserRule.EXCLUDE_MATCH)
        {
          // pending text gets the rule's token, the matched sequence
          // itself is reported separately as default text
          if(pos != lastOffset)
          {
            tokenHandler.handleToken(line,
              checkRule.token,
              lastOffset - line.offset,
              pos - lastOffset,
              context);
          }

          tokenHandler.handleToken(line,
            context.rules.getDefault(),
            pos - line.offset,pattern.count,
            context);
        }
        else
        {
          // pending text and matched sequence form a single token
          tokenHandler.handleToken(line,
            checkRule.token,
            lastOffset - line.offset,
            pos - lastOffset + pattern.count,
            context);
        }

        break;
      //}}}
      default:
        throw new InternalError("Unhandled major action");
      }

      // move pos to last character of match sequence
      pos += (matchedChars - 1);
      lastOffset = pos + 1;

      // break out of inner for loop to check next char
    } //}}}
    //{{{ Handle end of MARK_FOLLOWING
    else if((context.inRule.action & ParserRule.MARK_FOLLOWING) != 0)
    {
      // report the text collected since the rule started with the
      // rule's token, then close the rule
      if(pos != lastOffset)
      {
        tokenHandler.handleToken(line,
          context.inRule.token,
          lastOffset - line.offset,
          pos - lastOffset,context);
      }

      lastOffset = pos;
      context.inRule = null;
    } //}}}

    return true;
  } //}}}

  //{{{ handleNoWordBreak() method
  /**
   * Leaves the current delegate context if the rule that opened it carries
   * the NO_WORD_BREAK flag. Text that is still pending is reported with
   * that rule's token before the parser drops back to the parent context.
   */
  private void handleNoWordBreak()
  {
    if(context.parent == null)
    {
      return;
    }

    ParserRule openingRule = context.parent.inRule;
    boolean breaksAtWordEnd = openingRule != null
      && (openingRule.action & ParserRule.NO_WORD_BREAK) != 0;
    if(!breaksAtWordEnd)
    {
      return;
    }

    // flush what has been collected so far as part of the span
    int pending = pos - lastOffset;
    if(pending != 0)
    {
      tokenHandler.handleToken(line,openingRule.token,
        lastOffset - line.offset,pending,context);
    }
    lastOffset = pos;

    // pop back to the parent and mark its span as closed
    context = context.parent;
    keywords = context.rules.getKeywords();
    context.inRule = null;
  } //}}}

  //{{{ handleTokenWithSpaces() method
  /**
   * Reports a region of the current line as a series of tokens of one
   * type, split at whitespace: every whitespace character becomes a
   * one-character token of its own and every run of other characters
   * becomes a single token.
   *
   * @param tokenHandler receiver of the tokens
   * @param tokenType the type given to every reported token
   * @param start start of the region, relative to the start of the line
   * @param len number of characters in the region
   * @param context the context passed along with each token
   */
  private void handleTokenWithSpaces(TokenHandler tokenHandler,
    byte tokenType, int start, int len, LineContext context)
  {
    final int stop = start + len;
    int runStart = start;
    int cursor = start;

    while(cursor < stop)
    {
      // start/len are line-relative, the backing array is not
      char current = line.array[line.offset + cursor];
      if(Character.isWhitespace(current))
      {
        // first the run of non-whitespace that ends here, if any ...
        int runLength = cursor - runStart;
        if(runLength != 0)
        {
          tokenHandler.handleToken(line,tokenType,
            runStart,runLength,context);
        }
        // ... then the whitespace character itself
        tokenHandler.handleToken(line,tokenType,cursor,1,context);
        runStart = cursor + 1;
      }
      cursor++;
    }

    // whatever follows the last whitespace character
    int tail = stop - runStart;
    if(tail != 0)
    {
      tokenHandler.handleToken(line,tokenType,runStart,tail,context);
    }
  } //}}}

  //{{{ markKeyword() method
  /**
   * Classifies the pending text between <code>lastOffset</code> and
   * <code>pos</code> and reports it as a token.
   *
   * <p>The text is reported as <code>Token.DIGIT</code> if digit
   * highlighting is enabled for the current rule set and the text is
   * either all digits or contains a digit and matches the rule set's
   * digit regexp. Otherwise it is reported with the keyword's token if
   * the keyword map knows it. Otherwise it is reported with the default
   * token, but only if <code>addRemaining</code> is true; if not, the
   * text stays pending and <code>lastOffset</code> is left unchanged.
   *
   * @param addRemaining whether text that is neither a number nor a
   * keyword should be reported as a default token
   */
  private void markKeyword(boolean addRemaining)
  {
    int len = pos - lastOffset;
    if(len == 0)
      return;

    //{{{ Do digits
    if(context.rules.getHighlightDigits())
    {
      boolean digit = false;
      boolean mixed = false;

      for(int i = lastOffset; i < pos; i++)
      {
        //TODO:??? somehow this is null in very rare occasions
        if(line.array == null) continue;
        char ch = line.array[i];
        if(Character.isDigit(ch))
          digit = true;
        else
          mixed = true;
      }

      if(mixed)
      {
        RE digitRE = context.rules.getDigitRegexp();

        // only match against regexp if its not all
        // digits; if all digits, no point matching
        if(digit)
        { 
          if(digitRE == null)
          {
            // mixed digit/alpha keyword,
            // and no regexp... don't
            // highlight as DIGIT
            digit = false;
          }
          else
          {
            // The shared line segment is narrowed to the pending text
            // for the duration of the match. The wrapper is created
            // first, as before; presumably it reads offset/count from
            // the segment when matching - confirm in CharIndexedSegment.
            CharIndexedSegment seg = new CharIndexedSegment(
              line,false);
            int oldCount = line.count;
            int oldOffset = line.offset;
            line.offset = lastOffset;
            line.count = len;
            try
            {
              if(!digitRE.isMatch(seg))
                digit = false;
            }
            finally
            {
              // always undo the narrowing, even if the match throws;
              // the segment belongs to the caller and must not be left
              // pointing at a fragment of the line
              line.offset = oldOffset;
              line.count = oldCount;
            }
          }
        }
      }

      if(digit)
      {
        tokenHandler.handleToken(line,Token.DIGIT,
          lastOffset - line.offset,
          len,context);
        lastOffset = pos;

        return;
      }
    } //}}}

    //{{{ Do keywords
    if(keywords != null)
    {
      byte id = keywords.lookup(line, lastOffset, len);

      if(id != Token.NULL)
      {
        tokenHandler.handleToken(line,id,
          lastOffset - line.offset,
          len,context);
        lastOffset = pos;
        return;
      }
    } //}}}

    //{{{ Handle any remaining crud
    if(addRemaining)
    {
      tokenHandler.handleToken(line,context.rules.getDefault(),
        lastOffset - line.offset,len,context);
      lastOffset = pos;
    } //}}}
  } //}}}

  //{{{ substitute() method
  /**
   * Builds the concrete end sequence of a span from the end template of
   * its rule and the regexp match of its start.
   *
   * <p>Every <code>$</code> that is directly followed by one of the ASCII
   * digits <code>0</code> to <code>9</code> is replaced by the text of
   * that subexpression of the match. Every other character, including a
   * <code>$</code> that is not followed by such a digit, is copied
   * unchanged.
   *
   * @param match the match of the rule's start regexp
   * @param end the end template of the rule
   * @return a new array holding the template with all references replaced
   */
  private char[] substitute(REMatch match, char[] end)
  {
    StringBuffer buf = new StringBuffer(end.length);
    for(int i = 0; i < end.length; i++)
    {
      char ch = end[i];
      if(ch == '$' && i + 1 < end.length)
      {
        char digit = end[i + 1];
        // Only ASCII digits name a subexpression. Character.isDigit()
        // also accepts other Unicode digits, for which digit - '0' is
        // not a number between 0 and 9.
        if(digit >= '0' && digit <= '9')
        {
          buf.append(match.toString(digit - '0'));
          // the digit is part of the reference, skip it
          i++;
          continue;
        }
      }

      buf.append(ch);
    }

    return buf.toString().toCharArray();
  } //}}}

  //}}}

  //{{{ LineContext class
  /**
   * Stores persistent per-line syntax parser state: the active rule set,
   * the rule that is currently open, the enclosing context and, for spans
   * started by a regexp, the end sequence derived from the start match.
   * Instances compare by value and can be shared through
   * {@link #intern()}.
   */
  public static class LineContext
  {
    // canonical instances handed out by intern(); each maps to itself
    private static Hashtable intern = new Hashtable();

    // context of the enclosing rule set, or null at the outermost level
    public LineContext parent;
    // rule that is currently open in this context, or null
    public ParserRule inRule;
    // rule set that is active in this context
    public ParserRuleSet rules;
    // used for SPAN_REGEXP rules; otherwise null
    public char[] spanEndSubst;

    //{{{ LineContext constructor
    /**
     * Creates a context for the given rule set nested inside a copy of
     * the given context.
     *
     * @param rs the active rule set
     * @param lc the enclosing context, which is cloned; may be null
     */
    public LineContext(ParserRuleSet rs, LineContext lc)
    {
      this.rules = rs;
      if(lc != null)
      {
        this.parent = (LineContext)lc.clone();
      }
    } //}}}

    //{{{ LineContext constructor
    /**
     * Creates an empty context; all fields are left null.
     */
    public LineContext()
    {
    } //}}}

    //{{{ intern() method
    /**
     * Returns the canonical instance equal to this context, registering
     * this instance as the canonical one if there is none yet.
     */
    public LineContext intern()
    {
      LineContext canonical = (LineContext)intern.get(this);
      if(canonical != null)
      {
        return canonical;
      }

      intern.put(this,this);
      return this;
    } //}}}

    //{{{ hashCode() method
    /**
     * Hashes on the open rule if there is one, else on the rule set,
     * else yields 0.
     */
    public int hashCode()
    {
      if(inRule != null)
      {
        return inRule.hashCode();
      }
      return (rules != null) ? rules.hashCode() : 0;
    } //}}}

    //{{{ equals() method
    /**
     * Two contexts are equal if they refer to the same open rule and the
     * same rule set, and have equal parents and equal substituted end
     * sequences.
     */
    public boolean equals(Object obj)
    {
      if(!(obj instanceof LineContext))
      {
        return false;
      }

      LineContext other = (LineContext)obj;
      // rule and rule set are compared by identity
      if(other.inRule != inRule || other.rules != rules)
      {
        return false;
      }
      if(!MiscUtilities.objectsEqual(parent,other.parent))
      {
        return false;
      }
      return charArraysEqual(spanEndSubst,other.spanEndSubst);
    } //}}}

    //{{{ clone() method
    /**
     * Returns a copy of this context. The chain of parents is copied as
     * well; the rule, the rule set and the end sequence array are shared
     * with the original.
     */
    public Object clone()
    {
      LineContext copy = new LineContext();
      copy.rules = rules;
      copy.inRule = inRule;
      copy.spanEndSubst = spanEndSubst;
      if(parent != null)
      {
        copy.parent = (LineContext)parent.clone();
      }

      return copy;
    } //}}}

    //{{{ charArraysEqual() method
    /**
     * Compares two char arrays by content; two nulls are equal, a null
     * and an array are not.
     */
    private boolean charArraysEqual(char[] c1, char[] c2)
    {
      return Arrays.equals(c1,c2);
    } //}}}
  } //}}}
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.