/*
 *                 Sun Public License Notice
 * 
 * The contents of this file are subject to the Sun Public License
 * Version 1.0 (the "License"). You may not use this file except in
 * compliance with the License. A copy of the License is available at
 * http://www.sun.com/
 * 
 * The Original Code is NetBeans. The Initial Developer of the Original
 * Code is Sun Microsystems, Inc. Portions Copyright 1997-2000 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

package org.netbeans.editor;

/**
 * Lexical analyzer that works on a given text buffer. It allows a character
 * buffer to be parsed sequentially by calling <tt>nextToken()</tt>, which
 * returns the token-ids.
 * 
 * After a token is found by calling the <tt>nextToken</tt> method, the
 * <tt>getTokenOffset()</tt> method can be used to get the starting offset of
 * the current token in the buffer. The <tt>getTokenLength()</tt> method gives
 * the length of the current token.
 * 
 * The heart of the analyzer is the <tt>parseToken()</tt> method, which parses
 * the text and returns the token-id of the last token found. The
 * <tt>parseToken()</tt> method is called from <tt>nextToken()</tt>. It
 * operates with two important variables. The <tt>offset</tt> variable
 * identifies the currently scanned character in the buffer. The
 * <tt>tokenOffset</tt> is the beginning of the current token. The
 * <tt>state</tt> variable, which identifies the current internal state of the
 * analyzer, is set accordingly as the characters are parsed. If
 * <tt>parseToken()</tt> recognizes a token, it returns its ID;
 * <tt>tokenOffset</tt> is then the token's beginning in the buffer and
 * <tt>offset - tokenOffset</tt> is its length. When the token is processed,
 * the value of <tt>tokenOffset</tt> is set to the current value of
 * <tt>offset</tt> and the parsing continues.
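 * 
 * A minimal scanning-loop sketch (the names <tt>MySyntax</tt> and
 * <tt>docText</tt> are illustrative only and not part of this class; the
 * <tt>load()</tt> call is described below):
 * 
 * <pre>
 * Syntax syntax = new MySyntax(); // hypothetical subclass overriding parseToken()
 * char[] buf = docText.toCharArray();
 * // a single (and therefore last) buffer with no relation to a document
 * syntax.load(null, buf, 0, buf.length, true, -1);
 * TokenID id;
 * while ((id = syntax.nextToken()) != null) {
 *     int start = syntax.getTokenOffset();
 *     int len = syntax.getTokenLength();
 *     // handle the token with the returned id covering buf[start .. start + len - 1]
 * }
 * </pre>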
 * 
 * Internal states are integer constants used internally by the analyzer. They
 * are assigned to the <tt>state</tt> variable to express that the analyzer
 * has moved from one state to another. They are usually numbered starting from
 * zero, but they don't have to be. The only reserved value is -1, which is
 * reserved for the INIT state - the initial internal state of the analyzer.
 * 
 * There is also support for defining persistent info about the current
 * state of the analyzer. This info can later be used to restore the parsing
 * from a particular state instead of parsing from the beginning of the
 * buffer. This feature is very useful when modifications are performed in the
 * document. The info is stored through the <tt>StateInfo</tt> interface,
 * with <tt>BaseStateInfo</tt> as the basic implementation. It allows getting
 * and setting the two values important from the persistence point of view. The
 * first one is the value of the <tt>state</tt> variable. The other one is the
 * difference <tt>offset - tokenOffset</tt>, which is called the pre-scan. A
 * particular analyzer can define additional values important for persistent
 * storage. The <tt>createStateInfo()</tt> method can be overridden to create a
 * custom state-info, and <tt>loadState()</tt> and <tt>storeState()</tt> can be
 * overridden to get/set the additional values.
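 * 
 * A minimal store/restore sketch (the names <tt>syntax</tt>, <tt>buf</tt>,
 * <tt>resumeOffset</tt> and <tt>resumeLen</tt> are illustrative only):
 * 
 * <pre>
 * // capture the analyzer's state (and pre-scan) at some point during scanning
 * Syntax.StateInfo info = syntax.createStateInfo();
 * syntax.storeState(info);
 * // later: resume from the captured state; buf must contain the
 * // info.getPreScan() characters immediately preceding resumeOffset
 * syntax.load(info, buf, resumeOffset, resumeLen, true, -1);
 * </pre>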
 * 
 * The <tt>load()</tt> method sets the buffer to be parsed. There is a special
 * parameter of the <tt>load()</tt> method, the stop-position, that relates the
 * character buffer passed to <tt>load()</tt> to the position of the buffer's
 * data in the document. For this extended functionality the document must be
 * passed to the constructor of the lexical analyzer at some level.
 * 
 * 
 * @author Miloslav Metelka
 * @version 1.00
 */

public class Syntax {

  /** Is the state of the analyzer equal to the given state info? */
  public static final int EQUAL_STATE = 0;

  /** Is the state of the analyzer different from the given state info? */
  public static final int DIFFERENT_STATE = 1;

  /** Initial internal state of the analyzer */
  public static final int INIT = -1;

  /**
   * Internal state of the lexical analyzer. At the beginning it's set to the
   * INIT value, but it is changed by <tt>parseToken()</tt> as the characters
   * are processed one by one.
   */
  protected int state = INIT;

  /** Text buffer to scan */
  protected char buffer[];

  /** Current offset in the buffer */
  protected int offset;

  /** Offset holding the beginning of the current token */
  protected int tokenOffset;

  /** This variable is the length of the token that was found */
  protected int tokenLength;

  /**
   * Path from which the found token-id comes. The
   * <tt>TokenContext.getContextPath()</tt> method can be used to get the path.
   * If the lexical analyzer doesn't use any child token-contexts, it can
   * assign the path in the constructor.
   */
  protected TokenContextPath tokenContextPath;

  /**
   * Setting this flag to true means that there are currently no more buffers
   * available, so the analyzer should return all the tokens, including those
   * whose successful scanning would otherwise be left for later, when the next
   * buffer becomes available. Setting this flag to true ensures that all the
   * characters in the current buffer will be processed. The lexical analyzer
   * should on one hand process all the characters, but on the other hand it
   * should "save" its context. For example, if the scanner finds an unclosed
   * comment at the end of the buffer, it should return the comment token but
   * stay in the "being in comment" internal state.
   */
  protected boolean lastBuffer;

  /** Offset in the buffer at which scanning should stop. */
  protected int stopOffset;

  /**
   * The position in the document that logically corresponds to the stopOffset
   * value. If there's no relation to the document, it's -1. The relation to
   * the document's data is expressed through the stopOffset-to-stopPosition
   * pair because stopOffset is the only offset that doesn't change frequently
   * during the operation of the lexical analyzer.
   */
  protected int stopPosition;

  /**
   * This variable can be populated by the parseToken() method when the
   * user types an erroneous construct but it's clear what correct token
   * they meant to write. For example, if the user writes a single '0x', it's
   * an erroneous construct, but it's clear that the user wants to enter a
   * hexadecimal number. In this situation parseToken() should report an
   * error, but it should also set supposedTokenID to the hexadecimal-number
   * token. This information is used while drawing the text. If the caret
   * stands inside or around such a token, the drawing code calls
   * getSupposedTokenID() after calling nextToken() and, if it's non-null,
   * uses it instead of the original token.
   */
  protected TokenID supposedTokenID;

  /**
   * Function that should be called externally to scan the text. It manages
   * the call to parseToken() and takes care of the proper setting of the
   * offsets. It can be extended to support any custom debugging required.
   */
  public TokenID nextToken() {
    // Return immediately when at the end of buffer
    if (tokenOffset >= stopOffset) {
      tokenLength = 0;
      return null; // signal no token found
    }

    supposedTokenID = null; // no supposed token unless parseToken() sets one
    TokenID tokenID = parseToken();
    if (tokenID != null) { // regular token found
      tokenLength = offset - tokenOffset;
      tokenOffset = offset; // move to the next token
      if (tokenLength == 0) { // test for empty token
        return nextToken(); // repeat until non-empty token is found
      }
    }
    else { // EOT reached
      tokenLength = 0;
    }

    return tokenID;
  }

  /**
   * This is the core function of the analyzer. It returns either the token-id
   * or null to indicate that the end of the buffer was reached. The function
   * scans the active character and does one or more of the following actions:
   * <ol>
   * <li>change the internal analyzer state</li>
   * <li>set the token-context-path and return the token-id</li>
   * <li>adjust the current position to signal a different end of the token;
   * the character that offset points to is not included in the token</li>
   * </ol>
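   * 
   * A minimal override sketch (the token-ids <tt>TEXT_ID</tt> and
   * <tt>WHITESPACE_ID</tt> are hypothetical constants of a subclass; a real
   * analyzer would typically also maintain the <tt>state</tt> variable and
   * assign the token-context-path):
   * 
   * <pre>
   * protected TokenID parseToken() {
   *     while (stopOffset > offset) {
   *         char ch = buffer[offset++];
   *         if (Character.isWhitespace(ch)) {
   *             if (offset - 1 > tokenOffset) {
   *                 offset--; // put the whitespace back for the next call
   *                 return TEXT_ID; // pending run of non-whitespace characters
   *             }
   *             return WHITESPACE_ID; // single-character whitespace token
   *         }
   *     }
   *     if (!lastBuffer) {
   *         return null; // unfinished characters stay as the pre-scan
   *     }
   *     return (offset > tokenOffset) ? TEXT_ID : null; // flush the last token
   * }
   * </pre>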
   */
  protected TokenID parseToken() {
    return null;
  }

  /**
   * Load the state from a syntax mark into the analyzer and set up the buffer
   * to be scanned. This method is used when scanning should continue from a
   * previously stored state instead of from the beginning of the buffer.
   * 
   * @param stateInfo
   *            info about the state of the lexical analyzer to load. It can
   *            be null to indicate there's no previous state so the analyzer
   *            starts from its initial state.
   * @param buffer
   *            buffer that will be scanned
   * @param offset
   *            offset of the first character that will be scanned
   * @param len
   *            length of the area to be scanned
   * @param lastBuffer
   *            whether this is the last buffer in the document. All the
   *            tokens will be returned including the last possibly incomplete
   *            one. If the data come from the document, the simple rule for
   *            this parameter is (doc.getLength() == stop-position) where
   *            stop-position is the position corresponding to the (offset +
   *            len) in the buffer that comes from the document data.
   * @param stopPosition
   *            position in the document that corresponds to the (offset + len)
   *            offset in the provided buffer. It only makes sense if the data
   *            in the buffer come from the document. It helps in writing
   *            advanced analyzers that need to interact with other data
   *            in the document than only those provided in the character
   *            buffer. If there is no relation to the document data, the
   *            stopPosition parameter must be set to -1, which means an
   *            invalid value. The stop-position is passed (instead of a
   *            start-position) because it doesn't change during the analyzer's
   *            operation. It corresponds to the <tt>stopOffset</tt> that
   *            also doesn't change during the analyzer's operation, so any
   *            buffer-offset can be converted to a document position by
   *            computing <tt>stopPosition + buffer-offset - stopOffset</tt>,
   *            where stopOffset is the instance variable that is assigned
   *            <tt>offset + len</tt> in the body of load() and relocate().
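   *            For illustration (the values are hypothetical): after
   *            <tt>load(null, buf, 0, 100, true, 500)</tt> the stopOffset is
   *            100, so buffer offset 30 corresponds to document position
   *            500 + 30 - 100 = 430.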
   */
  public void load(StateInfo stateInfo, char buffer[], int offset, int len, boolean lastBuffer, int stopPosition) {
    this.buffer = buffer;
    this.offset = offset;
    this.tokenOffset = offset;
    this.stopOffset = offset + len;
    this.lastBuffer = lastBuffer;
    this.stopPosition = stopPosition;

    if (stateInfo != null) {
      loadState(stateInfo);
    }
    else {
      loadInitState();
    }
  }

  /**
   * Relocate scanning to another buffer. This is used to continue scanning
   * after a previously reported EOT. The relocation delta between the current
   * offset and the requested offset is computed and all the offsets are
   * relocated accordingly. If there's a non-zero preScan in the analyzer, it
   * is the caller's responsibility to provide all the preScan characters in
   * the relocation buffer.
   * 
   * @param buffer
   *            next buffer where the scan will continue.
   * @param offset
   *            offset where the scan will continue. It's not decremented by
   *            the current preScan.
   * @param len
   *            length of the area to be scanned. It's not extended by the
   *            current preScan.
   * @param lastBuffer
   *            whether this is the last buffer in the document. All the
   *            tokens will be returned including the last possibly incomplete
   *            one. If the data come from the document, the simple rule for
   *            this parameter is (doc.getLength() == stop-position) where
   *            stop-position is the position corresponding to the (offset +
   *            len) in the buffer that comes from the document data.
   * @param stopPosition
   *            position in the document that corresponds to the (offset + len)
   *            offset in the provided buffer. It only makes sense if the data
   *            in the buffer come from the document. It helps in writing
   *            advanced analyzers that need to interact with other data
   *            in the document than only those provided in the character
   *            buffer. If there is no relation to the document data, the
   *            stopPosition parameter must be set to -1, which means an
   *            invalid value. The stop-position is passed (instead of a
   *            start-position) because it doesn't change during the analyzer's
   *            operation. It corresponds to the <tt>stopOffset</tt> that
   *            also doesn't change during the analyzer's operation, so any
   *            buffer-offset can be converted to a document position by
   *            computing <tt>stopPosition + buffer-offset - stopOffset</tt>,
   *            where stopOffset is the instance variable that is assigned
   *            <tt>offset + len</tt> in the body of relocate().
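   * 
   * A minimal continuation sketch (the name <tt>nextChunk</tt> is illustrative;
   * its first <tt>getPreScan()</tt> characters must repeat the unfinished
   * characters from the end of the previous buffer):
   * 
   * <pre>
   * int preScan = syntax.getPreScan();
   * // nextChunk[0 .. preScan - 1] holds the unfinished characters,
   * // followed by the newly read data
   * syntax.relocate(nextChunk, preScan, nextChunk.length - preScan, true, -1);
   * TokenID id;
   * while ((id = syntax.nextToken()) != null) {
   *     // the first returned token also covers the previously unfinished characters
   * }
   * </pre>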
   */
  public void relocate(char buffer[], int offset, int len, boolean lastBuffer, int stopPosition) {
    this.buffer = buffer;
    this.lastBuffer = lastBuffer;

    int delta = offset - this.offset; // delta according to current offset
    this.offset += delta;
    this.tokenOffset += delta;
    this.stopOffset = offset + len;
    this.stopPosition = stopPosition;
  }

  /** Get the current buffer */
  public char[] getBuffer() {
    return buffer;
  }

  /** Get the current scanning offset */
  public int getOffset() {
    return offset;
  }

  /** Get start of token in scanned buffer. */
  public int getTokenOffset() {
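    // tokenOffset was already advanced past the token in nextToken(),
    // so the token's start is recomputed from offset and tokenLength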
    return offset - tokenLength;
  }

  /** Get length of token in scanned buffer. */
  public int getTokenLength() {
    return tokenLength;
  }

  /** Get the token-context-path of the returned token. */
  public TokenContextPath getTokenContextPath() {
    return tokenContextPath;
  }

  public TokenID getSupposedTokenID() {
    return supposedTokenID;
  }

  /**
   * Get the pre-scan, which is the number of characters between offset and
   * tokenOffset. If there are no more characters in the current buffer, the
   * analyzer returns EOT, but it can be in a state where some characters at
   * the end of the current buffer have already been parsed while the token is
   * still incomplete and cannot be returned yet. The pre-scan value helps
   * to determine how many characters from the end of the current buffer
   * should be present at the beginning of the next buffer so that the current
   * incomplete token can be returned as the first token when parsing the next
   * buffer.
   */
  public int getPreScan() {
    return offset - tokenOffset;
  }

  /**
   * Initialize the analyzer when scanning from the beginning of the document,
   * when the state stored in the syntax mark is null for some reason, or to
   * explicitly reset the analyzer to the initial state. The offsets must not
   * be touched by this method.
   */
  public void loadInitState() {
    state = INIT;
  }

  public void reset() {
    tokenLength = stopOffset = tokenOffset = offset = 0;
    loadInitState();
  }

  /**
   * Load a valid mark state into the analyzer. Offsets are already initialized
   * when this method is called. This method must get the state from the mark
   * and set it in the analyzer. Then it must decrease tokenOffset by the
   * preScan stored in the mark state.
   * 
   * @param stateInfo
   *            mark state to be loaded into the syntax. It must be a non-null
   *            value.
   */
  public void loadState(StateInfo stateInfo) {
    state = stateInfo.getState();
    tokenOffset -= stateInfo.getPreScan();
  }

  /** Store state of this analyzer into given mark state. */
  public void storeState(StateInfo stateInfo) {
    stateInfo.setState(state);
    stateInfo.setPreScan(getPreScan());
  }

  /** Compare state of this analyzer to given state info */
  public int compareState(StateInfo stateInfo) {
    if (stateInfo != null) {
      return ((stateInfo.getState() == state) && stateInfo.getPreScan() == getPreScan()) ? EQUAL_STATE : DIFFERENT_STATE;
    }
    else {
      return DIFFERENT_STATE;
    }
  }

  /** Create state info appropriate for particular analyzer */
  public StateInfo createStateInfo() {
    return new BaseStateInfo();
  }

  /**
   * Get the state name as a string. It can be used for debugging purposes by
   * the developer of a new syntax analyzer. The states that this function
   * recognizes can include all constants used in the analyzer so that it can
   * be used anywhere in the analyzer to convert numbers to more readable strings.
   */
  public String getStateName(int stateNumber) {
    switch (stateNumber) {
    case INIT:
      return "INIT"; // NOI18N

    default:
      return "Unknown state " + stateNumber; // NOI18N
    }
  }

  /** Syntax information as String */
  public String toString() {
    return "tokenOffset=" + tokenOffset // NOI18N
        + ", offset=" + offset // NOI18N
        + ", state=" + getStateName(state) // NOI18N
        + ", stopOffset=" + stopOffset // NOI18N
        + ", lastBuffer=" + lastBuffer; // NOI18N
  }

  /**
   * Interface that stores two basic pieces of information about the state of
   * the whole lexical analyzer - its internal state and preScan.
   */
  public interface StateInfo {

    /** Get the internal state */
    public int getState();

    /** Store the internal state */
    public void setState(int state);

    /** Get the preScan value */
    public int getPreScan();

    /** Store the preScan value */
    public void setPreScan(int preScan);

  }

  /** Base implementation of the StateInfo interface */
  public static class BaseStateInfo implements StateInfo {

    /** analyzer state */
    private int state;

    /** Pre-scan length */
    private int preScan;

    public int getState() {
      return state;
    }

    public void setState(int state) {
      this.state = state;
    }

    public int getPreScan() {
      return preScan;
    }

    public void setPreScan(int preScan) {
      this.preScan = preScan;
    }

    public String toString(Syntax syntax) {
      return "state=" + syntax.getStateName(getState()) + ", preScan=" + getPreScan(); // NOI18N
    }

  }

}