Perl5Matcher.java :  » Development » Jakarta-oro-2.0.8 » org » apache » oro » text » regex » Java Open Source

Java Open Source » Development » Jakarta oro 2.0.8 
Jakarta oro 2.0.8 » org » apache » oro » text » regex » Perl5Matcher.java
/*
 * $Id: Perl5Matcher.java,v 1.27 2003/11/07 20:16:25 dfs Exp $
 *
 * ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
 *    must not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache" 
 *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
 *    name, without prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


package org.apache.oro.text.regex;

import java.util.*;

/**
 * The Perl5Matcher class is used to match regular expressions
 * (conforming to the Perl5 regular expression syntax) generated by
 * Perl5Compiler.
 * <p>
 * Perl5Compiler and Perl5Matcher are designed with the intent that
 * you use a separate instance of each per thread to avoid the overhead
 * of both synchronization and concurrent access (e.g., a match that takes
 * a long time in one thread will block the progress of another thread with
 * a shorter match).  If you want to use a single instance of each
 * in a concurrent program, you must appropriately protect access to
 * the instances with critical sections.  If you want to share Perl5Pattern
 * instances between concurrently executing instances of Perl5Matcher, you
 * must compile the patterns with {@link Perl5Compiler#READ_ONLY_MASK}.
 *
 * @version @version@
 * @since 1.0
 * @see PatternMatcher
 * @see Perl5Compiler
 */
public final class Perl5Matcher implements PatternMatcher {
  private static final char __EOS = Character.MAX_VALUE;
  private static final int __INITIAL_NUM_OFFSETS = 20;

  private boolean __multiline = false, __lastSuccess = false;
  private boolean __caseInsensitive = false;
  private char __previousChar, __input[], __originalInput[];
  private Perl5Repetition __currentRep;
  private int __numParentheses, __bol, __eol, __currentOffset, __endOffset;

  private char[] __program;
  private int __expSize, __inputOffset, __lastParen;
  private int[] __beginMatchOffsets, __endMatchOffsets;
  private Stack __stack = new Stack();
  private Perl5MatchResult __lastMatchResult = null;

  private static boolean
    __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n)
  {
    int cnt;

    for(cnt = 0; cnt < n; cnt++, s1Offs++, s2Offs++) {
      if(s1Offs >= s1.length)
  return false;
      if(s2Offs >= s2.length)
  return false;
      if(s1[s1Offs] != s2[s2Offs])
  return false;
    }

    return true;
  }

  private static int __findFirst(char[] input, int current, int endOffset,
         char[] mustString)
  {
    int count, saveCurrent;
    char ch;


    if(input.length == 0)
      return endOffset;

    ch = mustString[0];
    // Find the offset of the first character of the must string
    while(current < endOffset) {
      if(ch == input[current]){
  saveCurrent = current;
  count = 0;

  while(current < endOffset && count < mustString.length) {
    if(mustString[count] != input[current])
      break;
    ++count;
    ++current;
  }

  current = saveCurrent;

  if(count >= mustString.length)
    break;
      }
      ++current;
    }

    return current;
  }


  private void __pushState(int parenFloor) {
    int[] state;
    int stateEntries, paren;

    stateEntries = 3*(__expSize - parenFloor);
    if(stateEntries <= 0)
      state = new int[3];
    else
      state = new int[stateEntries + 3];

    state[0] = __expSize;
    state[1] = __lastParen;
    state[2] = __inputOffset;

    for(paren = __expSize; paren > parenFloor; --paren, stateEntries-=3) {
      state[stateEntries]     = __endMatchOffsets[paren];
      state[stateEntries + 1] = __beginMatchOffsets[paren];
      state[stateEntries + 2] = paren;
    }

    __stack.push(state);
  }


  private void __popState() {
    int[] state;
    int entry, paren;

    state = (int[])__stack.pop();

    __expSize     = state[0];
    __lastParen   = state[1];
    __inputOffset = state[2];

    for(entry = 3; entry < state.length; entry+=3) {
      paren = state[entry + 2];
      __beginMatchOffsets[paren] = state[entry + 1];

      if(paren <= __lastParen)
  __endMatchOffsets[paren] = state[entry];
    }

    for(paren = __lastParen + 1; paren <= __numParentheses; paren++) {
      if(paren > __expSize)
  __beginMatchOffsets[paren] = OpCode._NULL_OFFSET;
      __endMatchOffsets[paren] = OpCode._NULL_OFFSET;
    }
  }


  // Initialize globals needed before calling __tryExpression for first time
  private void __initInterpreterGlobals(Perl5Pattern expression, char[] input,
          int beginOffset, int endOffset,
          int currentOffset)
  {
    // Remove this hack after more efficient case-folding and unicode
    // character classes are implemented
    __caseInsensitive            = expression._isCaseInsensitive;
    __input                      = input;
    __endOffset                  = endOffset;
    __currentRep                 = new Perl5Repetition();
    __currentRep._numInstances   = 0;
    __currentRep._lastRepetition = null;
    __program                    = expression._program;
    __stack.setSize(0);

    // currentOffset should always be >= beginOffset and should
    // always be equal to zero when beginOffset equals 0, but we
    // make a weak attempt to protect against a violation of this
    // precondition
    if(currentOffset == beginOffset || currentOffset <= 0)
      __previousChar = '\n';
    else {
      __previousChar = input[currentOffset - 1];
      if(!__multiline && __previousChar == '\n')
  __previousChar = '\0';
    }

    __numParentheses    = expression._numParentheses;
    __currentOffset     = currentOffset;

    __bol = beginOffset;
    __eol = endOffset;

    // Ok, here we're using endOffset as a temporary variable.
    endOffset = __numParentheses + 1;
    if(__beginMatchOffsets == null || endOffset > __beginMatchOffsets.length) {
      if(endOffset < __INITIAL_NUM_OFFSETS)
  endOffset = __INITIAL_NUM_OFFSETS;
      __beginMatchOffsets = new int[endOffset];
      __endMatchOffsets   = new int[endOffset];      
    }
  }

  // Set the match result information.  Only call this if we successfully
  // matched.
  private void __setLastMatchResult() {
    int offs, maxEndOffs = 0;

    //endOffset+=dontTry;

    __lastMatchResult = new Perl5MatchResult(__numParentheses + 1);

    // This can happen when using Perl5StreamInput
    if(__endMatchOffsets[0] > __originalInput.length)
      throw new ArrayIndexOutOfBoundsException();

    __lastMatchResult._matchBeginOffset = __beginMatchOffsets[0];

    while(__numParentheses >= 0) {
      offs = __beginMatchOffsets[__numParentheses];

      if(offs >= 0)
  __lastMatchResult._beginGroupOffset[__numParentheses] =
    offs - __lastMatchResult._matchBeginOffset;
      else
  __lastMatchResult._beginGroupOffset[__numParentheses] =
    OpCode._NULL_OFFSET;

      offs = __endMatchOffsets[__numParentheses];

      if(offs >= 0) {
  __lastMatchResult._endGroupOffset[__numParentheses] =
    offs - __lastMatchResult._matchBeginOffset;
  if(offs > maxEndOffs && offs <= __originalInput.length)
    maxEndOffs = offs;
      } else
  __lastMatchResult._endGroupOffset[__numParentheses] =
    OpCode._NULL_OFFSET;

      --__numParentheses;
    }

    __lastMatchResult._match =
      new String(__originalInput, __beginMatchOffsets[0],
     maxEndOffs - __beginMatchOffsets[0]);

    // Free up for garbage collection
    __originalInput = null;
  }



  // Expects to receive a valid regular expression program.  No checking
  // is done to ensure validity.
  // __originalInput must be set before calling this method for
  // __lastMatchResult to be set correctly.
  // beginOffset marks the beginning of the string
  // currentOffset marks where to start the pattern search
  private boolean __interpret(Perl5Pattern expression, char[] input,
            int beginOffset, int endOffset,
            int currentOffset)
  {
    boolean success;
    int minLength = 0, dontTry = 0, offset;
    char ch, mustString[];

    __initInterpreterGlobals(expression, input, beginOffset, endOffset,
           currentOffset);

    success = false;
    mustString = expression._mustString;

  _mainLoop:
    while(true) {

      if(mustString != null &&
   ((expression._anchor & Perl5Pattern._OPT_ANCH) == 0 ||
    ((__multiline || 
     (expression._anchor & Perl5Pattern._OPT_ANCH_MBOL) != 0)
     && expression._back >= 0)))
  {

  __currentOffset =
    __findFirst(__input, __currentOffset, endOffset, mustString);
  
  if(__currentOffset >= endOffset) {
    if((expression._options & Perl5Compiler.READ_ONLY_MASK) == 0)
      expression._mustUtility++;
    success = false;
    break _mainLoop;
  } else if(expression._back >= 0) {
    __currentOffset-=expression._back;
    if(__currentOffset < currentOffset)
      __currentOffset = currentOffset;
    minLength = expression._back + mustString.length;
  } else if(!expression._isExpensive &&
      (expression._options & Perl5Compiler.READ_ONLY_MASK) == 0 &&
      (--expression._mustUtility < 0)) {
    // Be careful!  The preceding logical expression is constructed
    // so that mustUtility is only decremented if the expression is
    // compiled without READ_ONLY_MASK.
    mustString = expression._mustString = null;
    __currentOffset = currentOffset;
  } else {
    __currentOffset = currentOffset;
    minLength = mustString.length;
  }
      }

      if((expression._anchor & Perl5Pattern._OPT_ANCH) != 0) {
  if(__currentOffset == beginOffset && __tryExpression(beginOffset)) {
    success = true;
    break _mainLoop;
  } else if(__multiline
      || (expression._anchor & Perl5Pattern._OPT_ANCH_MBOL) != 0
      || (expression._anchor & Perl5Pattern._OPT_IMPLICIT) != 0)
    {
      if(minLength > 0)
        dontTry = minLength - 1;
      endOffset-=dontTry;

      if(__currentOffset > currentOffset)
        --__currentOffset;

      while(__currentOffset < endOffset) {
        if(__input[__currentOffset++] == '\n') {
    if(__currentOffset < endOffset &&
       __tryExpression(__currentOffset)) {
      success = true;
      break _mainLoop;
    }
        }
      }
    }

  break _mainLoop;
      }


      if(expression._startString != null) {
  mustString = expression._startString;
  if((expression._anchor & Perl5Pattern._OPT_SKIP) != 0) {
    ch = mustString[0];

    while(__currentOffset < endOffset) {
      if(ch == __input[__currentOffset]) {
        if(__tryExpression(__currentOffset)){
    success = true;
    break _mainLoop;
        }
        ++__currentOffset;
        while(__currentOffset < endOffset &&
        __input[__currentOffset] == ch)
        ++__currentOffset;
      }
      ++__currentOffset;
    }
  } else {

    while((__currentOffset =
     __findFirst(__input, __currentOffset, endOffset, mustString))
    < endOffset){
      if(__tryExpression(__currentOffset)) {
        success = true;
        break _mainLoop;
      }
      ++__currentOffset;
    }
  }

  break _mainLoop;
      }

      if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) {
  boolean doEvery, tmp;
  char op;

  doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0);

  if(minLength > 0)
    dontTry = minLength - 1;
  endOffset -= dontTry;
  tmp = true;

  switch(op = __program[offset]) {
  case OpCode._ANYOF:
    offset = OpCode._getOperand(offset);
    while(__currentOffset < endOffset) {
      ch = __input[__currentOffset];

      if(ch < 256 &&
         (__program[offset + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }

    break;

  case OpCode._ANYOFUN:
  case OpCode._NANYOFUN:
    offset = OpCode._getOperand(offset);
    while(__currentOffset < endOffset) {
      ch = __input[__currentOffset];

      if(__matchUnicodeClass(ch, __program, offset, op)) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }

    break;

  case OpCode._BOUND:
    if(minLength > 0) {
      ++dontTry;
      --endOffset;
    }

    if(__currentOffset != beginOffset) {
      ch = __input[__currentOffset - 1];
      tmp = OpCode._isWordCharacter(ch);
    } else
      tmp = OpCode._isWordCharacter(__previousChar);

    while(__currentOffset < endOffset) {
      ch = __input[__currentOffset];
      if(tmp != OpCode._isWordCharacter(ch)){
        tmp = !tmp;
        if(__tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        }
      }
      ++__currentOffset;
    }

    if((minLength > 0 || tmp) && 
       __tryExpression(__currentOffset)) {
      success = true;
      break _mainLoop;
    }
    break;

  case OpCode._NBOUND:
    if(minLength > 0) {
      ++dontTry;
      --endOffset;
    }

    if(__currentOffset != beginOffset) {
      ch = __input[__currentOffset - 1];
      tmp = OpCode._isWordCharacter(ch);
    } else
      tmp = OpCode._isWordCharacter(__previousChar);

    while(__currentOffset < endOffset) {
      ch = __input[__currentOffset];
      if(tmp != OpCode._isWordCharacter(ch))
        tmp = !tmp;
      else if(__tryExpression(__currentOffset)) {
        success = true;
        break _mainLoop;
      }

      ++__currentOffset;
    }

    if((minLength > 0 || !tmp) &&
       __tryExpression(__currentOffset)) {
      success = true;
      break _mainLoop;
    }
    break;

  case OpCode._ALNUM:
    while(__currentOffset < endOffset) {
      ch = __input[__currentOffset];
      if(OpCode._isWordCharacter(ch)) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }
    break;

  case OpCode._NALNUM:
    while(__currentOffset < endOffset) {
      ch = __input[__currentOffset];
      if(!OpCode._isWordCharacter(ch)) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }
    break;

  case OpCode._SPACE:
    while(__currentOffset < endOffset) {
      if(Character.isWhitespace(__input[__currentOffset])) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }
    break;

  case OpCode._NSPACE:
    while(__currentOffset < endOffset) {
      if(!Character.isWhitespace(__input[__currentOffset])) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }
    break;

  case OpCode._DIGIT:
    while(__currentOffset < endOffset) {
      if(Character.isDigit(__input[__currentOffset])) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }
    break;


  case OpCode._NDIGIT:
    while(__currentOffset < endOffset) {
      if(!Character.isDigit(__input[__currentOffset])) {
        if(tmp && __tryExpression(__currentOffset)) {
    success = true;
    break _mainLoop;
        } else
    tmp = doEvery;
      } else
        tmp = true;
      ++__currentOffset;
    }
    break;
  } // end switch

      } else {
  if(minLength > 0)
    dontTry = minLength - 1;
  endOffset-=dontTry;

  do {
    if(__tryExpression(__currentOffset)) {
      success = true;
      break _mainLoop;
    }
  } while(__currentOffset++ < endOffset);

      }


      break _mainLoop;
    } // end while

    __lastSuccess = success;
    __lastMatchResult = null;

    return success;
  }
  
  private boolean __matchUnicodeClass(char code, char __program[], 
           int offset ,char opcode)
  {
    boolean isANYOF = ( opcode == OpCode._ANYOFUN );

    while( __program[offset] != OpCode._END ){
      if( __program[offset] == OpCode._RANGE ){
  offset++;
  if((code >= __program[offset]) && (code <= __program[offset+1])){
    return isANYOF;
  } else {
    offset+=2;
  }

      } else if(__program[offset] == OpCode._ONECHAR) {
         offset++;
  if(__program[offset++] == code) return isANYOF;

      } else {
  isANYOF = (__program[offset] == OpCode._OPCODE) 
    ? isANYOF : !isANYOF;

  offset++;
  switch ( __program[offset++] ) {
  case OpCode._ALNUM:
    if(OpCode._isWordCharacter(code)) return isANYOF;
    break;
  case OpCode._NALNUM:
    if(!OpCode._isWordCharacter(code)) return isANYOF;
    break;
  case OpCode._SPACE:
    if(Character.isWhitespace(code)) return isANYOF;
    break;
  case OpCode._NSPACE:
    if(!Character.isWhitespace(code)) return isANYOF;
    break;
  case OpCode._DIGIT:
    if(Character.isDigit(code)) return isANYOF;
    break;
  case OpCode._NDIGIT:
    if(!Character.isDigit(code)) return isANYOF;
    break;
  case OpCode._ALNUMC:
    if(Character.isLetterOrDigit(code)) return isANYOF;
    break;
  case OpCode._ALPHA:
    if(Character.isLetter(code)) return isANYOF;
    break;
  case OpCode._BLANK:
    if(Character.isSpaceChar(code)) return isANYOF;
    break;
  case OpCode._CNTRL:
    if(Character.isISOControl(code)) return isANYOF;
    break;
  case OpCode._LOWER:
    if(Character.isLowerCase(code)) return isANYOF;
    // Remove this hack after more efficient case-folding and unicode
    // character classes are implemented
    if(__caseInsensitive && Character.isUpperCase(code))
      return isANYOF;
    break;
  case OpCode._UPPER:
    if(Character.isUpperCase(code)) return isANYOF;
    // Remove this hack after more efficient case-folding and unicode
    // character classes are implemented
    if(__caseInsensitive && Character.isLowerCase(code))
      return isANYOF;
    break;
  case OpCode._PRINT:
    if(Character.isSpaceChar(code)) return isANYOF;
          // Fall through to check if the character is alphanumeric,
    // or a punctuation mark.  Printable characters are either
    // alphanumeric, punctuation marks, or spaces.
  case OpCode._GRAPH:
    if(Character.isLetterOrDigit(code)) return isANYOF;
          // Fall through to check if the character is a punctuation mark.
          // Graph characters are either alphanumeric or punctuation.
  case OpCode._PUNCT:
    switch ( Character.getType(code) ) {
      case Character.DASH_PUNCTUATION:
      case Character.START_PUNCTUATION:
      case Character.END_PUNCTUATION:
      case Character.CONNECTOR_PUNCTUATION:
      case Character.OTHER_PUNCTUATION:
            case Character.MATH_SYMBOL:
            case Character.CURRENCY_SYMBOL:
            case Character.MODIFIER_SYMBOL:
        return isANYOF;
      default:
        break;
      }
    break;
  case OpCode._XDIGIT:
    if( (code >= '0' && code <= '9') ||
        (code >= 'a' && code <= 'f') ||
        (code >= 'A' && code <= 'F')) return isANYOF;
    break;
  case OpCode._ASCII:
    if(code < 0x80)return isANYOF;
  }
      } 
    }
    return !isANYOF;
  }
  
  private boolean __tryExpression(int offset) {
    int count;
    
    __inputOffset = offset;
    __lastParen   = 0;
    __expSize     = 0;

    if(__numParentheses > 0) {
      for(count=0; count <= __numParentheses; count++) {
  __beginMatchOffsets[count] = OpCode._NULL_OFFSET;
  __endMatchOffsets[count]   = OpCode._NULL_OFFSET;
      }
    }

    if(__match(1)){
      __beginMatchOffsets[0] = offset;
      __endMatchOffsets[0]   = __inputOffset;
      return true;
    }

    return false;
  }
    

  private int __repeat(int offset, int max) {
    int scan, eol, operand, ret;
    char ch;
    char op;

    scan = __inputOffset;
    eol  = __eol;

    if(max != Character.MAX_VALUE && max < eol - scan)
      eol = scan + max;

    operand = OpCode._getOperand(offset);

    switch(op = __program[offset]) {

    case OpCode._ANY:
      while(scan < eol && __input[scan] != '\n')
  ++scan;
      break;

    case OpCode._SANY:
      scan = eol;
      break;

    case OpCode._EXACTLY:
      ++operand;
      while(scan < eol && __program[operand] == __input[scan])
  ++scan;
      break;

    case OpCode._ANYOF:
      if(scan < eol && (ch = __input[scan]) < 256) {
  while((ch < 256  ) && (__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
    if(++scan < eol)
      ch = __input[scan];
    else
      break;
  }
      }
      break;

    case OpCode._ANYOFUN:
    case OpCode._NANYOFUN:
      if(scan < eol) {
  ch = __input[scan];
  while(__matchUnicodeClass(ch, __program, operand, op)){
    if(++scan < eol)
      ch = __input[scan];
    else
      break;
  }
      }
      break;

    case OpCode._ALNUM:
      while(scan < eol && OpCode._isWordCharacter(__input[scan]))
  ++scan;
      break;

    case OpCode._NALNUM:
      while(scan < eol && !OpCode._isWordCharacter(__input[scan]))
  ++scan;
      break;

    case OpCode._SPACE:
      while(scan < eol && Character.isWhitespace(__input[scan]))
  ++scan;
      break;

    case OpCode._NSPACE:
      while(scan < eol && !Character.isWhitespace(__input[scan]))
  ++scan;
      break;

    case OpCode._DIGIT:
      while(scan < eol && Character.isDigit(__input[scan]))
  ++scan;
      break;

    case OpCode._NDIGIT:
      while(scan < eol && !Character.isDigit(__input[scan]))
  ++scan;
      break;

    default:
      break;

    }

    ret = scan - __inputOffset;
    __inputOffset = scan;

    return ret;
  }


  private boolean __match(int offset) {
    char nextChar, op;
    int scan, next, input, maxScan, current, line, arg;
    boolean inputRemains = true, minMod = false;
    Perl5Repetition rep;


    input    = __inputOffset;
    inputRemains = (input < __endOffset);
    nextChar = (inputRemains ? __input[input] : __EOS);

    scan     = offset;
    maxScan  = __program.length;

    while(scan < maxScan /*&& scan > 0*/){
      next = OpCode._getNext(__program, scan);

      switch(op = __program[scan]) {

      case OpCode._BOL:
  if(input == __bol ? __previousChar == '\n' :
     (__multiline && (inputRemains || input < __eol) && 
      __input[input - 1] == '\n'))
    break;
  return false;

      case OpCode._MBOL:
  if(input == __bol ? __previousChar == '\n' :
     ((inputRemains || input < __eol) && __input[input - 1] == '\n'))
    break;
  return false;

      case OpCode._SBOL:
  if(input == __bol && __previousChar == '\n')
    break;
  return false;

      case OpCode._GBOL:
  if(input == __bol)
    break;
  return true;

      case OpCode._EOL :
  if((inputRemains || input < __eol) && nextChar != '\n')
    return false;
  if(!__multiline && __eol - input > 1)
    return false;
  break;

      case OpCode._MEOL:
  if((inputRemains || input < __eol) && nextChar != '\n')
    return false;
  break;

      case OpCode._SEOL:
  if((inputRemains || input < __eol) && nextChar != '\n')
    return false;
  if(__eol - input > 1)
    return false;
  break;

      case OpCode._SANY:
  if(!inputRemains && input >= __eol)
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._ANY:
  if((!inputRemains && input >= __eol) || nextChar == '\n')
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._EXACTLY:
  current = OpCode._getOperand(scan);
  line = __program[current++];

  if(__program[current] != nextChar)
    return false;
  if(__eol - input < line)
    return false;

  if(line > 1 && !__compare(__program, current, __input, input, line))
    return false;

  input+=line;
  inputRemains = (input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._ANYOF:
  current = OpCode._getOperand(scan);

  if(nextChar == __EOS && inputRemains)
    nextChar = __input[input];

  if(nextChar >= 256 || (__program[current + (nextChar >> 4)] &
      (1 << (nextChar & 0xf))) != 0)
    return false;

  if(!inputRemains && input >= __eol)
    return false;

  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._ANYOFUN:
      case OpCode._NANYOFUN:
  current = OpCode._getOperand(scan);

  if(nextChar == __EOS && inputRemains)
    nextChar = __input[input];

  if(!__matchUnicodeClass(nextChar, __program, current, op))
    return false;

  if(!inputRemains && input >= __eol)
    return false;

  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._ALNUM:
  if(!inputRemains)
    return false;
  if(!OpCode._isWordCharacter(nextChar))
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._NALNUM:
  if(!inputRemains && input >= __eol)
    return false;
  if(OpCode._isWordCharacter(nextChar))
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;


      case OpCode._NBOUND:
      case OpCode._BOUND:
  boolean a, b;

  if(input == __bol)
    a = OpCode._isWordCharacter(__previousChar);
  else
    a = OpCode._isWordCharacter(__input[input - 1]);

  b = OpCode._isWordCharacter(nextChar);

  if((a == b) == (__program[scan] == OpCode._BOUND))
    return false;
  break;

      case OpCode._SPACE:
  if(!inputRemains && input >= __eol)
    return false;
  if(!Character.isWhitespace(nextChar))
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;


      case OpCode._NSPACE:
  if(!inputRemains)
    return false;
  if(Character.isWhitespace(nextChar))
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._DIGIT:
  if(!Character.isDigit(nextChar))
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._NDIGIT:
  if(!inputRemains && input >= __eol)
    return false;
  if(Character.isDigit(nextChar))
    return false;
  inputRemains = (++input < __endOffset);
  nextChar = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._REF:
  arg = OpCode._getArg1(__program, scan);
  current = __beginMatchOffsets[arg];

  if(current == OpCode._NULL_OFFSET)
    return false;

  if(__endMatchOffsets[arg] == OpCode._NULL_OFFSET)
    return false;

  if(current == __endMatchOffsets[arg])
    break;

  if(__input[current] != nextChar)
    return false;

  line = __endMatchOffsets[arg] - current;

  if(input + line > __eol)
    return false;

  if(line > 1 && !__compare(__input, current, __input, input, line))
    return false;

  input+=line;
  inputRemains = (input < __endOffset);
  nextChar     = (inputRemains ? __input[input] : __EOS);
  break;

      case OpCode._NOTHING:
  break;

      case OpCode._BACK:
  break;

      case OpCode._OPEN:
  arg = OpCode._getArg1(__program, scan);
  __beginMatchOffsets[arg] = input;

  if(arg > __expSize)
    __expSize = arg;
  break;

      case OpCode._CLOSE:
  arg = OpCode._getArg1(__program, scan);
  __endMatchOffsets[arg] = input;

  if(arg > __lastParen)
    __lastParen = arg;
  break;

      case OpCode._CURLYX:
  rep = new Perl5Repetition();
  rep._lastRepetition = __currentRep;
  __currentRep = rep;

  rep._parenFloor = __lastParen;
  rep._numInstances = -1;
  rep._min    = OpCode._getArg1(__program, scan);
  rep._max    = OpCode._getArg2(__program, scan);
  rep._scan   = OpCode._getNextOperator(scan) + 2;
  rep._next   = next;
  rep._minMod = minMod;
  // Must initialize to -1 because if we initialize to 0 and are
  // at the beginning of the input the OpCode._WHILEM case will
  // not work right.
  rep._lastLocation = -1;
  __inputOffset = input;

  // use minMod as temporary
  minMod = __match(OpCode._getPrevOperator(next));

  // leave scope call not pertinent?
  __currentRep = rep._lastRepetition;
  return minMod;

      case OpCode._WHILEM:
  rep = __currentRep;

  arg = rep._numInstances + 1;
  __inputOffset = input;

  if(input == rep._lastLocation) {
    __currentRep = rep._lastRepetition;
    line = __currentRep._numInstances;
    if(__match(rep._next))
      return true;
    __currentRep._numInstances = line;
    __currentRep = rep;
    return false;
  }

  if(arg < rep._min) {
    rep._numInstances = arg;
    rep._lastLocation = input;
    if(__match(rep._scan))
      return true;
    rep._numInstances = arg - 1;
    return false;
  }

  if(rep._minMod) {
    __currentRep = rep._lastRepetition;
    line = __currentRep._numInstances;
    if(__match(rep._next))
      return true;
    __currentRep._numInstances = line;
    __currentRep = rep;

    if(arg >= rep._max)
      return false;

    __inputOffset = input;
    rep._numInstances = arg;
    rep._lastLocation = input;

    if(__match(rep._scan))
      return true;

    rep._numInstances = arg - 1;
    return false;
  }

  if(arg < rep._max) {
    __pushState(rep._parenFloor);
    rep._numInstances = arg;
    rep._lastLocation = input;
    if(__match(rep._scan))
      return true;
    __popState();
    __inputOffset = input;
  }

  __currentRep = rep._lastRepetition;
  line = __currentRep._numInstances;
  if(__match(rep._next))
    return true;

  rep._numInstances = line;
  __currentRep = rep;
  rep._numInstances = arg - 1;
  return false;

      case OpCode._BRANCH:
  if(__program[next] != OpCode._BRANCH)
    next = OpCode._getNextOperator(scan);
  else {
    int lastParen;

    lastParen = __lastParen;

    do {

      __inputOffset = input;

      if(__match(OpCode._getNextOperator(scan)))
        return true;

      for(arg = __lastParen; arg > lastParen; --arg)
        //__endMatchOffsets[arg] = 0;
        __endMatchOffsets[arg] = OpCode._NULL_OFFSET;
      __lastParen = arg;

      scan = OpCode._getNext(__program, scan);
    } while(scan != OpCode._NULL_OFFSET &&
      __program[scan] == OpCode._BRANCH);
    return false;
  }

  break;

      case OpCode._MINMOD:
  minMod = true;
  break;


      case OpCode._CURLY:
      case OpCode._STAR:
      case OpCode._PLUS:
  if(op == OpCode._CURLY) {
    line = OpCode._getArg1(__program, scan);
    arg  = OpCode._getArg2(__program, scan);
    scan = OpCode._getNextOperator(scan) + 2;
  } else if(op == OpCode._STAR) {
    line = 0;
    arg  = Character.MAX_VALUE;
    scan = OpCode._getNextOperator(scan);
  } else {
    line = 1;
    arg  = Character.MAX_VALUE;
    scan = OpCode._getNextOperator(scan);
  }

  if(__program[next] == OpCode._EXACTLY) {
    nextChar = __program[OpCode._getOperand(next) + 1];
    current  = 0;
  } else {
    nextChar = __EOS;
    current  = -1000;
  }
  __inputOffset = input;

  if(minMod) {
    minMod = false;

    if(line > 0 && __repeat(scan, line) < line)
      return false;


    while(arg >= line || (arg == Character.MAX_VALUE && line > 0)) {
      // there may be a bug here with respect to
      // __inputOffset >= __endOffset, but it seems to be right for
      // now.  the issue is with __inputOffset being reset later.
      // is this test really supposed to happen here?
      if(current == -1000 || __inputOffset >= __endOffset ||
         __input[__inputOffset] == nextChar) {
        if(__match(next))
    return true;
      }

      __inputOffset = input + line;

      if(__repeat(scan, 1) != 0) {
        ++line;
        __inputOffset = input + line;
      } else
        return false;
    }

  } else {
    arg = __repeat(scan, arg);

    if(line < arg && OpCode._opType[__program[next]] == OpCode._EOL &&
       ((!__multiline && __program[next] != OpCode._MEOL) ||
              __program[next] == OpCode._SEOL))
      line = arg;

    while(arg >= line) {
      // there may be a bug here with respect to
      // __inputOffset >= __endOffset, but it seems to be right for
      // now.  the issue is with __inputOffset being reset later.
      // is this test really supposed to happen here?
      if(current == -1000 || __inputOffset >= __endOffset ||
         __input[__inputOffset] == nextChar) {
        if(__match(next))
    return true;
      }

      --arg;
      __inputOffset = input + arg;
    }
  }

  return false;

      case OpCode._SUCCEED:
      case OpCode._END:
  __inputOffset = input;
  // This enforces the rule that two consecutive matches cannot have
  // the same end offset.
  if(__inputOffset == __lastMatchInputEndOffset)
    return false;
  return true;

      case OpCode._IFMATCH:
  __inputOffset = input;
  scan = OpCode._getNextOperator(scan);
  if(!__match(scan))
    return false;
  break;

      case OpCode._UNLESSM:
  __inputOffset = input;
  scan = OpCode._getNextOperator(scan);
  if(__match(scan))
    return false;
  break;


      default:
  // todo: Need to throw an exception here.

      } // end switch

      //scan = (next > 0 ? next : 0);
      scan = next;
    } // end while scan



    return false;
  }


  /**
   * Set whether or not subsequent calls to {@link #matches matches()}
   * or {@link #contains contains()} should treat the input as
   * consisting of multiple lines.  The default behavior is for 
   * input to be treated as consisting of multiple lines.  This method
   * should only be called if the Perl5Pattern used for a match was
   * compiled without either of the Perl5Compiler.MULTILINE_MASK or
   * Perl5Compiler.SINGLELINE_MASK flags, and you want to alter the
   * behavior of how the <b>^</b>, <b>$</b>, and <b>.</b> metacharacters are
   * interpreted on the fly.  The compilation options used when compiling
   * a pattern ALWAYS override the behavior specified by setMultiline().  See
   * {@link Perl5Compiler} for more details.
   * <p>
   * @param multiline  If set to true treats the input as consisting of
   *        multiple lines with respect to the <b>^</b> and <b>$</b> 
   *        metacharacters.  If set to false treats the input as consisting
   *        of a single line with respect to the <b>^</b> and <b>$</b> 
   *        metacharacters.
   */
  public void setMultiline(boolean multiline) { __multiline = multiline; }


  /**
   * @return True if the matcher is treating input as consisting of multiple
   *         lines with respect to the <b>^</b> and <b>$</b> metacharacters,
   *         false otherwise.
   */
  public boolean isMultiline() { return __multiline; }

  char[] _toLower(char[] input) {
    int current;
    char[] inp;
    // todo:
    // Certainly not the best way to do case insensitive matching.
    // Must definitely change this in some way, but for now we
    // do what Perl does and make a copy of the input, converting
    // it all to lowercase.  This is truly better handled in the
    // compilation phase.
    inp = new char[input.length];
    System.arraycopy(input, 0, inp, 0, input.length);
    input = inp;

    // todo: Need to inline toLowerCase()
    for(current = 0; current < input.length; current++)
      if(Character.isUpperCase(input[current]))
  input[current] = Character.toLowerCase(input[current]);

    return input;
  }


  /**
   * Determines if a prefix of a string (represented as a char[])
   * matches a given pattern, starting from a given offset into the string.
   * If a prefix of the string matches the pattern, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The char[] to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @param offset The offset at which to start searching for the prefix.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(char[] input, Pattern pattern, int offset) {
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;
    __originalInput = input;
    if(expression._isCaseInsensitive)
      input = _toLower(input);

    __initInterpreterGlobals(expression, input, 0, input.length, offset);

    __lastSuccess = __tryExpression(offset);
    __lastMatchResult = null;

    return __lastSuccess;
  }


  /**
   * Determines if a prefix of a string (represented as a char[])
   * matches a given pattern.
   * If a prefix of the string matches the pattern, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The char[] to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(char[] input, Pattern pattern) {
    return matchesPrefix(input, pattern, 0);
  }


  /**
   * Determines if a prefix of a string matches a given pattern.
   * If a prefix of the string matches the pattern, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The String to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(String input, Pattern pattern) {
    return matchesPrefix(input.toCharArray(), pattern, 0);
  }

  /**
   * Determines if a prefix of a PatternMatcherInput instance
   * matches a given pattern.  If there is a match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  Unlike the
   * {@link #contains(PatternMatcherInput, Pattern)}
   * method, the current offset of the PatternMatcherInput argument
   * is not updated.  However, unlike the
   * {@link #matches matches(PatternMatcherInput, Pattern)} method,
   * matchesPrefix() will start its search from the current offset
   * rather than the begin offset of the PatternMatcherInput.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The PatternMatcherInput to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern) {
    char[] inp;
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;

    __originalInput = input._originalBuffer;
    if(expression._isCaseInsensitive) {
      if(input._toLowerBuffer == null)
  input._toLowerBuffer = _toLower(__originalInput);
      inp = input._toLowerBuffer;
    } else
      inp = __originalInput;

    __initInterpreterGlobals(expression, inp, input._beginOffset,
           input._endOffset, input._currentOffset);
    __lastSuccess = __tryExpression(input._currentOffset);
    __lastMatchResult = null;

    return __lastSuccess;
  }



  /**
   * Determines if a string (represented as a char[]) exactly 
   * matches a given pattern.  If
   * there is an exact match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  The pattern must be
   * a Perl5Pattern instance, otherwise a ClassCastException will
   * be thrown.  You are not required to, and indeed should NOT try to
   * (for performance reasons), catch a ClassCastException because it
   * will never be thrown as long as you use a Perl5Pattern as the pattern
   * parameter.
   * <p>
   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
   * your expression and a $ at the end of your expression in Perl5
   * and using the =~ operator, even though in many cases it will be
   * equivalent.  matches() literally looks for an exact match according
   * to the rules of Perl5 expression matching.  Therefore, if you have
   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
   * it will not produce an exact match.  But <em>foot|foo</em> will
   * produce an exact match for either <em>foot</em> or <em>foo</em>.
   * Remember, Perl5 regular expressions do not match the longest
   * possible match.  From the perlre manpage:
   * <blockquote>
   *   Alternatives are tried from left to right, so the first
   *   alternative found for which the entire expression matches,
   *   is the one that is chosen. This means that alternatives
   *   are not necessarily greedy. For example: when matching
   *   foo|foot against "barefoot", only the "foo" part will
   *   match, as that is the first alternative tried, and it
   *   successfully matches the target string.
   * </blockquote>
   * <p>
   * @param input  The char[] to test for an exact match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean matches(char[] input, Pattern pattern) {
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;
    __originalInput = input;
    if(expression._isCaseInsensitive)
      input = _toLower(input);

    __initInterpreterGlobals(expression, input, 0, input.length, 0);
    __lastSuccess = (__tryExpression(0) &&
         __endMatchOffsets[0] == input.length);
    __lastMatchResult = null;

    return __lastSuccess;
  }


  /**
   * Determines if a string exactly matches a given pattern.  If
   * there is an exact match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  The pattern must be
   * a Perl5Pattern instance, otherwise a ClassCastException will
   * be thrown.  You are not required to, and indeed should NOT try to
   * (for performance reasons), catch a ClassCastException because it
   * will never be thrown as long as you use a Perl5Pattern as the pattern
   * parameter.
   * <p>
   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
   * your expression and a $ at the end of your expression in Perl5
   * and using the =~ operator, even though in many cases it will be
   * equivalent.  matches() literally looks for an exact match according
   * to the rules of Perl5 expression matching.  Therefore, if you have
   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
   * it will not produce an exact match.  But <em>foot|foo</em> will
   * produce an exact match for either <em>foot</em> or <em>foo</em>.
   * Remember, Perl5 regular expressions do not match the longest
   * possible match.  From the perlre manpage:
   * <blockquote>
   *   Alternatives are tried from left to right, so the first
   *   alternative found for which the entire expression matches,
   *   is the one that is chosen. This means that alternatives
   *   are not necessarily greedy. For example: when matching
   *   foo|foot against "barefoot", only the "foo" part will
   *   match, as that is the first alternative tried, and it
   *   successfully matches the target string.
   * </blockquote>
   * <p>
   * @param input  The String to test for an exact match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean matches(String input, Pattern pattern) {
    return matches(input.toCharArray(), pattern);
  }


  /**
   * Determines if the contents of a PatternMatcherInput instance
   * exactly matches a given pattern.  If
   * there is an exact match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  Unlike the
   * {@link #contains(PatternMatcherInput, Pattern)}
   * method, the current offset of the PatternMatcherInput argument
   * is not updated.  You should remember that the region between
   * the begin (NOT the current) and end offsets of the PatternMatcherInput
   * will be tested for an exact match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
   * your expression and a $ at the end of your expression in Perl5
   * and using the =~ operator, even though in many cases it will be
   * equivalent.  matches() literally looks for an exact match according
   * to the rules of Perl5 expression matching.  Therefore, if you have
   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
   * it will not produce an exact match.  But <em>foot|foo</em> will
   * produce an exact match for either <em>foot</em> or <em>foo</em>.
   * Remember, Perl5 regular expressions do not match the longest
   * possible match.  From the perlre manpage:
   * <blockquote>
   *   Alternatives are tried from left to right, so the first
   *   alternative found for which the entire expression matches,
   *   is the one that is chosen. This means that alternatives
   *   are not necessarily greedy. For example: when matching
   *   foo|foot against "barefoot", only the "foo" part will
   *   match, as that is the first alternative tried, and it
   *   successfully matches the target string.
   * </blockquote>
   * <p>
   * @param input  The PatternMatcherInput to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean matches(PatternMatcherInput input, Pattern pattern) {
    char[] inp;
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;

    __originalInput = input._originalBuffer;
    if(expression._isCaseInsensitive) {
      if(input._toLowerBuffer == null)
  input._toLowerBuffer = _toLower(__originalInput);
      inp = input._toLowerBuffer;
    } else
      inp = __originalInput;

    __initInterpreterGlobals(expression, inp, input._beginOffset,
           input._endOffset, input._beginOffset);

    __lastMatchResult = null;

    if(__tryExpression(input._beginOffset)) {
      if(__endMatchOffsets[0] == input._endOffset ||
   input.length() == 0 || input._beginOffset == input._endOffset) {
  __lastSuccess = true;
  return true;
      }
    }

    __lastSuccess = false;

    return false;
  }



  /**
   * Determines if a string contains a pattern.  If the pattern is
   * matched by some substring of the input, a MatchResult instance
   * representing the <b> first </b> such match is made acessible via 
   * {@link #getMatch()}.  If you want to access
   * subsequent matches you should either use a PatternMatcherInput object
   * or use the offset information in the MatchResult to create a substring
   * representing the remaining input.  Using the MatchResult offset 
   * information is the recommended method of obtaining the parts of the
   * string preceeding the match and following the match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * @param input  The String to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean contains(String input, Pattern pattern) {
    return contains(input.toCharArray(), pattern);
  }


  /**
   * Determines if a string (represented as a char[]) contains a pattern.
   * If the pattern is
   * matched by some substring of the input, a MatchResult instance
   * representing the <b> first </b> such match is made acessible via 
   * {@link #getMatch()}.  If you want to access
   * subsequent matches you should either use a PatternMatcherInput object
   * or use the offset information in the MatchResult to create a substring
   * representing the remaining input.  Using the MatchResult offset 
   * information is the recommended method of obtaining the parts of the
   * string preceeding the match and following the match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * @param input  The char[] to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean contains(char[] input, Pattern pattern) {
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;

    __originalInput = input;

    if(expression._isCaseInsensitive)
      input = _toLower(input);

    return __interpret(expression, input, 0, input.length, 0);
  }


  private static final int __DEFAULT_LAST_MATCH_END_OFFSET = -100;
  private int __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;
  /**
   * Determines if the contents of a PatternMatcherInput, starting from the
   * current offset of the input contains a pattern.
   * If a pattern match is found, a MatchResult
   * instance representing the <b>first</b> such match is made acessible via 
   * {@link #getMatch()}.  The current offset of the
   * PatternMatcherInput is set to the offset corresponding to the end
   * of the match, so that a subsequent call to this method will continue
   * searching where the last call left off.  You should remember that the
   * region between the begin and end offsets of the PatternMatcherInput are
   * considered the input to be searched, and that the current offset
   * of the PatternMatcherInput reflects where a search will start from.
   * Matches extending beyond the end offset of the PatternMatcherInput
   * will not be matched.  In other words, a match must occur entirely
   * between the begin and end offsets of the input.  See
   * {@link PatternMatcherInput} for more details.
   * <p>
   * As a side effect, if a match is found, the PatternMatcherInput match
   * offset information is updated.  See the
   * {@link PatternMatcherInput#setMatchOffsets(int, int)}
   * method for more details.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * This method is usually used in a loop as follows:
   * <blockquote><pre>
   * PatternMatcher matcher;
   * PatternCompiler compiler;
   * Pattern pattern;
   * PatternMatcherInput input;
   * MatchResult result;
   *
   * compiler = new Perl5Compiler();
   * matcher  = new Perl5Matcher();
   *
   * try {
   *   pattern = compiler.compile(somePatternString);
   * } catch(MalformedPatternException e) {
   *   System.err.println("Bad pattern.");
   *   System.err.println(e.getMessage());
   *   return;
   * }
   *
   * input   = new PatternMatcherInput(someStringInput);
   *
   * while(matcher.contains(input, pattern)) {
   *   result = matcher.getMatch();  
   *   // Perform whatever processing on the result you want.
   * }
   *
   * </pre></blockquote>
   * <p>
   * @param input  The PatternMatcherInput to test for a match.
   * @param pattern  The Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean contains(PatternMatcherInput input, Pattern pattern) {
    char[] inp;
    Perl5Pattern expression;
    boolean matchFound;


    //if(input.length() > 0) {
    // We want to allow a null string to match at the end of the input
    // which is why we don't check endOfInput.  Not sure if this is a
    // safe thing to do or not.
    if(input._currentOffset > input._endOffset)
      return false;
    //}
    /* else 
      if(input._endOfInput())
  return false;
  */
    expression = (Perl5Pattern)pattern;
    __originalInput = input._originalBuffer;

    // Todo:
    // Really should only reduce to lowercase that part of the
    // input that is necessary, instead of the whole thing.
    // Adjust MatchResult offsets accordingly.  Actually, pass an adjustment
    // value to __interpret.
    __originalInput = input._originalBuffer;
    if(expression._isCaseInsensitive) {
      if(input._toLowerBuffer == null)
  input._toLowerBuffer = _toLower(__originalInput);
      inp = input._toLowerBuffer;
    } else
      inp = __originalInput;

    __lastMatchInputEndOffset = input.getMatchEndOffset();

    matchFound =
      __interpret(expression, inp, input._beginOffset, input._endOffset,
      input._currentOffset);

    if(matchFound) {
      input.setCurrentOffset(__endMatchOffsets[0]);
      input.setMatchOffsets(__beginMatchOffsets[0], __endMatchOffsets[0]);
    } else {
      input.setCurrentOffset(input._endOffset + 1);
    }

    // Restore so it doesn't interfere with other unrelated matches.
    __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;

    return matchFound;
  }


  /**
   * Fetches the last match found by a call to a matches() or contains()
   * method.  If you plan on modifying the original search input, you
   * must call this method BEFORE you modify the original search input,
   * as a lazy evaluation technique is used to create the MatchResult.
   * This reduces the cost of pattern matching when you don't care about
   * the actual match and only care if the pattern occurs in the input.
   * Otherwise, a MatchResult would be created for every match found,
   * whether or not the MatchResult was later used by a call to getMatch().
   * <p>
   * @return A MatchResult instance containing the pattern match found
   *         by the last call to any one of the matches() or contains()
   *         methods.  If no match was found by the last call, returns
   *         null. 
   */
  public MatchResult getMatch() { 
    if(!__lastSuccess)
      return null;

    if(__lastMatchResult == null)
      __setLastMatchResult();

    return __lastMatchResult;
  }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.