// SearchMethods.java :  » IDE » Schmortopf » Schmortopf » SearchResults » Java Open Source
//
// Java Open Source » IDE » Schmortopf
// Schmortopf » Schmortopf » SearchResults » SearchMethods.java
package Schmortopf.SearchResults;


  /**
   *  Contains static search methods used by text searches.
   *
   */

import java.util.*;
import Schmortopf.Utility.Text.*;
import Shared.Logging.Log;                


/**
 *  Static search methods used by text searches.
 *
 *  Two strategies are offered through the single entry point
 *  {@link #SearchTextOccurences}: an exact substring search (optionally
 *  case-insensitive and/or restricted to whole words) and an approximate
 *  word search (edit distance, optionally pre-filtered by Soundex).
 *
 *  All methods return character positions in ascending order.
 *  For case-insensitive and approximate searches the positions are
 *  computed on a case-converted copy of the text; NOTE(review): for the
 *  rare characters whose case conversion changes the string length these
 *  positions can drift from the offsets in the original buffer.
 */
public class SearchMethods
{

  // Available search methods:
  public final static String[] ApproximateSearchMethodNames = { "Edit-distance", "Soundex"}; // , "Metaphone", "Editex"

  // Indices of the methods inside ApproximateSearchMethodNames.
  private final static int EditDistanceMethodIndex = 0;
  private final static int SoundexMethodIndex = 1;

  // More would be ridiculous, because the user would be confronted
  // with a huge tree. One search can run over thousands of single file searches.
  private final static int MaximumNumberOfHitsPerSingleSearch = 2000;

  // When true, the approximate search logs every examined word and its distance.
  public final static boolean debug = false;


 /**
  *  Searches the textBuffer for the rawSearchedText, either exactly
  *  or approximately.
  *
  *  @param rawSearchedText the text to look for; an empty text yields no hits
  *         in the exact search
  *  @param doCaseSensitiveSearch exact search only: compare with case
  *         (the approximate search always ignores case)
  *  @param doWholeWordsSearch exact search only: accept a hit only when it is
  *         not directly preceded or followed by a Java identifier character
  *  @param doApproximateSearch true selects the approximate word search,
  *         false the exact search
  *  @param approximateSearchMethodIndex is the index of the method in the
  *         array ApproximateSearchMethodNames
  *  @param tolerance is the maximum edit distance up to which a word is
  *         still accepted as an approximate match
  *  @param textBuffer the text to search in
  *  @param doReturnAfterFirstMatch stop as soon as one hit has been found
  *  @return an array with the positions of all found occurrences, ascending
  */
  public static int[] SearchTextOccurences( final String rawSearchedText,
                                            final boolean doCaseSensitiveSearch,
                                            final boolean doWholeWordsSearch,
                                            final boolean doApproximateSearch,
                                            final int approximateSearchMethodIndex,
                                            final int tolerance,
                                            final StringBuffer textBuffer,
                                            final boolean doReturnAfterFirstMatch )
  {
    if( doApproximateSearch )
    {
      return SearchApproximateWordOccurences( rawSearchedText, approximateSearchMethodIndex,
                                              tolerance, doReturnAfterFirstMatch, textBuffer );
    }
    return SearchExactTextOccurences( rawSearchedText, doCaseSensitiveSearch,
                                      doWholeWordsSearch, doReturnAfterFirstMatch, textBuffer );
  }


 /**
  *  Performs an approximate word search.
  *
  *  A case-insensitive exact substring search runs first, because the
  *  word-based approximate pass won't find everything: for example steph
  *  will not match stephan with an edit distance of 1. Afterwards every
  *  word delivered by the WordTokenizer is measured against the searched
  *  text with EditDistance; with the Soundex method only words having the
  *  same Soundex code as the searched text are measured at all.
  *
  *  @param rawSearchedText the text to look for
  *  @param approximateSearchMethodIndex index into ApproximateSearchMethodNames;
  *         any other value returns the exact hits only
  *  @param editTolerance 0 return exact match, 1 allow one error
  *         (insert char, delete char, ...)
  *  @param doReturnAfterFirstMatch stop as soon as one hit has been found
  *  @param textBuffer the text to search in
  *  @return the hit positions, ascending and without duplicates, at most
  *          MaximumNumberOfHitsPerSingleSearch entries
  */
  private static int[] SearchApproximateWordOccurences( final String rawSearchedText,
                                                        final int approximateSearchMethodIndex,
                                                        final int editTolerance,
                                                        final boolean doReturnAfterFirstMatch,
                                                        final StringBuffer textBuffer )
  {
    // The exact hits come back in ascending order, which the duplicate
    // check further down relies on.
    final int[] exact = SearchExactTextOccurences( rawSearchedText, false, false,
                                                   doReturnAfterFirstMatch, textBuffer );
    if( doReturnAfterFirstMatch && exact.length > 0 )
    {
      return exact;
    }

    final boolean useSoundex = ( approximateSearchMethodIndex == SoundexMethodIndex );
    final boolean useEditDistance = ( approximateSearchMethodIndex == EditDistanceMethodIndex );
    if( !useSoundex && !useEditDistance )
    {
      // Unknown method: nothing to add to the exact hits.
      return exact;
    }
    if( exact.length >= MaximumNumberOfHitsPerSingleSearch )
    {
      // The limit is already exhausted by the exact hits.
      return exact;
    }

    final PositionList positions = new PositionList();
    for( int i = 0; i < exact.length; i++ )
    {
      positions.add( exact[i] );
    }

    final String searchTextUP = rawSearchedText.toUpperCase();
    // The Soundex code of the searched text is only needed by the Soundex method.
    final String soundexToSearch = useSoundex ? Soundex.GetSoundexEnglish( searchTextUP ) : null;
    final String debugMarker = useSoundex ? "++++++++++++   " : "****************   ";

    final WordTokenizer wt = new WordTokenizer( textBuffer.toString().toUpperCase() );
    while( ( positions.size() < MaximumNumberOfHitsPerSingleSearch ) && wt.hasNext() )
    {
      final String word = wt.nextWord();

      if( useSoundex && !soundexToSearch.equals( Soundex.GetSoundexEnglish( word ) ) )
      {
        continue; // sounds different, not a candidate
      }

      // metric, allow only up to editTolerance
      final int dst = EditDistance.EditDistance( word, searchTextUP, editTolerance );
      if(debug) Log.Info("   "+word+", "+dst);
      if( ( dst <= 0 ) || ( dst > editTolerance ) )
      {
        // dst==0 is an identical word, which the exact search has found already.
        continue;
      }
      if(debug) Log.Info(debugMarker+word+", "+dst);

      // NOTE(review): getLastWordStartPos() is taken to be the start offset
      // of the word just returned by nextWord() - confirm in WordTokenizer.
      final int position = wt.getLastWordStartPos();

      // The exact pass is a substring search, so it may already have reported
      // the start of this very word (steph inside stephan). Skip such
      // positions, otherwise the caller would receive them twice.
      if( Arrays.binarySearch( exact, position ) >= 0 )
      {
        continue;
      }

      positions.add( position );
      if( doReturnAfterFirstMatch )
      {
        break;
      }
    }

    // sort (because we parsed twice, an exact then an approximate)
    final int[] positionArray = positions.toArray();
    Arrays.sort( positionArray );
    return positionArray;
  }


 /**
  *  Searches all non-overlapping occurrences of rawSearchedText in the
  *  textBuffer by plain substring comparison.
  *
  *  @param rawSearchedText the text to look for; an empty text yields no hits
  *  @param doCaseSensitiveSearch when false, both texts are lowercased
  *         before they are compared
  *  @param doWholeWordsSearch accept a hit only when it is not directly
  *         preceded or followed by a Java identifier character
  *  @param doReturnAfterFirstMatch stop as soon as one hit has been found
  *  @param textBuffer the text to search in
  *  @return the hit positions in ascending order, at most
  *          MaximumNumberOfHitsPerSingleSearch entries
  */
  private static int[] SearchExactTextOccurences( final String rawSearchedText,
                                                  final boolean doCaseSensitiveSearch,
                                                  final boolean doWholeWordsSearch,
                                                  final boolean doReturnAfterFirstMatch,
                                                  final StringBuffer textBuffer )
  {
    if( rawSearchedText.length() == 0 )
    {
      return new int[0];
    }

    final String searchedText;
    final String sourceText;
    if( doCaseSensitiveSearch )
    {
      searchedText = rawSearchedText;
      sourceText = textBuffer.toString();
    }
    else
    {
      searchedText = rawSearchedText.toLowerCase();
      sourceText = textBuffer.toString().toLowerCase();
    }
    // All lengths are taken from the strings that are really compared and
    // indexed below, not from the unconverted inputs.
    final int searchedTextLength = searchedText.length();
    final int sourceTextLength = sourceText.length();

    final PositionList positions = new PositionList();
    int searchIndex = 0;
    while( searchIndex < sourceTextLength )
    {
      final int hitIndex = sourceText.indexOf( searchedText, searchIndex );
      if( hitIndex < 0 )
      {
        break; // no further occurrence
      }

      if( doWholeWordsSearch &&
          !IsDelimitedAsWholeWord( sourceText, hitIndex, hitIndex + searchedTextLength ) )
      {
        // Rejected candidate: advance by one character only, because a valid
        // whole-word occurrence may overlap the rejected one.
        searchIndex = hitIndex + 1;
        continue;
      }

      positions.add( hitIndex );

      // Size limitation : Break when we reach MaximumNumberOfHits added entries.
      // This is IMPORTANT, because it would be a vulnerability of the IDE, if not checked.
      // There is a global check outside, which ends the search over all files,
      // once an overall limit has been reached.
      if( doReturnAfterFirstMatch ||
          ( positions.size() >= MaximumNumberOfHitsPerSingleSearch ) )
      {
        break;
      }

      // Accepted hits do not overlap: continue behind this one.
      searchIndex = hitIndex + searchedTextLength;
    }
    return positions.toArray();
  } // SearchExactTextOccurences


 /**
  *  Tells if the range [startIndex, endIndex) of the text is delimited like
  *  a whole word: the characters directly before and directly behind the
  *  range must not be Java identifier characters. The beginning and the end
  *  of the text count as delimiters.
  */
  private static boolean IsDelimitedAsWholeWord( final String text,
                                                 final int startIndex,
                                                 final int endIndex )
  {
    final boolean leftBorderIsWordDelimiter =
        ( startIndex <= 0 ) ||
        !Character.isJavaIdentifierPart( text.charAt( startIndex - 1 ) );
    final boolean rightBorderIsWordDelimiter =
        ( endIndex >= text.length() ) ||
        !Character.isJavaIdentifierPart( text.charAt( endIndex ) );
    return leftBorderIsWordDelimiter && rightBorderIsWordDelimiter;
  }


 /**
  *  Minimal growable list of int positions. Avoids boxing every position
  *  into an Integer object.
  */
  private final static class PositionList
  {
    private int[] values = new int[ 16 ];
    private int count = 0;

    /** Appends the position, doubling the storage when it is full. */
    void add( final int position )
    {
      if( count == values.length )
      {
        final int[] grown = new int[ values.length * 2 ];
        System.arraycopy( values, 0, grown, 0, count );
        values = grown;
      }
      values[ count++ ] = position;
    }

    /** @return the number of positions added so far */
    int size()
    {
      return count;
    }

    /** @return a copy of the added positions, in insertion order */
    int[] toArray()
    {
      final int[] result = new int[ count ];
      System.arraycopy( values, 0, result, 0, count );
      return result;
    }
  } // PositionList


} // SearchMethods
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.