Strings -- extract printable strings from binary file : String search « Data Type « Java






Strings -- extract printable strings from binary file

          
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

/**
 * Strings -- extract printable strings from binary file
 * 
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: Strings.java,v 1.3 2004/02/08 23:57:29 ian Exp $
 */

public class Strings {

  /** Punctuation that, in addition to ASCII letters and digits, counts as string content. */
  private static final String PUNCTUATION = "/-:.,_$%'()[]<>";

  /** Minimum number of characters a run must contain before it is reported. */
  protected int minLength = 4;

  /**
   * Return true if the character is one of the printable ASCII characters
   * this tool treats as part of a string: a-z, A-Z, 0-9, or one of the
   * punctuation characters in {@link #PUNCTUATION}.
   * Character.isLetterOrDigit() is deliberately not used, because it accepts
   * letters and digits from every Unicode range, not just ASCII.
   * Note that the space character is NOT accepted here; the caller decides
   * when a space may continue a string.
   *
   * @param ch the character to test
   * @return true if ch may be part of an extracted string
   */
  protected boolean isStringChar(char ch) {
    if (ch >= 'a' && ch <= 'z') {
      return true;
    }
    if (ch >= 'A' && ch <= 'Z') {
      return true;
    }
    if (ch >= '0' && ch <= '9') {
      return true;
    }
    return PUNCTUATION.indexOf(ch) >= 0;
  }

  /**
   * Process one file: scan the stream byte by byte and pass every run of
   * string characters that is at least {@link #minLength} long to
   * {@link #report(String, StringBuffer)}.
   * A space is kept only when it follows at least one string character, so
   * runs never start with a space but may contain or end with spaces.
   * The stream is always closed before this method returns, even if reading
   * fails. An IOException is reported on System.err and not rethrown.
   *
   * @param fileName name passed through to report() to label each match
   * @param inStream the stream to scan; it is wrapped in a buffer and closed
   */
  protected void process(String fileName, InputStream inStream) {
    // Buffering alone cuts the runtime by about 66% on large files.
    BufferedInputStream is = new BufferedInputStream(inStream);
    try {
      try {
        // StringBuffer (not StringBuilder) because report() takes a StringBuffer.
        StringBuffer sb = new StringBuffer();
        int i;

        // Read a byte, cast it to char, check if part of printable string.
        while ((i = is.read()) != -1) {
          char ch = (char) i;
          if (isStringChar(ch) || (sb.length() > 0 && ch == ' ')) {
            // If so, build up string.
            sb.append(ch);
          } else if (sb.length() > 0) {
            // If not, the current run has ended: output it if long enough.
            if (sb.length() >= minLength) {
              report(fileName, sb);
            }
            sb.setLength(0);
          }
        }

        // A run that reaches end-of-stream has no terminating byte; flush it
        // here so the last string in the file is not lost.
        if (sb.length() >= minLength) {
          report(fileName, sb);
        }
      } finally {
        // Close on every path so a failed read does not leak the stream.
        is.close();
      }
    } catch (IOException e) {
      // Diagnostics go to stderr so they do not mix with the extracted strings.
      System.err.println("IOException: " + e);
    }
  }

  /**
   * This simple main program looks after filenames and opening files and such
   * like for you. With no arguments it scans standard input; otherwise it
   * scans each named file in turn, printing an error for any file that
   * cannot be opened and continuing with the next one.
   *
   * @param av the names of the files to scan; may be empty
   */
  public static void main(String[] av) {
    Strings o = new Strings();
    if (av.length == 0) {
      o.process("standard input", System.in);
    } else {
      for (int i = 0; i < av.length; i++) {
        try {
          // process() closes the stream it is given.
          o.process(av[i], new FileInputStream(av[i]));
        } catch (FileNotFoundException e) {
          System.err.println(e);
        }
      }
    }
  }

  /**
   * Output a match. Made a separate method for use by subclassers.
   *
   * @param fName the name of the file (or input) the string was found in
   * @param theString the extracted string; the buffer is reused by the
   *     caller after this method returns, so copy it if it must be kept
   */
  protected void report(String fName, StringBuffer theString) {
    System.out.println(fName + ": " + theString);
  }
}


           
         
    
    
    
    
    
    
    
    
    
  








Related examples in the same category

1.String Region Match Demo
2.Palindrome
3.Look for particular sequences in sentences
4.Java Search String
5.Java String endsWith
6.Java String startsWith
7.Search a substring Anywhere
8.Starts with, ignore case( regular expressions )
9.Ends with, ignore case( regular expressions )
10.Anywhere, ignore case( regular expressions )
11.Searching a String for a Character or a Substring
12.Not found returns -1
13.If a string contains a specific word
14.Not found
15.if a String starts with a digit or uppercase letter
16.Search a String to find the first index of any character in the given set of characters.
17.Search a String to find the first index of any character not in the given set of characters.
18.Searches a String for substrings delimited by a start and end tag, returning all matching substrings in an array.
19.Helper functions to query a string's end portion. The comparison is case insensitive.
20.Helper functions to query a string's start portion. The comparison is case insensitive.
21.Wrapper for arrays of ordered strings. This verifies the arrays and supports efficient lookups.
22.Returns an index into arra (or -1) where the character is not in the charset byte array.
23.Returns an int[] array of length segments containing the distribution count of the elements in unsorted int[] array with values between min and max (range).
24.Returns the next index of a character from the chars string
25.Finds the first index within a String, handling null.
26.Finds the last index within a String from a start position, handling null.
27.Finds the n-th index within a String, handling null.
28.Case insensitive check if a String ends with a specified suffix.
29.Case insensitive check if a String starts with a specified prefix.
30.Case insensitive removal of a substring if it is at the beginning of a source string, otherwise returns the source string.
31.Case insensitive removal of a substring if it is at the end of a source string, otherwise returns the source string.
32.Check if a String ends with a specified suffix.
33.Check if a String starts with a specified prefix.
34.Determine if a String is contained in a String Collection
35.Determine if a String is contained in a String Collection, ignoring case
36.Determine if a String is contained in a String [], ignoring case
37.Determine if a String is contained in a String [], ignoring case or not as specified
38.Determine if a String is contained in a String[]
39.Determines if the specified string contains only Unicode letters or digits as defined by Character#isLetterOrDigit(char)
40.Determining the validity of various XML names
41.Return the nth index of the given token occurring in the given string
42.Find the earliest index of any of a set of potential substrings.
43.Find the latest index of any of a set of potential substrings.
44.Fast String Search
45.Performs continuous matching of a pattern in a given string.
46.Count match