BGrep: a regular expression search utility, like Unix grep : File Commands « File Input Output « Java






BGrep: a regular expression search utility, like Unix grep

    

/*
 * Copyright (c) 2004 David Flanagan.  All rights reserved.
 * This code is from the book Java Examples in a Nutshell, 3nd Edition.
 * It is provided AS-IS, WITHOUT ANY WARRANTY either expressed or implied.
 * You may study, use, and modify it for any non-commercial purpose,
 * including teaching and use in open-source projects.
 * You may distribute it non-commercially as long as you retain this notice.
 * For a commercial use license, or to purchase the book, 
 * please visit http://www.davidflanagan.com/javaexamples3.
 */
//package je3.nio;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * BGrep: a regular expression search utility, like Unix grep, but
 * block-oriented instead of line-oriented. For any match found, the filename
 * and character position within the file (note: not the line number) are
 * printed along with the text that matched.
 *
 * <pre>
 * Usage: java BGrep [options] &lt;pattern&gt; &lt;files&gt;...
 *
 * Options:
 *   -e &lt;encoding&gt;  specifies an encoding. UTF-8 is the default
 *   -i             enables case-insensitive matching. Use -s also for
 *                  non-ASCII text
 *   -s             enables strict (but slower) processing of non-ASCII
 *                  characters
 * </pre>
 *
 * This program requires that each file to be searched fits into main memory,
 * and so does not work with extremely large files. Files that cannot be read
 * (or that are too large to map in one piece) are reported on standard error
 * and skipped; the remaining files are still searched.
 */
public class BGrep {
  /**
   * Program entry point: parses the options, compiles the pattern, and prints
   * every match found in each named file as {@code filename:position: text}.
   *
   * @param args command-line arguments: options, then the pattern, then one
   *             or more file names
   */
  public static void main(String[] args) {
    String encodingName = "UTF-8"; // Default to UTF-8 encoding
    int flags = Pattern.MULTILINE; // Default regexp flags

    try { // Fatal exceptions are handled after this try block
      // First, process any options. startsWith() is used rather than
      // charAt(0) so that an empty-string argument is not a crash.
      int nextarg = 0;
      while (nextarg < args.length && args[nextarg].startsWith("-")) {
        String option = args[nextarg++];
        if (option.equals("-e")) {
          if (nextarg == args.length) { // -e with no encoding name after it
            usage();
            return;
          }
          encodingName = args[nextarg++];
        } else if (option.equals("-i")) { // case-insensitive matching
          flags |= Pattern.CASE_INSENSITIVE;
        } else if (option.equals("-s")) { // Strict Unicode processing
          flags |= Pattern.UNICODE_CASE; // case-insensitive Unicode
          flags |= Pattern.CANON_EQ; // canonicalize Unicode
        } else {
          System.err.println("Unknown option: " + option);
          usage();
          return;
        }
      }

      // Get the Charset for converting bytes to chars
      Charset charset = Charset.forName(encodingName);

      // Next argument must be a regexp. Compile it to a Pattern object
      if (nextarg == args.length) {
        usage();
        return;
      }
      Pattern pattern = Pattern.compile(args[nextarg++], flags);

      // Require that at least one file is specified
      if (nextarg == args.length) {
        usage();
        return;
      }

      // Loop through each of the specified filenames
      while (nextarg < args.length) {
        String filename = args[nextarg++];
        CharBuffer chars; // This will hold complete text of the file
        try { // Handle per-file errors locally
          chars = decodeFile(filename, charset);
        } catch (IOException e) { // File not found or other problem
          System.err.println(e); // Print error message
          continue; // and move on to the next file
        }

        // This is the basic regexp loop for finding all matches in a
        // CharSequence. Note that CharBuffer implements CharSequence.
        // A Matcher holds state for a given Pattern and text.
        Matcher matcher = pattern.matcher(chars);
        while (matcher.find()) { // While there are more matches
          // Print out details of the match
          System.out.println(filename + ":" + // file name
              matcher.start() + ": " + // character pos
              matcher.group()); // matching text
        }
      }
    }
    // These are the things that can go wrong in the code above
    catch (IllegalCharsetNameException e) { // Malformed encoding name
      System.err.println("Unknown encoding: " + encodingName);
    } catch (UnsupportedCharsetException e) { // Well-formed but unavailable
      System.err.println("Unknown encoding: " + encodingName);
    } catch (PatternSyntaxException e) { // Bad pattern
      System.err.println("Syntax error in search pattern:\n" + e.getMessage());
    }
  }

  /**
   * Reads the whole of the named file and decodes it to characters.
   *
   * @param filename path of the file to read
   * @param charset  charset used to convert the file's bytes to chars
   * @return the decoded contents of the file
   * @throws IOException if the file cannot be opened or mapped, or if it is
   *                     too large to be mapped as a single buffer
   */
  private static CharBuffer decodeFile(String filename, Charset charset) throws IOException {
    // Open a FileChannel to the named file
    FileInputStream stream = new FileInputStream(filename);
    try {
      FileChannel f = stream.getChannel();

      // FileChannel.map() rejects sizes above Integer.MAX_VALUE with an
      // unchecked exception; report it as a per-file I/O problem instead.
      long size = f.size();
      if (size > Integer.MAX_VALUE) {
        throw new IOException(filename + ": file too large to search (" + size + " bytes)");
      }

      // Memory-map the file into one big ByteBuffer. This is
      // easy but may be somewhat inefficient for short files.
      ByteBuffer bytes = f.map(FileChannel.MapMode.READ_ONLY, 0, size);

      // Decode the entire ByteBuffer into one big CharBuffer
      return charset.decode(bytes);
    } finally {
      // Closing the stream closes the channel, too. Doing it in finally
      // guarantees the descriptor is released even when mapping fails.
      stream.close();
    }
  }

  /** A utility method to display invocation syntax and exit. */
  public static void usage() {
    System.err.println("Usage: java BGrep [-e <encoding>] [-i] [-s]" + " <pattern> <filename>...");
    System.exit(1);
  }
}

   
    
    
    
  








Related examples in the same category

1.Touch: set File Last Modified Time
2.File Copy in Java with NIO
3.File Copy in Java
4.Counts words in a file, outputs results in sorted form
5.Copying a file using channels and buffers
6.Copy files using Java IO API
7.Mimic the Unix Grep command
8.Grep tools
9.File concatenation
10.Compress files using the Java ZIP API
11.Delete file using Java IO API
12.Undent - remove leading spaces
13.TeePrintStream tees all PrintStream operations into a file, rather like the UNIX tee(1) command
14.Delete a file from within Java, with error handling
15.DirTree - directory lister, like UNIX ls or DOS and VMS dir
16.Program to empty a directory
17.Report on a file's status in Java
18.Simple directory lister
19.Readonly Files
20.List root directory
21.Rename a file in Java
22.FNFilter - directory lister using FilenameFilter
23.mkdir examples
24.Program to remove files matching a name in a directory
25.Ls directory lister modified to use FilenameFilter
26.Move a File
27.Word Count
28.Diff: text file difference utility.
29.Count chars in a File
30.Move File
31.Get file date and time
32.Return readable file size with selected value measure
33.Move a file
34.File Compressor
35.Recursive Delete File
36.Read number of lines from a File
37.Copy and overwrite files