// ColumnExtractor.java :  » Media » tweet-detector » com » hrstc » trec » Java Open Source
//
// Java Open Source » Media » tweet detector
// tweet detector » com » hrstc » trec » ColumnExtractor.java
package com.hrstc.trec;

import java.io.*;
import java.util.*;

import com.hrstc.utils.*;

/**
 * ColumnExtractor.java
 *
 * Created on November 3, 2004, 8:41 AM
 *
 * Extracts specified whitespace-separated columns from every line of a text
 * file and writes the result to <code>&lt;input file&gt;.MatIn</code>.
 *
 * Two modes are supported, selected by the number of arguments:
 * <ul>
 *   <li>exactly two arguments (<code>file fromIdx</code>): keep every column
 *       from <code>fromIdx</code> onward; <code>fromIdx</code> is 1 based;</li>
 *   <li>otherwise (<code>file col col ...</code>): keep only the listed
 *       columns; these indices are 0 based.</li>
 * </ul>
 *
 * regular args: queries_even.txt.result 4 5 6 7 8 9 10
 * lucene args: queries.txt.result 4 5 6 7 8 9 10
 * desc run args: (none recorded)
 *
 * @author  Neil O. Rouben
 */
public class ColumnExtractor
{
    
    /** Creates a new instance of ColumnExtractor */
    public ColumnExtractor()
    {
    }
    
    /**
     * Reads the file named by <code>args[0]</code> line by line, extracts the
     * requested columns from each line and writes one output line per input
     * line to <code>args[0] + ".MatIn"</code>.
     * <p>
     * If there are exactly 2 args, all columns starting with the (1 based)
     * column <code> args[1] </code> are extracted; otherwise only the
     * (0 based) columns listed in the arguments are extracted.
     * <p>
     * Both files are closed even when reading or writing fails.
     * 
     * @param args file name followed by either a single start column or a
     *             list of column indices; must contain at least the file name
     * @throws IOException if the input cannot be read or the output cannot
     *                     be written
     * @throws NumberFormatException if there are exactly 2 args and
     *                     <code>args[1]</code> is not an integer
     */
    public void process( String[] args ) throws IOException
    {
        BufferedReader in = new BufferedReader( new FileReader( args[0] ) );
        try
        {
            Writer out = new BufferedWriter( new FileWriter( args[0] + ".MatIn" ) );
            try
            {
                // The mode and the start column do not change per line, so
                // decide and parse once instead of on every iteration.
                boolean rangeMode = ( args.length == 2 );
                int fromIdx = rangeMode ? Integer.parseInt( args[1] ) : 0;
                
                // readLine() returning null is the end-of-input signal;
                // ready() only tells whether a read would block and must not
                // be used to detect the end of the file.
                String line;
                while ( ( line = in.readLine() ) != null )
                {
                    String outLine;
                    if ( rangeMode )
                    {
                        // Range extraction
                        outLine = extractColumns( line, fromIdx );
                    }
                    else
                    {
                        // Specific columns extraction
                        outLine = extractColumns( line, args );
                    }
                    
                    out.write( outLine + "\n" );
                }
            }
            finally
            {
                // Flushes the buffered output and releases the file handle.
                out.close();
            }
        }
        finally
        {
            in.close();
        }
    }
    
    
    /**
     * Extracts specific columns: a token is kept when its 0 based position,
     * rendered as a decimal string, is found in <code>args</code> by
     * <code>Utils.search</code>.
     * <p>
     * NOTE(review): the whole argument array is searched, including
     * <code>args[0]</code> (the file name); presumably
     * <code>Utils.search</code> does an exact string match and returns -1
     * when nothing matches - confirm against its implementation.
     * 
     * @param str  line to split on whitespace
     * @param args - from main
     * @return the selected tokens, each followed by a single space
     */
    private String extractColumns( String str, String[] args )
    {
        StringTokenizer tknzr = new StringTokenizer( str );
        StringBuffer buf = new StringBuffer();
        
        // Walk the tokens, tracking the 0 based column index
        for ( int i = 0; tknzr.hasMoreTokens(); i++ )
        {
            String token = tknzr.nextToken();
            if ( Utils.search( args, String.valueOf( i ) ) != -1 )
            {
                buf.append( token ).append( " " );
            }
        }
        
        return buf.toString();
    }
    
    
    
    /**
     * Extracts a column range: every token whose 1 based position is at
     * least <code>fromIdx</code> is kept.
     * 
     * @param str     line to split on whitespace
     * @param fromIdx - inclusive 1 based
     * @return the selected tokens, each followed by a single space
     */
    private String extractColumns( String str, int fromIdx )
    {
        StringBuffer buf = new StringBuffer();
        StringTokenizer tknzr = new StringTokenizer( str );
        
        // Walk the tokens, tracking the 1 based column index
        for ( int i = 1; tknzr.hasMoreTokens(); i++ )
        {
            String token = tknzr.nextToken();
            if ( i >= fromIdx )
            {
                buf.append( token ).append( " " );
            }
        }
        
        return buf.toString();
    }
    
    
    /**
     * Command line entry point. Runs the extraction described by
     * <code>args</code> and prints <code>FIN</code> when it completes.
     * <p>
     * With only 2 args, extracts all columns starting with the (1 based)
     * column <code> args[1] </code>; otherwise extracts the listed columns.
     * 
     * @param args fileName col0 col2 (listed columns are 0 based)
     * @throws IOException if the input cannot be read or the output cannot
     *                     be written
     */
    public static void main(String[] args) throws IOException
    {
        ColumnExtractor extractor = new ColumnExtractor();
        extractor.process(args);
        System.out.println( "FIN" );
    }
    
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.