Example usage for org.apache.commons.io FileUtils lineIterator

Introduction

On this page you can find example usage of org.apache.commons.io.FileUtils.lineIterator.

Prototype

public static LineIterator lineIterator(File file) throws IOException 

Document

Returns an Iterator for the lines in a File using the default encoding for the VM.
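
A minimal, self-contained sketch of the typical call pattern (the file name input.txt is illustrative): obtain the iterator, consume the lines, and close it in a finally block so the underlying reader is released even if processing fails. Because this overload uses the VM default encoding, the two-argument overload lineIterator(File, String encoding) is preferable when the file's charset is known.

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;

public class LineIteratorDemo {
    public static void main(String[] args) throws IOException {
        File file = new File("input.txt"); // hypothetical input file
        LineIterator it = FileUtils.lineIterator(file);
        try {
            while (it.hasNext()) {
                String line = it.nextLine();
                System.out.println(line);
            }
        } finally {
            it.close(); // releases the underlying Reader
        }
    }
}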

Usage

From source file:com.sangupta.fileanalysis.formats.ApacheLogFileHandler.java

@Override
public void initialize(Database database, File file) {
    super.setDBAndFile(database, file);
    try {
        iterator = FileUtils.lineIterator(this.file);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:avantssar.aslanpp.testing.DiskSpecificationsProvider.java

private boolean isSpec(File f) {
    boolean isSpecification = false;
    try {
        LineIterator li = FileUtils.lineIterator(f);
        while (li.hasNext()) {
            String line = li.nextLine().trim();
            if (line.length() > 0) {
                if (line.startsWith("%")) {
                    continue; // skip comment lines
                }
                if (line.startsWith("specification")) {
                    isSpecification = true;
                }
                break; // the first non-blank, non-comment line decides
            }
        }
        li.close();
    } catch (IOException e) {
        Debug.logger.error("Failed to decide if specification or not: " + f.getAbsolutePath(), e);
    }
    return isSpecification;
}

From source file:at.ac.ait.ubicity.fileloader.FileLoader.java

/**
 * @param _fileInfo A FileInformation object representing usage information on the file we are supposed to load: line count already ingested, last usage time...
 * @param _keySpace Cassandra key space into which to ingest
 * @param _host Cassandra host / server
 * @param _batchSize MutationBatch size
 * @throws Exception Shouldn't happen, although the Disruptor may throw an Exception under duress
 */
@SuppressWarnings("unchecked")
public final static void load(final FileInformation _fileInfo, final String _keySpace, final String _host,
        final int _batchSize) throws Exception {

    if (!cassandraInitialized) {
        keySpace = AstyanaxInitializer.doInit("Test Cluster", _host, _keySpace);
        cassandraInitialized = true;
    }

    LongTimeStampSorter tsSorter = new LongTimeStampSorter();
    Thread tTSSorter = new Thread(tsSorter);
    tTSSorter.setPriority(Thread.MAX_PRIORITY - 1);
    tTSSorter.setName("long timestamp sorter ");
    tTSSorter.start();
    //get the log id from the file's URI
    final String log_id = _fileInfo.getURI().toString();

    final MutationBatch batch = keySpace.prepareMutationBatch();

    logger.info("got keyspace " + keySpace.getKeyspaceName() + " from Astyanax initializer");

    final LineIterator onLines = FileUtils.lineIterator(new File(_fileInfo.getURI()));

    final ExecutorService exec = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2);

    ColumnFamily crawl_stats = null;

    AggregationJob aggregationJob = new AggregationJob(keySpace, crawl_stats);
    Thread tAggJob = new Thread(aggregationJob);
    tAggJob.setName("Monitrix loader / aggregation job ");
    tAggJob.setPriority(Thread.MIN_PRIORITY + 1);
    tAggJob.start();
    logger.info("[FILELOADER] started aggregation job, ring buffer running");

    final Disruptor<SingleLogLineAsString> disruptor = new Disruptor(SingleLogLineAsString.EVENT_FACTORY,
            (int) Math.pow(TWO, 17), exec);
    SingleLogLineAsStringEventHandler.batch = batch;
    SingleLogLineAsStringEventHandler.keySpace = keySpace;
    SingleLogLineAsStringEventHandler.batchSize = _batchSize;
    SingleLogLineAsStringEventHandler.LOG_ID = log_id;
    SingleLogLineAsStringEventHandler.tsSorter = tsSorter;
    SingleLogLineAsStringEventHandler.aggregationJob = aggregationJob;

    //The EventHandler contains the actual logic for ingesting
    final EventHandler<SingleLogLineAsString> handler = new SingleLogLineAsStringEventHandler();

    disruptor.handleEventsWith(handler);

    //get our Aggregate job in place

    //we are almost ready to start
    final RingBuffer<SingleLogLineAsString> rb = disruptor.start();

    int _lineCount = 0;
    long _start, _lapse;
    _start = System.nanoTime();

    int _linesAlreadyProcessed = _fileInfo.getLineCount();

    //cycle through the lines already processed
    while (_lineCount < _linesAlreadyProcessed) {
        onLines.nextLine();
        _lineCount++;
    }

    //now get down to the work we actually must do, and fill the ring buffer
    logger.info("begin proccessing of file " + _fileInfo.getURI() + " @line #" + _lineCount);
    while (onLines.hasNext()) {

        final long _seq = rb.next();
        final SingleLogLineAsString event = rb.get(_seq);
        event.setValue(onLines.nextLine());
        rb.publish(_seq);
        _lineCount++;
    }
    _lapse = System.nanoTime() - _start;
    logger.info("ended proccessing of file " + _fileInfo.getURI() + " @line #" + _lineCount);

    //stop, waiting for last threads still busy to finish their work
    disruptor.shutdown();

    //update the file info; this will land in the cache
    _fileInfo.setLineCount(_lineCount);
    _fileInfo.setLastAccess(System.currentTimeMillis());
    int _usageCount = _fileInfo.getUsageCount();
    _fileInfo.setUsageCount(_usageCount + 1); // post-increment here would have passed the stale value

    //make sure we release resources
    onLines.close();

    logger.info(
            "handled " + (_lineCount - _linesAlreadyProcessed) + " log lines in " + _lapse + " nanoseconds");

    //now go to aggregation step
    SortedSet<Long> timeStamps = new TreeSet<Long>(tsSorter.timeStamps);

    long _minTs = timeStamps.first();
    long _maxTs = timeStamps.last();
    logger.info("**** min TimeStamp = " + _minTs);
    logger.info("**** max TimeStamp = " + _maxTs);

    StatsTableActualizer.update(_fileInfo.getURI().toString(), _minTs, _maxTs, _lineCount);

    //        AggregationJob aggJob = new AggregationJob( keySpace, _host, _batchSize );
    //        Thread tAgg = new Thread( aggJob );
    //        tAgg.setName( "aggregation job " );
    //        tAgg.setPriority( Thread.MAX_PRIORITY - 1 );
    //        tAgg.start();

}

From source file:ctrus.pa.bow.en.EnBagOfWords.java

@Override
public void create() {
    try {
        Collection<File> srcfiles = getSourceDocuments("*");
        int totalFiles = srcfiles.size();
        int currentFile = 0;
        CtrusHelper.printToConsole("Number of files to process - " + srcfiles.size());

        for (File srcFile : srcfiles) {

            // Read each line
            Iterator<String> lines = FileUtils.lineIterator(srcFile);

            if (_options.hasOption(DefaultOptions.DOCUMENT_PER_LINE)) {
                while (lines.hasNext()) {
                    String line = lines.next();
                    String delimiter = " ";
                    if (_options.hasOption(DefaultOptions.DOCUMENT_ID_DELIMITER))
                        delimiter = _options.getOption(DefaultOptions.DOCUMENT_ID_DELIMITER);
                    String[] docAndContent = line.split(delimiter);
                    if (docAndContent.length > 1 && !docAndContent[1].isEmpty()) {

                        String docref = getDocumentId(docAndContent[0]);

                        // Add document to the vocabulary first before adding terms
                        Vocabulary.getInstance().addDocument(docref, docAndContent[0]);

                        addTerms(docAndContent[1].split("\\p{Space}"), docref);
                        writeToOutput(docref);
                        reset();
                    }
                }
            } else {
                // Add document to file name mapping
                String fileName = srcFile.getName();
                //fileName = fileName.substring(0, fileName.lastIndexOf("."));

                String docref = getDocumentId(fileName);

                // Add document to the vocabulary first before adding terms
                Vocabulary.getInstance().addDocument(docref, fileName);

                while (lines.hasNext()) {
                    String line = lines.next();
                    addTerms(line.split("\\p{Space}"), docref);
                }

                writeToOutput(docref); // Write BOW to output file
                reset(); // Reusing BOW, make sure to reset

            }

            // Update the counter and print progress
            currentFile++;
            CtrusHelper.progressMonitor("Progress - ", currentFile, totalFiles);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:io.druid.segment.realtime.firehose.LocalFirehoseFactory.java

@Override
public Firehose connect(StringInputRowParser firehoseParser) throws IOException {
    log.info("Searching for all [%s] in and beneath [%s]", filter, baseDir.getAbsoluteFile());

    Collection<File> foundFiles = FileUtils.listFiles(baseDir.getAbsoluteFile(), new WildcardFileFilter(filter),
            TrueFileFilter.INSTANCE);

    if (foundFiles == null || foundFiles.isEmpty()) {
        throw new ISE("Found no files to ingest! Check your schema.");
    }
    log.info("Found files: " + foundFiles);

    final LinkedList<File> files = Lists.newLinkedList(foundFiles);

    return new FileIteratingFirehose(new Iterator<LineIterator>() {
        @Override
        public boolean hasNext() {
            return !files.isEmpty();
        }

        @Override
        public LineIterator next() {
            try {
                return FileUtils.lineIterator(files.poll());
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, firehoseParser);
}

From source file:com.fides.Agent.java

private void dumpPropfileContents(File propFile) {
    LineIterator iter = null;
    try {
        iter = FileUtils.lineIterator(propFile);
        while (iter.hasNext()) {
            logger.debug(iter.next());
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    } finally {
        if (iter != null)
            iter.close();
    }
}
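
The finally-based cleanup above is the classic idiom for LineIterator. In newer Commons IO releases LineIterator also implements Closeable, so the same method can be written with try-with-resources; a sketch under that version assumption:

private void dumpPropfileContents(File propFile) {
    // assumes a Commons IO version whose LineIterator implements Closeable
    try (LineIterator iter = FileUtils.lineIterator(propFile)) {
        while (iter.hasNext()) {
            logger.debug(iter.next());
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
}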

From source file:com.github.xbn.testdev.StubFunctionUtil.java

public static final Appendable appendForSourceX(Appendable to_appendTo, File source_code) throws IOException {
    CompositionStubUtil.appendJavaFilePathWithDotsNoDtJava(to_appendTo, source_code);
    to_appendTo.append(LINE_SEP).append(LINE_SEP);
    Iterator<String> li = FileUtils.lineIterator(source_code); // throws NullPointerException if source_code is null

    while (li.hasNext()) {
        String sLn = StringUtil.ltrim(li.next());
        if (sLn.startsWith(sSTUB_LINE_MARKER_PREFIX)) {
            appendStubsX(to_appendTo, li);
        }
    }
    return to_appendTo;
}

From source file:com.github.xbn.io.PlainTextFileUtil.java

/**
   <p>Get a line-iterator for a file.</p>
 * @return  <code>{@link org.apache.commons.io.FileUtils}.{@link org.apache.commons.io.FileUtils#lineIterator(File) lineIterator}(file)</code>
 * @exception  RTIOException  If attempting to open the file results in an {@code java.io.IOException}
 * @see  #getLineIterator(String, String)
 **/
public static final Iterator<String> getLineIterator(File file, String file_varName) {
    try {
        return FileUtils.lineIterator(file);
    } catch (IOException iox) {
        throw new RTIOException(file_varName + "=" + file, iox);
    } catch (RuntimeException rx) {
        throw CrashIfObject.nullOrReturnCause(file, file_varName, null, rx);
    }
}

From source file:es.ua.dlsi.lexicalinformation.Corpus.java

/**
 * Method that retrieves all the lines in the corpus containing any of the 
 * surface forms produced by a given candidate.
 * @param c Candidate generating the surface forms to be searched
 * @param dic Dictionary from which the candidate is extracted
 * @return Returns all the lines in the corpus containing any of the surface forms
 * produced by a given candidate
 */
public Set<String> GetAllExamplesOfInflections(Candidate c, Dictionary dic) {
    Set<String> inflectedwordforms = c.GetSurfaceForms(dic);
    Set<String> examples = new LinkedHashSet<String>();
    LineIterator corpus_it = null;
    try {
        corpus_it = FileUtils.lineIterator(new File(this.path));
    } catch (FileNotFoundException ex) {
        System.err.println("Error while trying to open '" + this.path + "' file.");
        System.exit(-1);
    } catch (IOException ex) {
        System.err.println("Error while reading '" + this.path + "' file.");
        System.exit(-1);
    }
    while (corpus_it.hasNext()) {
        String line = corpus_it.nextLine();
        for (String word : inflectedwordforms) {
            //If the surface form appears in the sentence...
            if (line.matches("^" + word + " .*") || line.matches(".* " + word + "$")
                    || line.matches(".* " + word + " .*")) {
                examples.add(line);
            }
        }
    }
    corpus_it.close();
    return examples;
}
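
A design note on the matching above: three separate regexes test for the surface form at the start, middle, or end of the line, and they misbehave if a word contains regex metacharacters. A single precompiled pattern with quoting is a more robust alternative; a hypothetical sketch (Pattern and Pattern.quote come from java.util.regex, and (^|\s)/(\s|$) treat any whitespace as a boundary, a slight generalization of the original space-only checks):

// hypothetical replacement for the three matches() calls;
// compile once per surface form, outside the line loop
Pattern p = Pattern.compile("(^|\\s)" + Pattern.quote(word) + "(\\s|$)");
if (p.matcher(line).find()) {
    examples.add(line);
}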

From source file:com.sangupta.httptools.DownloadUrlCommand.java

@Override
public void run() {
    File file = new File(this.urlFile);
    if (!file.exists()) {
        System.out.println("URL file cannot be found.");
        return;
    }

    if (!file.isFile()) {
        System.out.println("URL file does not represent a valid file.");
        return;
    }

    if (this.numThreads <= 0 || this.numThreads > 50) {
        System.out.println("Number of assigned threads should be between 1 and 50");
        return;
    }

    outputDir = new File(this.outputFolder);
    if (outputDir.exists() && !outputDir.isDirectory()) {
        System.out.println("Output folder does not represent a valid directory");
        return;
    }

    if (!outputDir.exists()) {
        outputDir.mkdirs();
    }

    // try and parse and read all URLs
    int line = 0; // incremented before each line is processed
    try {
        LineIterator iterator = FileUtils.lineIterator(file);
        while (iterator.hasNext()) {
            ++line;
            String readURL = iterator.next();
            createURLTask(readURL);
        }
    } catch (IOException e) {
        System.out.println("Unable to read URLs from the file at line: " + line);
        return;
    }

    // all set - create number of threads
    // and start fetching
    ExecutorService service = Executors.newFixedThreadPool(this.numThreads);

    final long start = System.currentTimeMillis();
    for (Runnable runnable : this.downloadTasks) {
        service.submit(runnable);
    }

    // initialize some variables
    this.numTasks = this.downloadTasks.size();
    this.downloadTasks.clear();

    if (this.numTasks > 1000) {
        this.splitFolders = true;
    }

    // shutdown
    shutdownAndAwaitTermination(service);
    final long end = System.currentTimeMillis();

    // everything done
    System.out.println(this.numTasks + " urls downloaded in " + (end - start) + " millis.");
}