List of usage examples for org.apache.commons.io FileUtils lineIterator
public static LineIterator lineIterator(File file) throws IOException
Returns an Iterator for the lines in a File
using the default encoding for the VM. From source file:com.sangupta.fileanalysis.formats.ApacheLogFileHandler.java
@Override public void initialize(Database database, File file) { super.setDBAndFile(database, file); try {//from w ww . j a va2 s. c o m iterator = FileUtils.lineIterator(this.file); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:avantssar.aslanpp.testing.DiskSpecificationsProvider.java
/**
 * Decides whether a file is a specification by looking at its first meaningful line.
 *
 * <p>Blank lines and lines whose trimmed text starts with {@code %} are skipped. The first
 * remaining line decides the outcome: the file counts as a specification only when that
 * line, trimmed, starts with {@code specification}.
 *
 * @param f the file to inspect
 * @return {@code true} if the first non-blank line that does not start with {@code %}
 *         starts with {@code specification}; {@code false} otherwise, including when the
 *         file cannot be opened (the failure is logged)
 */
private boolean isSpec(File f) {
    boolean isSpecification = false;
    try {
        LineIterator li = FileUtils.lineIterator(f);
        try {
            while (li.hasNext()) {
                // Trim once and reuse; every check below works on the trimmed text.
                String trimmed = li.nextLine().trim();
                if (trimmed.length() == 0 || trimmed.startsWith("%")) {
                    continue;
                }
                // First meaningful line reached: it alone decides the result.
                isSpecification = trimmed.startsWith("specification");
                break;
            }
        } finally {
            // Release the underlying reader even on early exit or failure.
            li.close();
        }
    } catch (IOException e) {
        Debug.logger.error("Failed to decide if specification or not: " + f.getAbsolutePath(), e);
    }
    return isSpecification;
}
From source file:at.ac.ait.ubicity.fileloader.FileLoader.java
/** * // w w w . ja v a 2 s. c o m * @param _fileInfo A FileInformation object representing usage information on the file we are supposed to load: line count already ingested, last usage time... * @param _keySpace Cassandra key space into which to ingest * @param _host Cassandra host / server * @param _batchSize MutationBatch size * @throws Exception Shouldn't happen, although the Disruptor may throw an Exception under duress */ @SuppressWarnings("unchecked") public final static void load(final FileInformation _fileInfo, final String _keySpace, final String _host, final int _batchSize) throws Exception { if (!cassandraInitialized) { keySpace = AstyanaxInitializer.doInit("Test Cluster", _host, _keySpace); cassandraInitialized = true; } LongTimeStampSorter tsSorter = new LongTimeStampSorter(); Thread tTSSorter = new Thread(tsSorter); tTSSorter.setPriority(Thread.MAX_PRIORITY - 1); tTSSorter.setName("long timestamp sorter "); tTSSorter.start(); //get the log id from the file's URI final String log_id = _fileInfo.getURI().toString(); final MutationBatch batch = keySpace.prepareMutationBatch(); logger.info("got keyspace " + keySpace.getKeyspaceName() + " from Astyanax initializer"); final LineIterator onLines = FileUtils.lineIterator(new File(_fileInfo.getURI())); final ExecutorService exec = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2); ColumnFamily crawl_stats = null; AggregationJob aggregationJob = new AggregationJob(keySpace, crawl_stats); Thread tAggJob = new Thread(aggregationJob); tAggJob.setName("Monitrix loader / aggregation job "); tAggJob.setPriority(Thread.MIN_PRIORITY + 1); tAggJob.start(); logger.info("[FILELOADER] started aggregation job, ring buffer running"); final Disruptor<SingleLogLineAsString> disruptor = new Disruptor(SingleLogLineAsString.EVENT_FACTORY, (int) Math.pow(TWO, 17), exec); SingleLogLineAsStringEventHandler.batch = batch; SingleLogLineAsStringEventHandler.keySpace = keySpace; 
SingleLogLineAsStringEventHandler.batchSize = _batchSize; SingleLogLineAsStringEventHandler.LOG_ID = log_id; SingleLogLineAsStringEventHandler.tsSorter = tsSorter; SingleLogLineAsStringEventHandler.aggregationJob = aggregationJob; //The EventHandler contains the actual logic for ingesting final EventHandler<SingleLogLineAsString> handler = new SingleLogLineAsStringEventHandler(); disruptor.handleEventsWith(handler); //get our Aggregate job in place //we are almost ready to start final RingBuffer<SingleLogLineAsString> rb = disruptor.start(); int _lineCount = 0; long _start, _lapse; _start = System.nanoTime(); int _linesAlreadyProcessed = _fileInfo.getLineCount(); //cycle through the lines already processed while (_lineCount < _linesAlreadyProcessed) { onLines.nextLine(); _lineCount++; } //now get down to the work we actually must do, and fill the ring buffer logger.info("begin proccessing of file " + _fileInfo.getURI() + " @line #" + _lineCount); while (onLines.hasNext()) { final long _seq = rb.next(); final SingleLogLineAsString event = rb.get(_seq); event.setValue(onLines.nextLine()); rb.publish(_seq); _lineCount++; } _lapse = System.nanoTime() - _start; logger.info("ended proccessing of file " + _fileInfo.getURI() + " @line #" + _lineCount); //stop, waiting for last threads still busy to finish their work disruptor.shutdown(); //update the file info, this will land in the cache _fileInfo.setLineCount(_lineCount); _fileInfo.setLastAccess(System.currentTimeMillis()); int _usageCount = _fileInfo.getUsageCount(); _fileInfo.setUsageCount(_usageCount++); //make sure we release resources onLines.close(); logger.info( "handled " + (_lineCount - _linesAlreadyProcessed) + " log lines in " + _lapse + " nanoseconds"); //now go to aggregation step SortedSet<Long> timeStamps = new TreeSet(tsSorter.timeStamps); long _minTs = timeStamps.first(); long _maxTs = timeStamps.last(); logger.info("**** min TimeStamp = " + _minTs); logger.info("**** max TimeStamp = " + _maxTs); 
StatsTableActualizer.update(_fileInfo.getURI().toString(), _minTs, _maxTs, _lineCount); // AggregationJob aggJob = new AggregationJob( keySpace, _host, _batchSize ); // Thread tAgg = new Thread( aggJob ); // tAgg.setName( "aggregation job " ); // tAgg.setPriority( Thread.MAX_PRIORITY - 1 ); // tAgg.start(); }
From source file:ctrus.pa.bow.en.EnBagOfWords.java
@Override public void create() { try {// ww w. j a va 2s .c o m Collection<File> srcfiles = getSourceDocuments("*"); int totalFiles = srcfiles.size(); int currentFile = 0; CtrusHelper.printToConsole("Number of files to process - " + srcfiles.size()); for (File srcFile : srcfiles) { // Read each line Iterator<String> lines = FileUtils.lineIterator(srcFile); if (_options.hasOption(DefaultOptions.DOCUMENT_PER_LINE)) { while (lines.hasNext()) { String line = lines.next(); String delimiter = " "; if (_options.hasOption(DefaultOptions.DOCUMENT_ID_DELIMITER)) delimiter = _options.getOption(DefaultOptions.DOCUMENT_ID_DELIMITER); String[] docAndContent = line.split(delimiter); if (docAndContent.length > 1 && !docAndContent[1].isEmpty()) { String docref = getDocumentId(docAndContent[0]); // Add document to the vocabulary first before adding terms Vocabulary.getInstance().addDocument(docref, docAndContent[0]); addTerms(docAndContent[1].split("\\p{Space}"), docref); writeToOutput(docref); reset(); } } } else { // Add document to file name mapping String fileName = srcFile.getName(); //fileName = fileName.substring(0, fileName.lastIndexOf(".")); String docref = getDocumentId(fileName); // Add document to the vocabulary first before adding terms Vocabulary.getInstance().addDocument(docref, fileName); while (lines.hasNext()) { String line = lines.next(); addTerms(line.split("\\p{Space}"), docref); } writeToOutput(docref); // Write BOW to output file reset(); // Reusing BOW, make sure to reset } // Update the counter and print progress currentFile++; CtrusHelper.progressMonitor("Progress - ", currentFile, totalFiles); } } catch (Exception e) { e.printStackTrace(); } }
From source file:io.druid.segment.realtime.firehose.LocalFirehoseFactory.java
@Override public Firehose connect(StringInputRowParser firehoseParser) throws IOException { log.info("Searching for all [%s] in and beneath [%s]", filter, baseDir.getAbsoluteFile()); Collection<File> foundFiles = FileUtils.listFiles(baseDir.getAbsoluteFile(), new WildcardFileFilter(filter), TrueFileFilter.INSTANCE);//from ww w . j a v a 2s . c om if (foundFiles == null || foundFiles.isEmpty()) { throw new ISE("Found no files to ingest! Check your schema."); } log.info("Found files: " + foundFiles); final LinkedList<File> files = Lists.newLinkedList(foundFiles); return new FileIteratingFirehose(new Iterator<LineIterator>() { @Override public boolean hasNext() { return !files.isEmpty(); } @Override public LineIterator next() { try { return FileUtils.lineIterator(files.poll()); } catch (Exception e) { throw Throwables.propagate(e); } } @Override public void remove() { throw new UnsupportedOperationException(); } }, firehoseParser); }
From source file:com.fides.Agent.java
/**
 * Writes every line of the given file to the debug log.
 *
 * <p>If the file cannot be opened the stack trace is printed. The line iterator, once
 * opened, is always closed before returning.
 *
 * @param propFile the file whose contents are logged
 */
private void dumpPropfileContents(File propFile) {
    LineIterator contents = null;
    try {
        for (contents = FileUtils.lineIterator(propFile); contents.hasNext();) {
            logger.debug(contents.next());
        }
    } catch (IOException openFailure) {
        openFailure.printStackTrace();
    } finally {
        if (contents != null) {
            contents.close();
        }
    }
}
From source file:com.github.xbn.testdev.StubFunctionUtil.java
public static final Appendable appendForSourceX(Appendable to_appendTo, File source_code) throws IOException { CompositionStubUtil.appendJavaFilePathWithDotsNoDtJava(to_appendTo, source_code); to_appendTo.append(LINE_SEP).append(LINE_SEP); Iterator<String> li = FileUtils.lineIterator(source_code); //Throws npx if null String sClassName = null;/*from ww w.j a v a 2 s. c om*/ while (li.hasNext()) { String sLn = StringUtil.ltrim(li.next()); if (sLn.startsWith(sSTUB_LINE_MARKER_PREFIX)) { appendStubsX(to_appendTo, li); } } return to_appendTo; }
From source file:com.github.xbn.io.PlainTextFileUtil.java
/**
   <p>Get a line-iterator for a file.</p>

 * @param file the file to iterate over
 * @param file_varName descriptive name of the {@code file} parameter, used in error messages
 * @return <code>{@link org.apache.commons.io.FileUtils}.{@link org.apache.commons.io.FileUtils#lineIterator(File) lineIterator}(file)</code>
 * @exception RTIOException If attempting to open the file results in an {@code java.io.IOException}
 * @see #getLineIterator(String, String)
 **/
public static final Iterator<String> getLineIterator(File file, String file_varName) {
    Iterator<String> lineItr;
    try {
        lineItr = FileUtils.lineIterator(file);
    } catch (IOException openFailure) {
        final String context = file_varName + "=" + file;
        throw new RTIOException(context, openFailure);
    } catch (RuntimeException unexpected) {
        throw CrashIfObject.nullOrReturnCause(file, file_varName, null, unexpected);
    }
    return lineItr;
}
From source file:es.ua.dlsi.lexicalinformation.Corpus.java
/** * Method that retrieves all the lines in the corpus containing any of the * surface forms produced by a given candidate. * @param c Candidate generating the surface forms to be searched * @param dic Dictionary form which the candidate is extracted * @return Returns all the lines in the corpus containing any of the surface forms * produced by a given candidate/*from w ww . j a v a 2 s . c om*/ */ public Set<String> GetAllExamplesOfInflections(Candidate c, Dictionary dic) { Set<String> inflectedwordforms = c.GetSurfaceForms(dic); Set<String> examples = new LinkedHashSet<String>(); LineIterator corpus_it = null; try { corpus_it = FileUtils.lineIterator(new File(this.path)); } catch (FileNotFoundException ex) { System.err.println("Error while trying to open '" + this.path + "' file."); System.exit(-1); } catch (IOException ex) { System.err.println("Error while reading '" + this.path + "' file."); System.exit(-1); } while (corpus_it.hasNext()) { String line = corpus_it.nextLine(); for (String word : inflectedwordforms) { //If the surface form appears in the sentence... if (line.matches("^" + word + " .*") || line.matches(".* " + word + "$") || line.matches(".* " + word + " .*")) { examples.add(line); } } } corpus_it.close(); return examples; }
From source file:com.sangupta.httptools.DownloadUrlCommand.java
@Override public void run() { File file = new File(this.urlFile); if (file == null || !file.exists()) { System.out.println("URL file cannot be found."); return;//from w w w .j a v a 2s. c om } if (!file.isFile()) { System.out.println("URL file does not represent a valid file."); return; } if (this.numThreads <= 0 || this.numThreads > 50) { System.out.println("Number of assigned threads should be between 1 and 50"); return; } outputDir = new File(this.outputFolder); if (outputDir.exists() && !outputDir.isDirectory()) { System.out.println("Output folder does not represent a valid directory"); return; } if (!outputDir.exists()) { outputDir.mkdirs(); } // try and parse and read all URLs int line = 1; try { LineIterator iterator = FileUtils.lineIterator(file); while (iterator.hasNext()) { ++line; String readURL = iterator.next(); createURLTask(readURL); } } catch (IOException e) { System.out.println("Unable to read URLs from the file at line: " + line); return; } // all set - create number of threads // and start fetching ExecutorService service = Executors.newFixedThreadPool(this.numThreads); final long start = System.currentTimeMillis(); for (Runnable runnable : this.downloadTasks) { service.submit(runnable); } // intialize some variables this.numTasks = this.downloadTasks.size(); this.downloadTasks.clear(); if (this.numTasks > 1000) { this.splitFolders = true; } // shutdown shutdownAndAwaitTermination(service); final long end = System.currentTimeMillis(); // everything done System.out.println(this.downloadTasks.size() + " urls downloaded in " + (end - start) + " millis."); }