Example usage for org.apache.commons.io LineIterator LineIterator

List of usage examples for org.apache.commons.io LineIterator LineIterator

Introduction

On this page you can find example usage for the org.apache.commons.io LineIterator constructor, LineIterator(Reader).

Prototype

public LineIterator(final Reader reader) throws IllegalArgumentException 

Document

Constructs an iterator of the lines for a Reader.
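
A minimal sketch of how this constructor is typically used, assuming a plain text file named input.txt (the file name is illustrative). The iterator is closed in a finally block, which also closes the wrapped Reader:

import java.io.File;
import java.io.FileReader;
import org.apache.commons.io.LineIterator;

public class LineIteratorExample {
    public static void main(String[] args) throws Exception {
        File file = new File("input.txt"); // illustrative file name
        LineIterator it = new LineIterator(new FileReader(file));
        try {
            while (it.hasNext()) {
                String line = it.nextLine();
                System.out.println(line);
            }
        } finally {
            // closeQuietly also closes the underlying FileReader
            LineIterator.closeQuietly(it);
        }
    }
}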

Usage

From source file:dk.netarkivet.archive.arcrepositoryadmin.ReplicaCacheDatabase.java

/**
 * Given the output of a checksum job, add the results to the database.
 * <p>
 * The following fields in the table are updated for each corresponding entry in the replicafileinfo table: <br/>
 * - checksum = the given checksum. <br/>
 * - filelist_status = ok. <br/>
 * - filelist_checkdatetime = now. <br/>
 * - checksum_checkdatetime = now.
 *
 * @param checksumOutputFile The output of a checksum job in a file
 * @param replica The replica this checksum job is for.
 */
@Override
public void addChecksumInformation(File checksumOutputFile, Replica replica) {
    // validate arguments
    ArgumentNotValid.checkNotNull(checksumOutputFile, "File checksumOutputFile");
    ArgumentNotValid.checkNotNull(replica, "Replica replica");

    // Sort the checksumOutputFile file.
    File sortedResult = new File(checksumOutputFile.getParent(), checksumOutputFile.getName() + ".sorted");
    FileUtils.sortFile(checksumOutputFile, sortedResult);
    final long datasize = FileUtils.countLines(sortedResult);

    Set<Long> missingReplicaRFIs = null;
    Connection con = ArchiveDBConnection.get();
    LineIterator lineIterator = null;
    try {
        // Make sure that the replica exists in the database.
        if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) {
            String msg = "Cannot add checksum information, since the replica '" + replica.toString()
                    + "' does not exist within the database.";
            log.warn(msg);
            throw new IOFailure(msg);
        }

        log.info("Starting processing of {} checksum entries for replica {}", datasize, replica.getId());

        // retrieve the list of files already known by this cache.
        // TODO This does not scale! Should the datastructure
        // (missingReplicaRFIs) be disk-bound in some way, or optimized
        // in some way, e.g. using it.unimi.dsi.fastutil.longs.LongArrayList
        missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con);

        // Initialize the line iterator over the sorted checksum file.
        lineIterator = new LineIterator(new FileReader(sortedResult));

        String lastFilename = "";
        String lastChecksum = "";

        int i = 0;
        while (lineIterator.hasNext()) {
            String line = lineIterator.next();
            // log that it is in progress every so often.
            if ((i % LOGGING_ENTRY_INTERVAL) == 0) {
                log.info("Processed checksum list entry number {} for replica {}", i, replica);
                // Close connection, and open another one
                // to avoid memory-leak (NAS-2003)
                ArchiveDBConnection.release(con);
                con = ArchiveDBConnection.get();
                log.debug("Databaseconnection has now been renewed");
            }
            ++i;

            // parse the input.
            final KeyValuePair<String, String> entry = ChecksumJob.parseLine(line);
            final String filename = entry.getKey();
            final String checksum = entry.getValue();

            // check for duplicates
            if (filename.equals(lastFilename)) {
                // if different checksums, then
                if (!checksum.equals(lastChecksum)) {
                    // log and send notification
                    String errMsg = "Unidentical duplicates of file '" + filename + "' with the checksums '"
                            + lastChecksum + "' and '" + checksum + "'. First instance used.";
                    log.warn(errMsg);
                    NotificationsFactory.getInstance().notify(errMsg, NotificationType.WARNING);
                } else {
                    // log about duplicate identical
                    log.debug("Duplicates of the file '{}' found with the same checksum '{}'.", filename,
                            checksum);
                }

                // avoid overhead of inserting duplicates twice.
                continue;
            }

            // set these values to be the old values in the next iteration.
            lastFilename = filename;
            lastChecksum = checksum;

            // Process the current (filename + checksum) combo for this replica
            // Remove the returned replicafileinfo guid from the missing entries.
            missingReplicaRFIs
                    .remove(ReplicaCacheHelpers.processChecksumline(filename, checksum, replica, con));
        }
    } catch (IOException e) {
        throw new IOFailure("Unable to read checksum entries from file", e);
    } finally {
        ArchiveDBConnection.release(con);
        LineIterator.closeQuietly(lineIterator);
    }

    con = ArchiveDBConnection.get();
    try {
        // go through the not found replicafileinfo for this replica to change
        // their filelist_status to missing.
        if (missingReplicaRFIs.size() > 0) {
            log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica);
            for (long rfi : missingReplicaRFIs) {
                // set the replicafileinfo in the database to missing.
                ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con);
            }
        }

        // update the checksum updated date for this replica.
        ReplicaCacheHelpers.updateChecksumDateForReplica(replica, con);
        ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con);

        log.info("Finished processing of {} checksum entries for replica {}", datasize, replica.getId());
    } finally {
        ArchiveDBConnection.release(con);
    }
}

From source file:launcher.net.CustomIOUtils.java

/**
 * Return an Iterator for the lines in an <code>InputStream</code>, using
 * the character encoding specified (or default encoding if null).
 * <p>
 * <code>LineIterator</code> holds a reference to the open
 * <code>InputStream</code> specified here. When you have finished with
 * the iterator you should close the stream to free internal resources.
 * This can be done by closing the stream directly, or by calling
 * {@link LineIterator#close()} or {@link LineIterator#closeQuietly(LineIterator)}.
 * <p>
 * The recommended usage pattern is:
 * <pre>
 * try {
 *   LineIterator it = IOUtils.lineIterator(stream, charset);
 *   while (it.hasNext()) {
 *     String line = it.nextLine();
 *     /// do something with line
 *   }
 * } finally {
 *   IOUtils.closeQuietly(stream);
 * }
 * </pre>
 *
 * @param input  the <code>InputStream</code> to read from, not null
 * @param encoding  the encoding to use, null means platform default
 * @return an Iterator of the lines in the reader, never null
 * @throws IllegalArgumentException if the input is null
 * @throws IOException if an I/O error occurs, such as if the encoding is invalid
 * @since 2.3
 */
public static LineIterator lineIterator(InputStream input, Charset encoding) throws IOException {
    return new LineIterator(new InputStreamReader(input, Charsets.toCharset(encoding)));
}
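
A possible call site for this helper, following the recommended pattern from the Javadoc above. The input file name is hypothetical, and imports of java.io.FileInputStream, java.nio.charset.StandardCharsets and org.apache.commons.io.IOUtils are assumed:

InputStream stream = new FileInputStream("ratings.txt"); // hypothetical input
try {
    LineIterator it = CustomIOUtils.lineIterator(stream, StandardCharsets.UTF_8);
    while (it.hasNext()) {
        String line = it.nextLine();
        // process the line
    }
} finally {
    // closing the stream also releases the iterator's reader
    IOUtils.closeQuietly(stream);
}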

From source file:dk.netarkivet.archive.arcrepositoryadmin.ReplicaCacheDatabase.java

/**
 * Method for adding the results from a list of filenames on a replica. This list of filenames should return the
 * list of all the files within the database.
 * <p>
 * For each file in the FileListJob the following fields are set for the corresponding entry in the replicafileinfo
 * table: <br/>
 * - filelist_status = ok. <br/>
 * - filelist_checkdatetime = now.
 * <p>
 * For each entry in the replicafileinfo table for the replica which are missing in the results from the FileListJob
 * the following fields are assigned the following values: <br/>
 * - filelist_status = missing. <br/>
 * - filelist_checkdatetime = now.
 *
 * @param filelistFile The list of filenames either parsed from a FilelistJob or the result from a
 * GetAllFilenamesMessage.
 * @param replica The replica, which the FilelistBatchjob has run upon.
 * @throws ArgumentNotValid If the filelist or the replica is null.
 * @throws UnknownID If the replica does not already exist in the database.
 */
@Override
public void addFileListInformation(File filelistFile, Replica replica) throws ArgumentNotValid, UnknownID {
    ArgumentNotValid.checkNotNull(filelistFile, "File filelistFile");
    ArgumentNotValid.checkNotNull(replica, "Replica replica");

    // Sort the filelist file.
    File sortedResult = new File(filelistFile.getParent(), filelistFile.getName() + ".sorted");
    FileUtils.sortFile(filelistFile, sortedResult);
    final long datasize = FileUtils.countLines(sortedResult);

    Connection con = ArchiveDBConnection.get();
    Set<Long> missingReplicaRFIs = null;
    LineIterator lineIterator = null;
    try {
        // Make sure that the replica exists in the database.
        if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) {
            String errorMsg = "Cannot add filelist information, since the replica '" + replica.toString()
                    + "' does not exist in the database.";
            log.warn(errorMsg);
            throw new UnknownID(errorMsg);
        }

        log.info("Starting processing of {} filelist entries for replica {}", datasize, replica.getId());

        // retrieve the list of files already known by this cache.
        // TODO This does not scale! Should this datastructure
        // (missingReplicaRFIs) be disk-bound in some way.
        missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con);

        // Initialize the line iterator over the sorted filelist file.
        lineIterator = new LineIterator(new FileReader(sortedResult));

        String lastFileName = "";
        int i = 0;
        while (lineIterator.hasNext()) {
            String file = lineIterator.next();
            // log that it is in progress every so often.
            if ((i % LOGGING_ENTRY_INTERVAL) == 0) {
                log.info("Processed file list entry number {} for replica {}", i, replica);
                // Close connection, and open another one
                // to avoid memory-leak (NAS-2003)
                ArchiveDBConnection.release(con);
                con = ArchiveDBConnection.get();
                log.debug("Databaseconnection has now been renewed");
            }
            ++i;

            // handle duplicates.
            if (file.equals(lastFileName)) {
                log.warn("There have been found multiple files with the name '{}'", file);
                continue;
            }

            lastFileName = file;
            // Add information for one file, and remove the ReplicaRFI from the
            // set of missing ones.
            missingReplicaRFIs.remove(ReplicaCacheHelpers.addFileInformation(file, replica, con));
        }
    } catch (IOException e) {
        throw new IOFailure("Unable to read the filenames from file", e);
    } finally {
        ArchiveDBConnection.release(con);
        LineIterator.closeQuietly(lineIterator);
    }

    con = ArchiveDBConnection.get();
    try {
        // go through the not found replicafileinfo for this replica to change
        // their filelist_status to missing.
        if (missingReplicaRFIs.size() > 0) {
            log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica);
            for (long rfi : missingReplicaRFIs) {
                // set the replicafileinfo in the database to missing.
                ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con);
            }
        }
        // Update the date for filelist update for this replica.
        ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con);
    } finally {
        ArchiveDBConnection.release(con);
    }
}

From source file:de.tudarmstadt.ukp.clarin.webanno.api.dao.RepositoryServiceDbData.java

/**
 * Check if a TAB-Sep training file is in correct format before importing
 */
private boolean isTabSepFileFormatCorrect(File aFile) {
    try {
        LineIterator it = new LineIterator(new FileReader(aFile));
        while (it.hasNext()) {
            String line = it.next();
            if (line.trim().length() == 0) {
                continue;
            }
            if (line.split("\t").length != 2) {
                return false;
            }
        }
    } catch (Exception e) {
        return false;
    }
    return true;
}
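
Note that this check never closes the LineIterator, so the underlying FileReader stays open until garbage collection. A minimal variant (not from the original source) that releases the reader explicitly could look like this:

private boolean isTabSepFileFormatCorrect(File aFile) {
    LineIterator it = null;
    try {
        it = new LineIterator(new FileReader(aFile));
        while (it.hasNext()) {
            String line = it.next();
            if (line.trim().length() == 0) {
                continue;
            }
            if (line.split("\t").length != 2) {
                return false;
            }
        }
    } catch (Exception e) {
        return false;
    } finally {
        // safe to call with null; closes the wrapped FileReader
        LineIterator.closeQuietly(it);
    }
    return true;
}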

From source file:norbert.mynemo.dataimport.fileformat.input.TenMillionRatingImporter.java

public TenMillionRatingImporter(String ratingFilepath, String mappingFilepath) throws IOException {
    checkNotNull(ratingFilepath);
    checkNotNull(mappingFilepath);

    idConverter = new MovieLensIdConverter(mappingFilepath);
    lineIterator = new LineIterator(new BufferedReader(new FileReader(ratingFilepath)));
}

From source file:org.aludratest.service.gitclient.GitClient.java

/** Provides the status.
 * @param data
 * @return a reference to this */
public GitClient status(StatusData data) {

    // clear status object for supporting data object reuse
    data.getUntrackedFiles().clear();
    data.getUnmodifiedFiles().clear();
    data.getModifiedFiles().clear();
    data.getAddedFiles().clear();
    data.getDeletedFiles().clear();
    data.getRenamedFiles().clear();
    data.getCopiedFiles().clear();
    data.getUpdatedFiles().clear();

    // invoke git
    String output = invokeGenerically(GIT_STATUS_PROCESS_NAME, true, "status", "--short", "--branch");
    LineIterator iterator = new LineIterator(new StringReader(output));
    while (iterator.hasNext()) {
        String line = iterator.next();
        if (line.startsWith("##")) {
            data.setCurrentBranch(line.substring(3));
        } else {
            StringData filePath = new StringData(line.substring(3));
            char statusCode = line.substring(0, 2).trim().charAt(0);
            switch (statusCode) {
            case '?':
                data.getUntrackedFiles().add(filePath);
                break;
            case '\'':
                data.getUnmodifiedFiles().add(filePath);
                break;
            case 'M':
                data.getModifiedFiles().add(filePath);
                break;
            case 'A':
                data.getAddedFiles().add(filePath);
                break;
            case 'D':
                data.getDeletedFiles().add(filePath);
                break;
            case 'R':
                data.getRenamedFiles().add(parseRename(filePath.getValue()));
                break;
            case 'C':
                data.getCopiedFiles().add(filePath);
                break;
            case 'U':
                data.getUpdatedFiles().add(filePath);
                break;
            default:
                throw new TechnicalException("Unknown status '" + statusCode + "' in git output: " + line);
            }
        }
    }
    return this;
}
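
For orientation, a small self-contained sketch (not from the original source) of the same parsing idea, run against illustrative "git status --short --branch" output held in a String; java.io.StringReader and org.apache.commons.io.LineIterator are assumed to be imported:

String sample = "## master\n M src/Main.java\n?? notes.txt\n"; // illustrative output
LineIterator it = new LineIterator(new StringReader(sample));
try {
    while (it.hasNext()) {
        String line = it.next();
        if (line.startsWith("##")) {
            System.out.println("branch: " + line.substring(3));
        } else {
            char statusCode = line.substring(0, 2).trim().charAt(0);
            String path = line.substring(3);
            System.out.println(statusCode + " -> " + path);
        }
    }
} finally {
    LineIterator.closeQuietly(it);
}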

From source file:org.aludratest.service.gitclient.GitClient.java

/** Provides the git log.
 * @param data
 * @return a reference to this */
public GitClient log(LogData data) {
    ArrayBuilder<String> builder = new ArrayBuilder<String>(String.class).add("log");
    if (data.getMaxCount() != null) {
        builder.add("--max-count=" + data.getMaxCount());
    }
    String output = invokeGenerically(GIT_LOG_PROCESS_NAME, true, builder.toArray());
    LineIterator iterator = new LineIterator(new StringReader(output));
    while (iterator.hasNext()) {
        parseLogItem(iterator, data);
    }
    return this;
}

From source file:org.aludratest.service.gitclient.GitClient.java

/** Lists branches.
 * @param data
 * @return a reference to this */
public GitClient listBranches(BranchListData data) {
    String output = invokeGenerically(GIT_LIST_BRANCHES_PROCESS_NAME, true, "branch", "--list");
    LineIterator iterator = new LineIterator(new StringReader(output));
    while (iterator.hasNext()) {
        String line = iterator.next();
        boolean current = line.startsWith("*");
        String branch = line.substring(2).trim();
        data.getBranches().add(new StringData(branch));
        if (current) {
            data.setCurrentBranch(branch);
        }
    }
    return this;
}

From source file:org.apache.druid.data.input.impl.FileIteratingFirehoseTest.java

@Test
public void testFirehose() throws Exception {
    final List<LineIterator> lineIterators = inputs.stream().map(s -> new LineIterator(new StringReader(s)))
            .collect(Collectors.toList());

    try (final FileIteratingFirehose firehose = new FileIteratingFirehose(lineIterators.iterator(), parser)) {
        final List<String> results = Lists.newArrayList();

        while (firehose.hasMore()) {
            final InputRow inputRow = firehose.nextRow();
            if (inputRow == null) {
                results.add(null);
            } else {
                results.add(Joiner.on("|").join(inputRow.getDimension("x")));
            }
        }

        Assert.assertEquals(expectedResults, results);
    }
}

From source file:org.apache.druid.data.input.impl.FileIteratingFirehoseTest.java

@Test(expected = RuntimeException.class)
public void testClose() throws IOException {
    final LineIterator lineIterator = new LineIterator(new Reader() {
        @Override
        public int read(char[] cbuf, int off, int len) {
            System.arraycopy(LINE_CHARS, 0, cbuf, 0, LINE_CHARS.length);
            return LINE_CHARS.length;
        }

        @Override
        public void close() {
            throw new RuntimeException("close test for FileIteratingFirehose");
        }
    });

    final TestCloseable closeable = new TestCloseable();
    final FileIteratingFirehose firehose = new FileIteratingFirehose(ImmutableList.of(lineIterator).iterator(),
            parser, closeable);
    firehose.hasMore(); // initialize lineIterator
    firehose.close();
    Assert.assertTrue(closeable.closed);
}