List of usage examples for org.apache.commons.io LineIterator LineIterator
public LineIterator(final Reader reader) throws IllegalArgumentException
Reader
From source file: dk.netarkivet.archive.arcrepositoryadmin.ReplicaCacheDatabase.java
/**
 * Given the output of a checksum job, add the results to the database.
 * <p>
 * The following fields in the table are updated for each corresponding entry in the replicafileinfo table: <br/>
 * - checksum = the given checksum. <br/>
 * - filelist_status = ok. <br/>
 * - filelist_checkdatetime = now. <br/>
 * - checksum_checkdatetime = now.
 * <p>
 * The input file is first sorted into a sibling file named {@code <name>.sorted}; the sorted copy is what gets
 * read, so entries for the same filename are adjacent and duplicates can be detected by comparing each entry
 * with the previous one. Every replicafileinfo guid known for the replica beforehand that is not returned while
 * processing the entries is afterwards marked as missing from the filelist.
 *
 * @param checksumOutputFile The output of a checksum job in a file
 * @param replica The replica this checksum job is for.
 * @throws IOFailure If the replica does not exist in the database, or if the sorted checksum file cannot be
 * read.
 */
@Override
public void addChecksumInformation(File checksumOutputFile, Replica replica) {
    // validate arguments
    ArgumentNotValid.checkNotNull(checksumOutputFile, "File checksumOutputFile");
    ArgumentNotValid.checkNotNull(replica, "Replica replica");

    // Sort the checksumOutputFile file into "<name>.sorted" next to the original.
    File sortedResult = new File(checksumOutputFile.getParent(), checksumOutputFile.getName() + ".sorted");
    FileUtils.sortFile(checksumOutputFile, sortedResult);
    // Only used for the start/finish log messages.
    final long datasize = FileUtils.countLines(sortedResult);

    Set<Long> missingReplicaRFIs = null;
    Connection con = ArchiveDBConnection.get();
    LineIterator lineIterator = null;
    try {
        // Make sure, that the replica exists in the database.
        if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) {
            String msg = "Cannot add checksum information, since the replica '" + replica.toString()
                    + "' does not exist within the database.";
            log.warn(msg);
            throw new IOFailure(msg);
        }

        log.info("Starting processing of {} checksum entries for replica {}", datasize, replica.getId());

        // retrieve the list of files already known by this cache.
        // TODO This does not scale! Should the datastructure
        // (missingReplicaRFIs) be disk-bound in some way, or optimized
        // in some way, e.g. using it.unimi.dsi.fastutil.longs.LongArrayList
        missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con);

        // Initialize the String iterator
        lineIterator = new LineIterator(new FileReader(sortedResult));

        // Previous entry, used to detect adjacent duplicates in the sorted file.
        String lastFilename = "";
        String lastChecksum = "";

        int i = 0;
        while (lineIterator.hasNext()) {
            String line = lineIterator.next();
            // log that it is in progress every so often.
            // (i starts at 0, so this branch is also taken for the very first entry.)
            if ((i % LOGGING_ENTRY_INTERVAL) == 0) {
                log.info("Processed checksum list entry number {} for replica {}", i, replica);
                // Close connection, and open another one
                // to avoid memory-leak (NAS-2003)
                ArchiveDBConnection.release(con);
                con = ArchiveDBConnection.get();
                log.debug("Databaseconnection has now been renewed");
            }
            ++i;

            // parse the input.
            final KeyValuePair<String, String> entry = ChecksumJob.parseLine(line);
            final String filename = entry.getKey();
            final String checksum = entry.getValue();

            // check for duplicates
            if (filename.equals(lastFilename)) {
                // if different checksums, then
                if (!checksum.equals(lastChecksum)) {
                    // log and send notification
                    String errMsg = "Unidentical duplicates of file '" + filename + "' with the checksums '"
                            + lastChecksum + "' and '" + checksum + "'. First instance used.";
                    log.warn(errMsg);
                    NotificationsFactory.getInstance().notify(errMsg, NotificationType.WARNING);
                } else {
                    // log that an identical duplicate was found
                    log.debug("Duplicates of the file '{}' found with the same checksum '{}'.", filename, checksum);
                }

                // avoid overhead of inserting duplicates twice.
                // (lastFilename/lastChecksum keep the values of the first instance.)
                continue;
            }

            // set these value to be the old values in next iteration.
            lastFilename = filename;
            lastChecksum = checksum;

            // Process the current (filename + checksum) combo for this replica
            // Remove the returned replicafileinfo guid from the missing entries.
            missingReplicaRFIs
                    .remove(ReplicaCacheHelpers.processChecksumline(filename, checksum, replica, con));
        }
    } catch (IOException e) {
        throw new IOFailure("Unable to read checksum entries from file", e);
    } finally {
        // Releases whichever connection is current (it may have been renewed in the loop).
        ArchiveDBConnection.release(con);
        LineIterator.closeQuietly(lineIterator);
    }

    // A fresh connection is used for the follow-up updates.
    con = ArchiveDBConnection.get();
    try {
        // go through the not found replicafileinfo for this replica to change
        // their filelist_status to missing.
        if (missingReplicaRFIs.size() > 0) {
            log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica);
            for (long rfi : missingReplicaRFIs) {
                // set the replicafileinfo in the database to missing.
                ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con);
            }
        }

        // update the checksum updated date for this replica.
        ReplicaCacheHelpers.updateChecksumDateForReplica(replica, con);
        ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con);

        log.info("Finished processing of {} checksum entries for replica {}", datasize, replica.getId());
    } finally {
        ArchiveDBConnection.release(con);
    }
}
From source file:launcher.net.CustomIOUtils.java
/** * Return an Iterator for the lines in an <code>InputStream</code>, using * the character encoding specified (or default encoding if null). * <p>/*from w w w .ja va 2 s. co m*/ * <code>LineIterator</code> holds a reference to the open * <code>InputStream</code> specified here. When you have finished with * the iterator you should close the stream to free internal resources. * This can be done by closing the stream directly, or by calling * {@link LineIterator#close()} or {@link LineIterator#closeQuietly(LineIterator)}. * <p> * The recommended usage pattern is: * <pre> * try { * LineIterator it = IOUtils.lineIterator(stream, charset); * while (it.hasNext()) { * String line = it.nextLine(); * /// do something with line * } * } finally { * IOUtils.closeQuietly(stream); * } * </pre> * * @param input the <code>InputStream</code> to read from, not null * @param encoding the encoding to use, null means platform default * @return an Iterator of the lines in the reader, never null * @throws IllegalArgumentException if the input is null * @throws IOException if an I/O error occurs, such as if the encoding is invalid * @since 2.3 */ public static LineIterator lineIterator(InputStream input, Charset encoding) throws IOException { return new LineIterator(new InputStreamReader(input, Charsets.toCharset(encoding))); }
From source file:dk.netarkivet.archive.arcrepositoryadmin.ReplicaCacheDatabase.java
/**
 * Method for adding the results from a list of filenames on a replica. This list of filenames should return the
 * list of all the files within the database.
 * <p>
 * For each file in the FileListJob the following fields are set for the corresponding entry in the replicafileinfo
 * table: <br/>
 * - filelist_status = ok. <br/>
 * - filelist_checkdatetime = now.
 * <p>
 * For each entry in the replicafileinfo table for the replica which are missing in the results from the FileListJob
 * the following fields are assigned the following values: <br/>
 * - filelist_status = missing. <br/>
 * - filelist_checkdatetime = now.
 * <p>
 * The input file is first sorted into a sibling file named {@code <name>.sorted}; the sorted copy is what gets
 * read, so repeated filenames are adjacent and are skipped after the first occurrence.
 *
 * @param filelistFile The list of filenames either parsed from a FilelistJob or the result from a
 * GetAllFilenamesMessage.
 * @param replica The replica, which the FilelistBatchjob has run upon.
 * @throws ArgumentNotValid If the filelist or the replica is null.
 * @throws UnknownID If the replica does not already exist in the database.
 * @throws IOFailure If the sorted filelist cannot be read.
 */
@Override
public void addFileListInformation(File filelistFile, Replica replica) throws ArgumentNotValid, UnknownID {
    ArgumentNotValid.checkNotNull(filelistFile, "File filelistFile");
    ArgumentNotValid.checkNotNull(replica, "Replica replica");

    // Sort the filelist file into "<name>.sorted" next to the original.
    File sortedResult = new File(filelistFile.getParent(), filelistFile.getName() + ".sorted");
    FileUtils.sortFile(filelistFile, sortedResult);
    // Only used for the start log message.
    final long datasize = FileUtils.countLines(sortedResult);

    Connection con = ArchiveDBConnection.get();
    Set<Long> missingReplicaRFIs = null;
    LineIterator lineIterator = null;
    try {
        // Make sure, that the replica exists in the database.
        if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) {
            String errorMsg = "Cannot add filelist information, since the replica '" + replica.toString()
                    + "' does not exist in the database.";
            log.warn(errorMsg);
            throw new UnknownID(errorMsg);
        }

        log.info("Starting processing of {} filelist entries for replica {}", datasize, replica.getId());

        // retrieve the list of files already known by this cache.
        // TODO This does not scale! Should this datastructure
        // (missingReplicaRFIs) be disk-bound in some way.
        missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con);

        // Initialize String iterator
        lineIterator = new LineIterator(new FileReader(sortedResult));

        // Previous filename, used to detect adjacent duplicates in the sorted file.
        String lastFileName = "";
        int i = 0;
        while (lineIterator.hasNext()) {
            String file = lineIterator.next();
            // log that it is in progress every so often.
            // (i starts at 0, so this branch is also taken for the very first entry.)
            if ((i % LOGGING_ENTRY_INTERVAL) == 0) {
                log.info("Processed file list entry number {} for replica {}", i, replica);
                // Close connection, and open another one
                // to avoid memory-leak (NAS-2003)
                ArchiveDBConnection.release(con);
                con = ArchiveDBConnection.get();
                log.debug("Databaseconnection has now been renewed");
            }
            ++i;

            // handle duplicates.
            if (file.equals(lastFileName)) {
                log.warn("There have been found multiple files with the name '{}'", file);
                continue;
            }

            lastFileName = file;
            // Add information for one file, and remove the ReplicaRFI from the
            // set of missing ones.
            missingReplicaRFIs.remove(ReplicaCacheHelpers.addFileInformation(file, replica, con));
        }
    } catch (IOException e) {
        throw new IOFailure("Unable to read the filenames from file", e);
    } finally {
        // Releases whichever connection is current (it may have been renewed in the loop).
        ArchiveDBConnection.release(con);
        LineIterator.closeQuietly(lineIterator);
    }

    // A fresh connection is used for the follow-up updates.
    con = ArchiveDBConnection.get();
    try {
        // go through the not found replicafileinfo for this replica to change
        // their filelist_status to missing.
        if (missingReplicaRFIs.size() > 0) {
            log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica);
            for (long rfi : missingReplicaRFIs) {
                // set the replicafileinfo in the database to missing.
                ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con);
            }
        }
        // Update the date for filelist update for this replica.
        ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con);
    } finally {
        ArchiveDBConnection.release(con);
    }
}
From source file:de.tudarmstadt.ukp.clarin.webanno.api.dao.RepositoryServiceDbData.java
/**
 * Check if a TAB-Sep training file is in correct format before importing.
 * <p>
 * Every line that is not blank (after trimming) must split on the tab character into exactly two parts.
 * The file is always closed before returning, whatever the outcome.
 *
 * @param aFile the file to validate
 * @return {@code true} if every non-blank line has exactly two tab-separated parts; {@code false} if any line
 *         does not, or if the file cannot be opened or read
 */
private boolean isTabSepFileFormatCorrect(File aFile) {
    LineIterator it = null;
    try {
        it = new LineIterator(new FileReader(aFile));
        while (it.hasNext()) {
            String line = it.next();
            // Blank lines are tolerated and carry no format information.
            if (line.trim().length() == 0) {
                continue;
            }
            if (line.split("\t").length != 2) {
                return false;
            }
        }
        return true;
    } catch (Exception e) {
        // Any failure to open or read the file is reported as "not correct" rather than propagated.
        return false;
    } finally {
        // Release the underlying FileReader on every path (early return, exception, normal completion);
        // closeQuietly accepts null if the iterator was never created.
        LineIterator.closeQuietly(it);
    }
}
From source file:norbert.mynemo.dataimport.fileformat.input.TenMillionRatingImporter.java
public TenMillionRatingImporter(String ratingFilepath, String mappingFilepath) throws IOException { checkNotNull(ratingFilepath);/*from w w w. j av a 2s . com*/ checkNotNull(mappingFilepath); idConverter = new MovieLensIdConverter(mappingFilepath); lineIterator = new LineIterator(new BufferedReader(new FileReader(ratingFilepath))); }
From source file:org.aludratest.service.gitclient.GitClient.java
/** Provides the status. * @param data/* w ww . j a v a2s. com*/ * @return a reference to this */ public GitClient status(StatusData data) { // clear status object for supporting data object reuse data.getUntrackedFiles().clear(); data.getUnmodifiedFiles().clear(); data.getModifiedFiles().clear(); data.getAddedFiles().clear(); data.getDeletedFiles().clear(); data.getRenamedFiles().clear(); data.getCopiedFiles().clear(); data.getUpdatedFiles().clear(); // invoke git String output = invokeGenerically(GIT_STATUS_PROCESS_NAME, true, "status", "--short", "--branch"); LineIterator iterator = new LineIterator(new StringReader(output)); while (iterator.hasNext()) { String line = iterator.next(); if (line.startsWith("##")) { data.setCurrentBranch(line.substring(3)); } else { StringData filePath = new StringData(line.substring(3)); char statusCode = line.substring(0, 2).trim().charAt(0); switch (statusCode) { case '?': data.getUntrackedFiles().add(filePath); break; case '\'': data.getUnmodifiedFiles().add(filePath); break; case 'M': data.getModifiedFiles().add(filePath); break; case 'A': data.getAddedFiles().add(filePath); break; case 'D': data.getDeletedFiles().add(filePath); break; case 'R': data.getRenamedFiles().add(parseRename(filePath.getValue())); break; case 'C': data.getCopiedFiles().add(filePath); break; case 'U': data.getUpdatedFiles().add(filePath); break; default: throw new TechnicalException("Unknown status '" + statusCode + "' in git output: " + line); } } } return this; }
From source file:org.aludratest.service.gitclient.GitClient.java
/**
 * Provides the git log by invoking {@code git log} (optionally limited with {@code --max-count}) and
 * parsing the output into the given data object.
 *
 * @param data the object holding the optional maximum count and receiving the parsed log items
 * @return a reference to this
 */
public GitClient log(LogData data) {
    // Assemble the command line: "log" plus the optional entry limit.
    ArrayBuilder<String> arguments = new ArrayBuilder<String>(String.class).add("log");
    if (data.getMaxCount() != null) {
        arguments.add("--max-count=" + data.getMaxCount());
    }

    String gitOutput = invokeGenerically(GIT_LOG_PROCESS_NAME, true, arguments.toArray());

    // parseLogItem advances the shared iterator itself, so loop until the output is consumed.
    for (LineIterator lines = new LineIterator(new StringReader(gitOutput)); lines.hasNext();) {
        parseLogItem(lines, data);
    }
    return this;
}
From source file:org.aludratest.service.gitclient.GitClient.java
/**
 * Lists branches by invoking {@code git branch --list}. Every reported branch is added to the data
 * object; the one whose line starts with {@code *} is additionally stored as the current branch.
 *
 * @param data the object that receives the branch names and the current branch
 * @return a reference to this
 */
public GitClient listBranches(BranchListData data) {
    String gitOutput = invokeGenerically(GIT_LIST_BRANCHES_PROCESS_NAME, true, "branch", "--list");
    for (LineIterator lines = new LineIterator(new StringReader(gitOutput)); lines.hasNext();) {
        String entry = lines.next();
        // The first two columns hold the marker; the branch name follows.
        String branchName = entry.substring(2).trim();
        data.getBranches().add(new StringData(branchName));
        if (entry.startsWith("*")) {
            data.setCurrentBranch(branchName);
        }
    }
    return this;
}
From source file:org.apache.druid.data.input.impl.FileIteratingFirehoseTest.java
@Test public void testFirehose() throws Exception { final List<LineIterator> lineIterators = inputs.stream().map(s -> new LineIterator(new StringReader(s))) .collect(Collectors.toList()); try (final FileIteratingFirehose firehose = new FileIteratingFirehose(lineIterators.iterator(), parser)) { final List<String> results = Lists.newArrayList(); while (firehose.hasMore()) { final InputRow inputRow = firehose.nextRow(); if (inputRow == null) { results.add(null);//from w w w. j a va 2 s . c o m } else { results.add(Joiner.on("|").join(inputRow.getDimension("x"))); } } Assert.assertEquals(expectedResults, results); } }
From source file:org.apache.druid.data.input.impl.FileIteratingFirehoseTest.java
@Test(expected = RuntimeException.class) public void testClose() throws IOException { final LineIterator lineIterator = new LineIterator(new Reader() { @Override//from w ww . jav a 2s. c o m public int read(char[] cbuf, int off, int len) { System.arraycopy(LINE_CHARS, 0, cbuf, 0, LINE_CHARS.length); return LINE_CHARS.length; } @Override public void close() { throw new RuntimeException("close test for FileIteratingFirehose"); } }); final TestCloseable closeable = new TestCloseable(); final FileIteratingFirehose firehose = new FileIteratingFirehose(ImmutableList.of(lineIterator).iterator(), parser, closeable); firehose.hasMore(); // initialize lineIterator firehose.close(); Assert.assertTrue(closeable.closed); }