List of usage examples for org.apache.commons.io FileUtils lineIterator
public static LineIterator lineIterator(File file, String encoding) throws IOException
File
From source file: nl.knaw.huygens.timbuctoo.util.Files.java
/**
 * Opens a line iterator over the given file, decoding it with this class's
 * {@code ENCODING} constant.
 *
 * @param file the file to read line by line
 * @return the iterator produced by {@code FileUtils.lineIterator}; it is returned open,
 *         so closing it is left to the caller
 * @throws IOException if the underlying {@code FileUtils.lineIterator} call fails
 */
public static LineIterator getLineIterator(File file) throws IOException {
    final LineIterator lines = FileUtils.lineIterator(file, ENCODING);
    return lines;
}
From source file:nl.opengeogroep.safetymaps.server.admin.stripes.LayerActionBean.java
@Before(stages = LifecycleStage.BindingAndValidation) private void findMapfiles() { try {/*from www . j a v a 2 s . c o m*/ JSONArray a = new JSONArray(); File search = Cfg.getPath("static_mapserver_searchdirs"); if (search != null) { for (File f : FileUtils.listFiles(search, new String[] { "map" }, true)) { JSONObject m = new JSONObject(); m.put("path", f.getPath().substring(search.getPath().length() + 1)); a.put(m); // Naive mapfile parser. Perhaps replace by JavaScript client-side // GetCap parsing JSONArray l = new JSONArray(); m.put("layers", l); LineIterator it = FileUtils.lineIterator(f, "US-ASCII"); try { while (it.hasNext()) { String line = it.nextLine().trim(); if (line.equals("LAYER")) { String n = it.nextLine().trim(); n = n.substring(6, n.length() - 1); l.put(n); } } } finally { it.close(); } } } mapFilesJson = a.toString(4); } catch (Exception e) { } }
From source file:org.apache.accumulo.test.AuditMessageIT.java
/** * Returns a List of Audit messages that have been grep'd out of the MiniAccumuloCluster output. * * @param stepName/* w w w.j av a2s . com*/ * A unique name for the test being executed, to identify the System.out messages. * @return A List of the Audit messages, sorted (so in chronological order). */ private ArrayList<String> getAuditMessages(String stepName) throws IOException { // ACCUMULO-3144 Make sure we give the processes enough time to flush the write buffer try { Thread.sleep(2000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException("Interrupted waiting for data to be flushed to output streams"); } for (MiniAccumuloClusterImpl.LogWriter lw : getCluster().getLogWriters()) { lw.flush(); } // Grab the audit messages System.out.println("Start of captured audit messages for step " + stepName); ArrayList<String> result = new ArrayList<String>(); File[] files = getCluster().getConfig().getLogDir().listFiles(); assertNotNull(files); for (File file : files) { // We want to grab the files called .out if (file.getName().contains(".out") && file.isFile() && file.canRead()) { LineIterator it = FileUtils.lineIterator(file, UTF_8.name()); try { while (it.hasNext()) { String line = it.nextLine(); if (line.matches(".* \\[" + AuditedSecurityOperation.AUDITLOG + "\\s*\\].*")) { // Only include the message if startTimestamp is null. or the message occurred after the startTimestamp value if ((lastAuditTimestamp == null) || (line.substring(0, 23).compareTo(lastAuditTimestamp) > 0)) result.add(line); } } } finally { LineIterator.closeQuietly(it); } } } Collections.sort(result); for (String s : result) { System.out.println(s); } System.out.println("End of captured audit messages for step " + stepName); if (result.size() > 0) lastAuditTimestamp = (result.get(result.size() - 1)).substring(0, 23); return result; }
From source file:org.apache.accumulo.test.AuditMessageIT.java
@Test public void testImportExportOperationsAudits() throws AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException, IOException, InterruptedException { conn.securityOperations().createLocalUser(AUDIT_USER_1, new PasswordToken(PASSWORD)); conn.securityOperations().grantSystemPermission(AUDIT_USER_1, SystemPermission.SYSTEM); conn.securityOperations().changeUserAuthorizations(AUDIT_USER_1, auths); grantEverySystemPriv(conn, AUDIT_USER_1); // Connect as Audit User and do a bunch of stuff. // Start testing activities here auditConnector = getCluster().getConnector(AUDIT_USER_1, new PasswordToken(PASSWORD)); auditConnector.tableOperations().create(OLD_TEST_TABLE_NAME); // Insert some play data BatchWriter bw = auditConnector.createBatchWriter(OLD_TEST_TABLE_NAME, new BatchWriterConfig()); Mutation m = new Mutation("myRow"); m.put("cf1", "cq1", "v1"); m.put("cf1", "cq2", "v3"); bw.addMutation(m);/*from w w w . jav a 2 s. c o m*/ bw.close(); // Prepare to export the table File exportDir = new File(getCluster().getConfig().getDir().toString() + "/export"); auditConnector.tableOperations().offline(OLD_TEST_TABLE_NAME); auditConnector.tableOperations().exportTable(OLD_TEST_TABLE_NAME, exportDir.toString()); // We've exported the table metadata to the MiniAccumuloCluster root dir. Grab the .rf file path to re-import it File distCpTxt = new File(exportDir.toString() + "/distcp.txt"); File importFile = null; LineIterator it = FileUtils.lineIterator(distCpTxt, UTF_8.name()); // Just grab the first rf file, it will do for now. 
String filePrefix = "file:"; try { while (it.hasNext() && importFile == null) { String line = it.nextLine(); if (line.matches(".*\\.rf")) { importFile = new File(line.replaceFirst(filePrefix, "")); } } } finally { LineIterator.closeQuietly(it); } FileUtils.copyFileToDirectory(importFile, exportDir); auditConnector.tableOperations().importTable(NEW_TEST_TABLE_NAME, exportDir.toString()); // Now do a Directory (bulk) import of the same data. auditConnector.tableOperations().create(THIRD_TEST_TABLE_NAME); File failDir = new File(exportDir + "/tmp"); assertTrue(failDir.mkdirs() || failDir.isDirectory()); auditConnector.tableOperations().importDirectory(THIRD_TEST_TABLE_NAME, exportDir.toString(), failDir.toString(), false); auditConnector.tableOperations().online(OLD_TEST_TABLE_NAME); // Stop testing activities here ArrayList<String> auditMessages = getAuditMessages("testImportExportOperationsAudits"); assertEquals(1, findAuditMessage(auditMessages, String .format(AuditedSecurityOperation.CAN_CREATE_TABLE_AUDIT_TEMPLATE, OLD_TEST_TABLE_NAME)) .size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_ONLINE_OFFLINE_TABLE_AUDIT_TEMPLATE, "offlineTable", OLD_TEST_TABLE_NAME)).size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_EXPORT_AUDIT_TEMPLATE, OLD_TEST_TABLE_NAME, exportDir.toString())).size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_IMPORT_AUDIT_TEMPLATE, NEW_TEST_TABLE_NAME, filePrefix + exportDir.toString())).size()); assertEquals(1, findAuditMessage(auditMessages, String .format(AuditedSecurityOperation.CAN_CREATE_TABLE_AUDIT_TEMPLATE, THIRD_TEST_TABLE_NAME)) .size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_BULK_IMPORT_AUDIT_TEMPLATE, THIRD_TEST_TABLE_NAME, filePrefix + exportDir.toString(), filePrefix + failDir.toString())).size()); assertEquals(1, 
findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_ONLINE_OFFLINE_TABLE_AUDIT_TEMPLATE, "onlineTable", OLD_TEST_TABLE_NAME)).size()); }
From source file:org.apache.accumulo.test.AuditMessageTest.java
/** * Returns a List of Audit messages that have been grep'd out of the MiniAccumuloCluster output. * /* ww w . j a v a2s. c o m*/ * @param stepName * A unique name for the test being executed, to identify the System.out messages. * @return A List of the Audit messages, sorted (so in chronological order). */ private ArrayList<String> getAuditMessages(String stepName) throws IOException { for (MiniAccumuloCluster.LogWriter lw : logWriters) { lw.flush(); } // Grab the audit messages System.out.println("Start of captured audit messages for step " + stepName); ArrayList<String> result = new ArrayList<String>(); for (File file : logDir.listFiles()) { // We want to grab the files called .out if (file.getName().contains(".out") && file.isFile() && file.canRead()) { LineIterator it = FileUtils.lineIterator(file, Constants.UTF8.name()); try { while (it.hasNext()) { String line = it.nextLine(); if (line.matches(".* \\[" + AuditedSecurityOperation.AUDITLOG + "\\s*\\].*")) { // Only include the message if startTimestamp is null. or the message occurred after the startTimestamp value if ((lastAuditTimestamp == null) || (line.substring(0, 23).compareTo(lastAuditTimestamp) > 0)) result.add(line); } } } finally { LineIterator.closeQuietly(it); } } } Collections.sort(result); for (String s : result) { System.out.println(s); } System.out.println("End of captured audit messages for step " + stepName); if (result.size() > 0) lastAuditTimestamp = (result.get(result.size() - 1)).substring(0, 23); return result; }
From source file:org.apache.accumulo.test.AuditMessageTest.java
@Test(timeout = 60 * 1000) public void testImportExportOperationsAudits() throws AccumuloSecurityException, AccumuloException, TableExistsException, TableNotFoundException, IOException, InterruptedException { conn.securityOperations().createLocalUser(AUDIT_USER_1, new PasswordToken(PASSWORD)); conn.securityOperations().grantSystemPermission(AUDIT_USER_1, SystemPermission.SYSTEM); conn.securityOperations().changeUserAuthorizations(AUDIT_USER_1, auths); grantEverySystemPriv(conn, AUDIT_USER_1); // Connect as Audit User and do a bunch of stuff. // Start testing activities here auditConnector = accumulo.getConnector(AUDIT_USER_1, PASSWORD); auditConnector.tableOperations().create(OLD_TEST_TABLE_NAME); // Insert some play data BatchWriter bw = auditConnector.createBatchWriter(OLD_TEST_TABLE_NAME, new BatchWriterConfig()); Mutation m = new Mutation("myRow"); m.put("cf1", "cq1", "v1"); m.put("cf1", "cq2", "v3"); bw.addMutation(m);//from w w w .java2 s.c o m bw.close(); // Prepare to export the table File exportDir = new File(accumulo.getConfig().getDir().toString() + "/export"); auditConnector.tableOperations().offline(OLD_TEST_TABLE_NAME); auditConnector.tableOperations().exportTable(OLD_TEST_TABLE_NAME, exportDir.toString()); // We've exported the table metadata to the MiniAccumuloCluster root dir. Grab the .rf file path to re-import it File distCpTxt = new File(exportDir.toString() + "/distcp.txt"); File importFile = null; LineIterator it = FileUtils.lineIterator(distCpTxt, Constants.UTF8.name()); // Just grab the first rf file, it will do for now. 
String filePrefix = "file:"; try { while (it.hasNext() && importFile == null) { String line = it.nextLine(); if (line.matches(".*\\.rf")) { importFile = new File(line.replaceFirst(filePrefix, "")); } } } finally { LineIterator.closeQuietly(it); } FileUtils.copyFileToDirectory(importFile, exportDir); auditConnector.tableOperations().importTable(NEW_TEST_TABLE_NAME, exportDir.toString()); // Now do a Directory (bulk) import of the same data. auditConnector.tableOperations().create(THIRD_TEST_TABLE_NAME); File failDir = new File(exportDir + "/tmp"); failDir.mkdirs(); auditConnector.tableOperations().importDirectory(THIRD_TEST_TABLE_NAME, exportDir.toString(), failDir.toString(), false); auditConnector.tableOperations().online(OLD_TEST_TABLE_NAME); // Stop testing activities here ArrayList<String> auditMessages = getAuditMessages("testImportExportOperationsAudits"); assertEquals(1, findAuditMessage(auditMessages, String .format(AuditedSecurityOperation.CAN_CREATE_TABLE_AUDIT_TEMPLATE, OLD_TEST_TABLE_NAME)) .size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_ONLINE_OFFLINE_TABLE_AUDIT_TEMPLATE, "offlineTable", OLD_TEST_TABLE_NAME)).size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_EXPORT_AUDIT_TEMPLATE, OLD_TEST_TABLE_NAME, exportDir.toString())).size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_IMPORT_AUDIT_TEMPLATE, NEW_TEST_TABLE_NAME, exportDir.toString())).size()); assertEquals(1, findAuditMessage(auditMessages, String .format(AuditedSecurityOperation.CAN_CREATE_TABLE_AUDIT_TEMPLATE, THIRD_TEST_TABLE_NAME)) .size()); assertEquals(1, findAuditMessage(auditMessages, String.format(AuditedSecurityOperation.CAN_BULK_IMPORT_AUDIT_TEMPLATE, THIRD_TEST_TABLE_NAME, filePrefix + exportDir.toString(), filePrefix + failDir.toString())).size()); assertEquals(1, findAuditMessage(auditMessages, 
String.format(AuditedSecurityOperation.CAN_ONLINE_OFFLINE_TABLE_AUDIT_TEMPLATE, "onlineTable", OLD_TEST_TABLE_NAME)).size()); }
From source file:org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.java
/** * Sweep phase of gc candidate deletion. * <p>/*from w w w . ja v a2 s .c om*/ * Performs the following steps depending upon the type of the blob store refer * {@link org.apache.jackrabbit.oak.plugins.blob.SharedDataStore.Type}: * * <ul> * <li>Shared</li> * <li> * <ul> * <li> Merge all marked references (from the mark phase run independently) available in the data store meta * store (from all configured independent repositories). * <li> Retrieve all blob ids available. * <li> Diffs the 2 sets above to retrieve list of blob ids not used. * <li> Deletes only blobs created after * (earliest time stamp of the marked references - #maxLastModifiedInterval) from the above set. * </ul> * </li> * * <li>Default</li> * <li> * <ul> * <li> Mark phase already run. * <li> Retrieve all blob ids available. * <li> Diffs the 2 sets above to retrieve list of blob ids not used. * <li> Deletes only blobs created after * (time stamp of the marked references - #maxLastModifiedInterval). * </ul> * </li> * </ul> * * @return the number of blobs deleted * @throws Exception the exception * @param fs the garbage collector file state * @param markStart the start time of mark to take as reference for deletion */ protected long sweep(GarbageCollectorFileState fs, long markStart) throws Exception { long earliestRefAvailTime; // Merge all the blob references available from all the reference files in the data store meta store // Only go ahead if merge succeeded try { earliestRefAvailTime = GarbageCollectionType.get(blobStore).mergeAllMarkedReferences(blobStore, fs); LOG.debug("Earliest reference available for timestamp [{}]", earliestRefAvailTime); earliestRefAvailTime = (earliestRefAvailTime < markStart ? 
earliestRefAvailTime : markStart); } catch (Exception e) { return 0; } // Find all blob references after iterating over the whole repository (new BlobIdRetriever(fs)).call(); // Calculate the references not used difference(fs); long count = 0; long deleted = 0; long lastMaxModifiedTime = getLastMaxModifiedTime(earliestRefAvailTime); LOG.debug("Starting sweep phase of the garbage collector"); LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ", timestampToString(lastMaxModifiedTime)); ConcurrentLinkedQueue<String> exceptionQueue = new ConcurrentLinkedQueue<String>(); LineIterator iterator = FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name()); List<String> ids = newArrayList(); while (iterator.hasNext()) { ids.add(iterator.next()); if (ids.size() >= getBatchCount()) { count += ids.size(); deleted += sweepInternal(ids, exceptionQueue, lastMaxModifiedTime); ids = newArrayList(); } } if (!ids.isEmpty()) { count += ids.size(); deleted += sweepInternal(ids, exceptionQueue, lastMaxModifiedTime); } BufferedWriter writer = null; try { if (!exceptionQueue.isEmpty()) { writer = Files.newWriter(fs.getGarbage(), Charsets.UTF_8); saveBatchToFile(newArrayList(exceptionQueue), writer); } } finally { LineIterator.closeQuietly(iterator); IOUtils.closeQuietly(writer); } if (!exceptionQueue.isEmpty()) { LOG.warn( "Unable to delete some blobs entries from the blob store. Details around such blob entries can " + "be found in [{}]", fs.getGarbage().getAbsolutePath()); } if (count != deleted) { LOG.warn( "Deleted only [{}] blobs entries from the [{}] candidates identified. This may happen if blob " + "modified time is > " + "than the max deleted time ({})", deleted, count, timestampToString(lastMaxModifiedTime)); } // Remove all the merged marked references GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore); LOG.debug("Ending sweep phase of the garbage collector"); return deleted; }
From source file:org.asqatasun.referential.creator.CodeGeneratorMojo.java
/** * * @return// www . j a v a 2 s .c o m */ private Iterable<CSVRecord> getCsv() { // we parse the csv file to extract the first line and get the headers LineIterator lineIterator; try { lineIterator = FileUtils.lineIterator(dataFile, Charset.defaultCharset().name()); } catch (IOException ex) { Logger.getLogger(CodeGeneratorMojo.class.getName()).log(Level.SEVERE, null, ex); lineIterator = null; } String[] csvHeaders = lineIterator != null ? lineIterator.next().split(String.valueOf(delimiter)) : new String[0]; isCriterionPresent = extractCriterionFromCsvHeader(csvHeaders); try { extractAvailableLangsFromCsvHeader(csvHeaders); } catch (I18NLanguageNotFoundException ex) { Logger.getLogger(CodeGeneratorMojo.class.getName()).log(Level.SEVERE, null, ex); return null; } // from here we just add each line to a build to re-create the csv content // without the first line. StringBuilder strb = new StringBuilder(); while (lineIterator.hasNext()) { strb.append(lineIterator.next()); strb.append("\n"); } Reader in; try { in = new StringReader(strb.toString()); CSVFormat csvf = CSVFormat.newFormat(delimiter).withHeader(csvHeaders); return csvf.parse(in); } catch (FileNotFoundException ex) { Logger.getLogger(CodeGeneratorMojo.class.getName()).log(Level.SEVERE, null, ex); return null; } catch (IOException ex) { Logger.getLogger(CodeGeneratorMojo.class.getName()).log(Level.SEVERE, null, ex); return null; } }
From source file:org.ednovo.data.handlers.FileInputProcessor.java
@Override public void handleRow(Object row) throws Exception { File folder = new File(fileInputData.get("file-path")); Collection<File> files = FileUtils.listFiles(folder, new WildcardFileFilter(fileInputData.get("path-pattern")), DirectoryFileFilter.DIRECTORY); StopWatch sw = new StopWatch(); for (final File file : files) { LOG.info("processing file {}", file.getAbsolutePath()); sw.start();/*from w w w.j a v a 2 s .c o m*/ long lines = 0; try { LineIterator it = FileUtils.lineIterator(file, "UTF-8"); try { while (it.hasNext()) { final String line = it.nextLine(); // Send the row to the next process handler. getNextRowHandler().processRow(line); lines++; if (lines % 1000 == 0) { LOG.info("file-lines: {} ", lines); } } } finally { LineIterator.closeQuietly(it); } } catch (IOException e) { LOG.error("Error processing file {} ", file.getAbsolutePath(), e); } sw.stop("file:" + file.getAbsolutePath() + ": lines= " + lines + " "); LOG.info(sw.toString(Integer.parseInt(lines + ""))); } }
From source file:org.freeeed.main.DocumentParser.java
/** * This function is specifically Memex crawler. *jl means JSON lines. * Furthermore, each JSON line has the expected fields * * @param fileName input file in *jl format * @param metadata extracted metadata/*from ww w . j a v a2 s . c o m*/ */ // TODO make the code more elegant, try-with-exceptions private void extractJlFields(String fileName, DocumentMetadata metadata) { LineIterator it = null; try { it = FileUtils.lineIterator(new File(fileName), "UTF-8"); while (it.hasNext()) { String jsonAsString = it.nextLine(); String htmlText = JsonParser.getJsonField(jsonAsString, "extracted_text"); String text = Jsoup.parse(htmlText).text(); metadata.set(DocumentMetadataKeys.DOCUMENT_TEXT, text); metadata.setContentType("application/jl"); } } catch (IOException e) { LOGGER.error("Problem with JSON line", e); } finally { assert it != null; it.close(); } }