List of usage examples for org.apache.commons.io FileUtils lineIterator
public static LineIterator lineIterator(File file, String encoding) throws IOException
From source file: org.wikimedia.analytics.varnishkafka.Cli.java
/**
 * Reads the tab-separated input file and writes each record as a JSON object
 * to {@code test.<format>} in the working directory, optionally wrapped in a
 * Snappy compression stream.
 *
 * @return the number of input lines converted to JSON records
 */
private Integer writeJsonOutput() {
    int n = 0;
    JsonFactory jfactory = new JsonFactory();
    try {
        File outputFile = new File(cwd.getPath(), "test." + getFormat());
        log.info("Output file path: " + outputFile.toString());
        OutputStream out = new FileOutputStream(outputFile);
        BufferedOutputStream bos = new BufferedOutputStream(out);
        // When compressing, the Snappy stream wraps the buffered stream; either
        // way 'sink' is the outermost stream and closing it closes the chain.
        OutputStream sink = compress ? new SnappyOutputStream(bos) : bos;
        JsonGenerator jGenerator = jfactory.createJsonGenerator(sink, JsonEncoding.UTF8);
        LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
        try {
            setStart(System.nanoTime());
            while (it.hasNext()) {
                n++;
                String line = it.nextLine();
                String[] fields = line.split("\\t");
                jGenerator.writeStartObject();
                jGenerator.writeNumberField("kafka_offset", Long.parseLong(fields[0]));
                jGenerator.writeStringField("host", fields[1]);
                jGenerator.writeNumberField("seq_num", Long.parseLong(fields[2]));
                jGenerator.writeStringField("timestamp", fields[3]);
                jGenerator.writeNumberField("response", Float.parseFloat(fields[4]));
                jGenerator.writeStringField("ip", fields[5]);
                jGenerator.writeStringField("http_status", fields[6]);
                jGenerator.writeNumberField("bytes_sent", parseBytesSent(fields[7]));
                jGenerator.writeStringField("request_method", fields[8]);
                jGenerator.writeStringField("uri", fields[9]);
                jGenerator.writeStringField("proxy_host", fields[10]);
                jGenerator.writeStringField("mime_type", fields[11]);
                jGenerator.writeStringField("referer", fields[12]);
                jGenerator.writeStringField("x_forwarded_for", fields[13]);
                jGenerator.writeStringField("user_agent", fields[14]);
                jGenerator.writeStringField("accept_language", fields[15]);
                jGenerator.writeStringField("x_analytics", fields[16]);
                jGenerator.writeEndObject();
            }
            setEnd(System.nanoTime());
        } finally {
            it.close();
            jGenerator.flush();
            // BUG FIX: the original closed 'out' before 'bos' in the
            // non-compress branch (wrong order). Closing the generator closes
            // the whole underlying stream chain exactly once.
            jGenerator.close();
        }
    } catch (JsonGenerationException e) {
        e.printStackTrace();
    } catch (JsonMappingException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return n;
}
From source file:org.wikimedia.analytics.varnishkafka.Cli.java
private Integer writeProtobufOutput() { int n = 0;//from w ww .j a v a2 s. c o m try { LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8"); File outputFile = new File(cwd.getPath(), "test." + getFormat()); outputFile.delete(); OutputStream out = new FileOutputStream(outputFile); BufferedOutputStream bos = new BufferedOutputStream(out); SnappyOutputStream snappyOutputStream = null; if (compress) { snappyOutputStream = new SnappyOutputStream(bos); } log.info("Output file path: " + outputFile.toString()); try { setStart(System.nanoTime()); while (it.hasNext()) { n++; String line = it.nextLine(); String[] fields = line.split("\\t"); Logline.LogLine logline = Logline.LogLine.newBuilder().setKafkaOffset(Long.parseLong(fields[0])) .setHost(fields[1]).setSeqNum(Long.parseLong(fields[2])).setTimestamp(fields[3]) .setResponse(Float.parseFloat(fields[4])).setIp(fields[5]).setHttpStatus(fields[6]) .setBytesSent(parseBytesSent(fields[7])).setRequestMethod(fields[8]).setUri(fields[9]) .setProxyHost(fields[10]).setMimeType(fields[11]).setReferer(fields[12]) .setXForwardedFor(fields[13]).setUserAgent(fields[14]).setAcceptLanguage(fields[15]) .setXAnalytics(fields[16]).build(); if (compress) { snappyOutputStream.write(logline.toByteArray()); } else { bos.write(logline.toByteArray()); } } setEnd(System.nanoTime()); } finally { try { bos.flush(); out.flush(); out.close(); bos.close(); } catch (IOException e) { e.printStackTrace(); } } } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return n; }
From source file:org.wikimedia.analytics.varnishkafka.Cli.java
private Integer writeAvroOutput() { Schema schema = null;/*from w ww . j a v a 2s . c o m*/ int n = 0; try { InputStream inputStream = ClassLoader.getSystemClassLoader() .getResourceAsStream("WebRequest.avro.json"); schema = new Schema.Parser().parse(inputStream); inputStream.close(); File file = new File(cwd.getPath(), "test." + getFormat()); log.info("Output file path: " + file.toString()); file.delete(); DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer); if (compress) { dataFileWriter.setCodec(CodecFactory.snappyCodec()); } dataFileWriter.create(schema, file); try { LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8"); try { setStart(System.nanoTime()); while (it.hasNext()) { n++; String line = it.nextLine(); String[] fields = line.split("\\t"); // Populate data GenericRecord r = new GenericData.Record(schema); r.put("kafka_offset", Long.parseLong(fields[0])); r.put("host", fields[1]); r.put("seq_num", Long.parseLong(fields[2])); r.put("timestamp", fields[3]); r.put("response", Float.parseFloat(fields[4])); r.put("ip", fields[5]); r.put("http_status", fields[6]); r.put("bytes_sent", parseBytesSent(fields[7])); r.put("request_method", fields[8]); r.put("uri", fields[9]); r.put("proxy_host", fields[10]); r.put("mime_type", fields[11]); r.put("referer", fields[12]); r.put("x_forwarded_for", fields[13]); r.put("user_agent", fields[14]); r.put("accept_language", fields[15]); r.put("x_analytics", fields[16]); dataFileWriter.append(r); } setEnd(System.nanoTime()); } finally { dataFileWriter.flush(); dataFileWriter.close(); } } catch (IOException ex) { ex.printStackTrace(); } } catch (IOException e) { e.printStackTrace(); } return n; }
From source file:org.wso2.identity.integration.test.user.mgt.UserImportLoggingTestCase.java
/** * Read the audit log file and extract the log entries as lines. * * @return : An Array List which contains audit log lines. * @throws IOException : If any error occurred while reading the file. */// www .ja v a 2 s . c o m private List<String> readAuditLogFile() throws IOException { List<String> bulkUserImportAuditLogs = new ArrayList<>(); String auditLogFile = LOG_FILE_LOCATION + File.separatorChar + AUDIT_LOG_FILE_NAME; File auditFile = new File(auditLogFile); // Iterate through the file and read lines. LineIterator iterator = FileUtils.lineIterator(auditFile, ENCODING); while (iterator.hasNext()) { String auditLine = iterator.nextLine(); if (StringUtils.contains(auditLine, BULK_USER_IMPORT_OP)) { bulkUserImportAuditLogs.add(auditLine); } } return bulkUserImportAuditLogs; }
From source file:Parsing.DeweyParser.java
public ArrayList<Dewey> parseDewey() { ArrayList<Dewey> deweys = new ArrayList<>(); try {//from w ww. j av a 2 s . c o m LineIterator reader = FileUtils.lineIterator(file, "UTF-8"); LOG.debug(file.getName()); String line; while (reader.hasNext()) { line = reader.next(); if (!line.equals("")) { deweys.add(getDewey(line)); } } } catch (IOException ex) { LOG.error("IOException", ex); } return deweys; }
From source file:playground.anhorni.locationchoice.preprocess.facilities.facilitiescreation.fromBZ.entreprisecensus.EnterpriseCensusParser.java
private final void readPresenceCodes(EnterpriseCensus ec) { log.info("Reading the presence code file..."); int lineCounter = 0; int skip = 1; String filename = presenceCodeFile; String separator = ";"; File file = new File(filename); LineIterator it = null;/*from w ww . j av a2 s . c o m*/ String line = null; String[] tokens = null; String reli = null; try { it = FileUtils.lineIterator(file, "UTF-8"); } catch (IOException e) { e.printStackTrace(); } try { while (it.hasNext()) { line = it.nextLine(); tokens = line.split(separator); if (lineCounter == 0) { log.info("Processing header line..."); for (String token : tokens) { ec.addPresenceCodeNOGAType(token.replaceAll("\"", "")); } log.info("Processing header line...done."); } else { reli = tokens[0]; for (int pos = 0; pos < tokens.length; pos++) { if (Pattern.matches("1", tokens[pos])) { ec.addPresenceCode(reli, ec.getPresenceCodeNOGAType(pos)); } } } lineCounter++; if (lineCounter % skip == 0) { log.info("Processed hectares: " + Integer.toString(lineCounter)); skip *= 2; } } } finally { LineIterator.closeQuietly(it); } log.info("Processed hectares: " + Integer.toString(lineCounter)); log.info("Reading the presence code file...done."); }
From source file:playground.anhorni.locationchoice.preprocess.facilities.facilitiescreation.fromBZ.entreprisecensus.EnterpriseCensusParser.java
private final void readHectareAggregations(EnterpriseCensus ec) { log.info("Reading the hectare aggregation file..."); String separator = ","; String filename = inputHectareAggregationFile; File file = new File(filename); LineIterator it = null;/*from ww w . j a va 2 s.c om*/ String line = null; String[] tokens = null; String reli = null; int lineCounter = 0, skip = 1; try { it = FileUtils.lineIterator(file, "UTF-8"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { while (it.hasNext()) { line = it.nextLine(); tokens = line.split(separator); if (lineCounter == 0) { log.info("Processing header line..."); for (String token : tokens) { ec.addhectareAggregationNOGAType(token.replaceAll("\"", "")); } log.info("Processing header line...done."); } else { reli = tokens[0]; for (int pos = 0; pos < tokens.length; pos++) { if (!Pattern.matches("0", tokens[pos])) { ec.addHectareAggregationInformation(reli, ec.getHectareAggregationNOGAType(pos), Double.parseDouble(tokens[pos])); } } } lineCounter++; if (lineCounter % skip == 0) { log.info("Processed hectares: " + Integer.toString(lineCounter)); skip *= 2; } } } finally { LineIterator.closeQuietly(it); } log.info("Processed hectares: " + Integer.toString(lineCounter)); log.info("Reading the hectare aggregation file...done."); }
From source file:playground.meisterk.org.matsim.enterprisecensus.EnterpriseCensusParser.java
private final void readPresenceCodes(EnterpriseCensus ec, final Config config) { log.info("Reading the presence code file..."); int lineCounter = 0; int skip = 1; String filename = config.getParam(EnterpriseCensus.EC_MODULE, EnterpriseCensus.EC_PRESENCECODEFILE); String separator = config.getParam(EnterpriseCensus.EC_MODULE, EnterpriseCensus.EC_PRESENCECODESEPARATOR); File file = new File(filename); LineIterator it = null;/*from w ww. ja v a 2 s . co m*/ String line = null; String[] tokens = null; String reli = null; try { it = FileUtils.lineIterator(file, "UTF-8"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { while (it.hasNext()) { line = it.nextLine(); tokens = line.split(separator); if (lineCounter == 0) { log.info("Processing header line..."); for (String token : tokens) { ec.addPresenceCodeNOGAType(token.replaceAll("\"", "")); } log.info("Processing header line...done."); } else { reli = tokens[0]; for (int pos = 0; pos < tokens.length; pos++) { if (Pattern.matches("1", tokens[pos])) { ec.addPresenceCode(reli, ec.getPresenceCodeNOGAType(pos)); } } } lineCounter++; if (lineCounter % skip == 0) { log.info("Processed hectares: " + Integer.toString(lineCounter)); skip *= 2; } } } finally { LineIterator.closeQuietly(it); } log.info("Processed hectares: " + Integer.toString(lineCounter)); log.info("Reading the presence code file...done."); }
From source file:playground.meisterk.org.matsim.enterprisecensus.EnterpriseCensusParser.java
private final void readHectareAggregations(EnterpriseCensus ec, Config config) { log.info("Reading the hectare aggregation file..."); String filename = config.getParam(EnterpriseCensus.EC_MODULE, EnterpriseCensus.EC_INPUTHECTAREAGGREGATIONFILE); String separator = config.getParam(EnterpriseCensus.EC_MODULE, EnterpriseCensus.EC_INPUTHECTAREAGGREGATIONSEPARATOR); File file = new File(filename); LineIterator it = null;/*w ww. jav a 2s .c o m*/ String line = null; String[] tokens = null; String reli = null; int lineCounter = 0, skip = 1; try { it = FileUtils.lineIterator(file, "UTF-8"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { while (it.hasNext()) { line = it.nextLine(); tokens = line.split(separator); if (lineCounter == 0) { log.info("Processing header line..."); for (String token : tokens) { ec.addhectareAggregationNOGAType(token.replaceAll("\"", "")); } log.info("Processing header line...done."); } else { reli = tokens[0]; for (int pos = 0; pos < tokens.length; pos++) { if (!Pattern.matches("0", tokens[pos])) { ec.addHectareAggregationInformation(reli, ec.getHectareAggregationNOGAType(pos), Double.parseDouble(tokens[pos])); } } } lineCounter++; if (lineCounter % skip == 0) { log.info("Processed hectares: " + Integer.toString(lineCounter)); skip *= 2; } } } finally { LineIterator.closeQuietly(it); } log.info("Processed hectares: " + Integer.toString(lineCounter)); log.info("Reading the hectare aggregation file...done."); }
From source file:playground.staheale.preprocess.AgentInteractionEnterpriseCensusParser.java
private final void readPresenceCodes(AgentInteractionEnterpriseCensus ec) { log.info("Reading the presence code file..."); int lineCounter = 0; int skip = 1; String filename = presenceCodeFile; String separator = ";"; File file = new File(filename); LineIterator it = null;//from w w w . jav a 2 s. c om String line = null; String[] tokens = null; String reli = null; try { it = FileUtils.lineIterator(file, "UTF-8"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { while (it.hasNext()) { line = it.nextLine(); tokens = line.split(separator); if (lineCounter == 0) { log.info("Processing header line..."); for (String token : tokens) { ec.addPresenceCodeNOGAType(token.replaceAll("\"", "")); } log.info("Processing header line...done."); } else { reli = tokens[0]; for (int pos = 0; pos < tokens.length; pos++) { if (Pattern.matches("1", tokens[pos])) { ec.addPresenceCode(reli, ec.getPresenceCodeNOGAType(pos)); } } } lineCounter++; if (lineCounter % skip == 0) { log.info("Processed hectares: " + Integer.toString(lineCounter)); skip *= 2; } } } finally { LineIterator.closeQuietly(it); } log.info("Processed hectares: " + Integer.toString(lineCounter)); log.info("Reading the presence code file...done."); }