Example usage for org.apache.commons.io FileUtils lineIterator

List of usage examples for org.apache.commons.io FileUtils lineIterator

Introduction

On this page you can find an example usage of org.apache.commons.io FileUtils.lineIterator.

Prototype

public static LineIterator lineIterator(File file, String encoding) throws IOException 

Source Link

Document

Returns an Iterator for the lines in a File.

Usage

From source file:org.wikimedia.analytics.varnishkafka.Cli.java

/**
 * Converts the tab-separated input file to JSON, writing one JSON object per
 * input line to "test.&lt;format&gt;" in the working directory. Output is
 * Snappy-compressed when the {@code compress} flag is set.
 *
 * @return the number of input lines processed (0 if an I/O error occurred).
 */
private Integer writeJsonOutput() {
    int n = 0;
    JsonFactory jfactory = new JsonFactory();

    try {
        File outputFile = new File(cwd.getPath(), "test." + getFormat());
        OutputStream out = new FileOutputStream(outputFile);
        BufferedOutputStream bos = new BufferedOutputStream(out);

        // The generator owns the outermost stream: closing the generator
        // flushes and closes the entire chain (snappy -> bos -> out), which
        // fixes the original close ordering (out was closed before bos,
        // defeating the buffer flush, and the snappy path never closed bos/out).
        JsonGenerator jGenerator;
        if (compress) {
            jGenerator = jfactory.createJsonGenerator(new SnappyOutputStream(bos), JsonEncoding.UTF8);
        } else {
            jGenerator = jfactory.createJsonGenerator(bos, JsonEncoding.UTF8);
        }

        log.info("Output file path: " + outputFile.toString());

        LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");

        try {
            setStart(System.nanoTime());
            while (it.hasNext()) {
                n++;
                String line = it.nextLine();
                String[] fields = line.split("\\t");

                jGenerator.writeStartObject();

                jGenerator.writeNumberField("kafka_offset", Long.parseLong(fields[0]));
                jGenerator.writeStringField("host", fields[1]);
                jGenerator.writeNumberField("seq_num", Long.parseLong(fields[2]));
                jGenerator.writeStringField("timestamp", fields[3]);
                jGenerator.writeNumberField("response", Float.parseFloat(fields[4]));
                jGenerator.writeStringField("ip", fields[5]);
                jGenerator.writeStringField("http_status", fields[6]);
                jGenerator.writeNumberField("bytes_sent", parseBytesSent(fields[7]));
                jGenerator.writeStringField("request_method", fields[8]);
                jGenerator.writeStringField("uri", fields[9]);
                jGenerator.writeStringField("proxy_host", fields[10]);
                jGenerator.writeStringField("mime_type", fields[11]);
                jGenerator.writeStringField("referer", fields[12]);
                jGenerator.writeStringField("x_forwarded_for", fields[13]);
                jGenerator.writeStringField("user_agent", fields[14]);
                jGenerator.writeStringField("accept_language", fields[15]);
                jGenerator.writeStringField("x_analytics", fields[16]);

                jGenerator.writeEndObject();
            }
            setEnd(System.nanoTime());
        } finally {
            // Nested so a failure closing the iterator cannot skip closing
            // (and thereby flushing) the generator and its stream chain.
            try {
                it.close();
            } finally {
                jGenerator.close();
            }
        }
    } catch (JsonGenerationException e) {
        e.printStackTrace();
    } catch (JsonMappingException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return n;
}

From source file:org.wikimedia.analytics.varnishkafka.Cli.java

/**
 * Converts the tab-separated input file to length-free concatenated protobuf
 * records, written to "test.&lt;format&gt;" in the working directory. Output is
 * Snappy-compressed when the {@code compress} flag is set.
 *
 * @return the number of input lines processed (0 if an error occurred).
 */
private Integer writeProtobufOutput() {
    int n = 0;
    try {
        LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");
        File outputFile = new File(cwd.getPath(), "test." + getFormat());
        outputFile.delete();
        OutputStream out = new FileOutputStream(outputFile);
        BufferedOutputStream bos = new BufferedOutputStream(out);
        SnappyOutputStream snappyOutputStream = null;

        if (compress) {
            snappyOutputStream = new SnappyOutputStream(bos);
        }

        log.info("Output file path: " + outputFile.toString());
        try {
            setStart(System.nanoTime());
            while (it.hasNext()) {
                n++;
                String line = it.nextLine();
                String[] fields = line.split("\\t");
                Logline.LogLine logline = Logline.LogLine.newBuilder().setKafkaOffset(Long.parseLong(fields[0]))
                        .setHost(fields[1]).setSeqNum(Long.parseLong(fields[2])).setTimestamp(fields[3])
                        .setResponse(Float.parseFloat(fields[4])).setIp(fields[5]).setHttpStatus(fields[6])
                        .setBytesSent(parseBytesSent(fields[7])).setRequestMethod(fields[8]).setUri(fields[9])
                        .setProxyHost(fields[10]).setMimeType(fields[11]).setReferer(fields[12])
                        .setXForwardedFor(fields[13]).setUserAgent(fields[14]).setAcceptLanguage(fields[15])
                        .setXAnalytics(fields[16]).build();

                if (compress) {
                    snappyOutputStream.write(logline.toByteArray());
                } else {
                    bos.write(logline.toByteArray());
                }
            }
            setEnd(System.nanoTime());
        } finally {
            try {
                // Close the outermost stream actually written to; each close
                // flushes and closes the streams beneath it. The original
                // never closed snappyOutputStream, losing its buffered block,
                // and called out.flush() after out.close().
                if (compress) {
                    snappyOutputStream.close();
                } else {
                    bos.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            it.close(); // release the input file handle
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return n;
}

From source file:org.wikimedia.analytics.varnishkafka.Cli.java

/**
 * Converts the tab-separated input file to an Avro container file
 * ("test.&lt;format&gt;" in the working directory) using the WebRequest schema
 * bundled on the classpath. Uses the Snappy codec when {@code compress} is set.
 *
 * @return the number of input lines processed (0 if an I/O error occurred).
 */
private Integer writeAvroOutput() {
    Schema schema = null;
    int n = 0;

    try {
        InputStream inputStream = ClassLoader.getSystemClassLoader()
                .getResourceAsStream("WebRequest.avro.json");
        try {
            schema = new Schema.Parser().parse(inputStream);
        } finally {
            inputStream.close(); // ensure the schema resource is released even if parsing fails
        }

        File file = new File(cwd.getPath(), "test." + getFormat());
        log.info("Output file path: " + file.toString());
        file.delete();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);

        if (compress) {
            dataFileWriter.setCodec(CodecFactory.snappyCodec());
        }

        dataFileWriter.create(schema, file);

        try {
            LineIterator it = FileUtils.lineIterator(inputFile, "UTF-8");

            try {
                setStart(System.nanoTime());
                while (it.hasNext()) {
                    n++;
                    String line = it.nextLine();
                    String[] fields = line.split("\\t");

                    // Populate data
                    GenericRecord r = new GenericData.Record(schema);
                    r.put("kafka_offset", Long.parseLong(fields[0]));
                    r.put("host", fields[1]);
                    r.put("seq_num", Long.parseLong(fields[2]));
                    r.put("timestamp", fields[3]);
                    r.put("response", Float.parseFloat(fields[4]));
                    r.put("ip", fields[5]);
                    r.put("http_status", fields[6]);
                    r.put("bytes_sent", parseBytesSent(fields[7]));
                    r.put("request_method", fields[8]);
                    r.put("uri", fields[9]);
                    r.put("proxy_host", fields[10]);
                    r.put("mime_type", fields[11]);
                    r.put("referer", fields[12]);
                    r.put("x_forwarded_for", fields[13]);
                    r.put("user_agent", fields[14]);
                    r.put("accept_language", fields[15]);
                    r.put("x_analytics", fields[16]);
                    dataFileWriter.append(r);
                }

                setEnd(System.nanoTime());
            } finally {
                // Nested so a failure closing the iterator (which the original
                // never closed at all) cannot skip flushing/closing the writer.
                try {
                    it.close();
                } finally {
                    dataFileWriter.flush();
                    dataFileWriter.close();
                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return n;
}

From source file:org.wso2.identity.integration.test.user.mgt.UserImportLoggingTestCase.java

/**
 * Read the audit log file and extract the log entries as lines.
 *
 * @return : An Array List which contains audit log lines.
 * @throws IOException : If any error occurred while reading the file.
 */// www  .ja v a  2  s .  c  o  m
/**
 * Read the audit log file and extract the log entries as lines.
 *
 * @return : An Array List which contains audit log lines.
 * @throws IOException : If any error occurred while reading the file.
 */
private List<String> readAuditLogFile() throws IOException {

    List<String> bulkUserImportAuditLogs = new ArrayList<>();
    String auditLogFile = LOG_FILE_LOCATION + File.separatorChar + AUDIT_LOG_FILE_NAME;
    File auditFile = new File(auditLogFile);

    // Iterate through the file and read lines.
    LineIterator iterator = FileUtils.lineIterator(auditFile, ENCODING);

    try {
        while (iterator.hasNext()) {
            String auditLine = iterator.nextLine();

            // Keep only the bulk-user-import audit entries.
            if (StringUtils.contains(auditLine, BULK_USER_IMPORT_OP)) {
                bulkUserImportAuditLogs.add(auditLine);
            }
        }
    } finally {
        iterator.close(); // fix: the iterator was never closed -> file-handle leak
    }
    return bulkUserImportAuditLogs;
}

From source file:Parsing.DeweyParser.java

/**
 * Parses the configured file line by line, converting each non-empty line
 * into a {@link Dewey} via {@code getDewey}.
 *
 * @return all parsed Dewey entries (empty on I/O error).
 */
public ArrayList<Dewey> parseDewey() {
    ArrayList<Dewey> deweys = new ArrayList<>();
    try {
        LineIterator reader = FileUtils.lineIterator(file, "UTF-8");
        LOG.debug(file.getName());

        try {
            while (reader.hasNext()) {
                String line = reader.nextLine();
                // Skip blank lines; everything else becomes a Dewey entry.
                if (!line.isEmpty()) {
                    deweys.add(getDewey(line));
                }
            }
        } finally {
            reader.close(); // fix: the iterator was never closed -> file-handle leak
        }
    } catch (IOException ex) {
        LOG.error("IOException", ex);
    }
    return deweys;
}

From source file:playground.anhorni.locationchoice.preprocess.facilities.facilitiescreation.fromBZ.entreprisecensus.EnterpriseCensusParser.java

/**
 * Reads the semicolon-separated presence code file into the given census.
 * The first line is a header of NOGA type codes; each following line is a
 * hectare id ("reli") followed by 0/1 presence flags.
 *
 * @param ec census object that collects NOGA types and presence codes.
 */
private final void readPresenceCodes(EnterpriseCensus ec) {

    log.info("Reading the presence code file...");

    int lineCounter = 0;
    int skip = 1;

    String filename = presenceCodeFile;
    String separator = ";";
    File file = new File(filename);

    LineIterator it = null;

    try {
        // Open and iterate inside the same try: the original opened in a
        // separate try/catch and then dereferenced a null iterator (NPE)
        // whenever the open failed.
        it = FileUtils.lineIterator(file, "UTF-8");
        while (it.hasNext()) {
            String line = it.nextLine();
            String[] tokens = line.split(separator);

            if (lineCounter == 0) {
                log.info("Processing header line...");
                for (String token : tokens) {
                    ec.addPresenceCodeNOGAType(token.replaceAll("\"", ""));
                }
                log.info("Processing header line...done.");
            } else {
                String reli = tokens[0];
                // Record a presence code for every column flagged "1".
                for (int pos = 0; pos < tokens.length; pos++) {
                    if (Pattern.matches("1", tokens[pos])) {
                        ec.addPresenceCode(reli, ec.getPresenceCodeNOGAType(pos));
                    }
                }
            }
            lineCounter++;
            // Log progress at exponentially growing intervals.
            if (lineCounter % skip == 0) {
                log.info("Processed hectares: " + Integer.toString(lineCounter));
                skip *= 2;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        LineIterator.closeQuietly(it);
    }
    log.info("Processed hectares: " + Integer.toString(lineCounter));
    log.info("Reading the presence code file...done.");
}

From source file:playground.anhorni.locationchoice.preprocess.facilities.facilitiescreation.fromBZ.entreprisecensus.EnterpriseCensusParser.java

/**
 * Reads the comma-separated hectare aggregation file into the given census.
 * The first line is a header of NOGA type codes; each following line is a
 * hectare id ("reli") followed by numeric counts (zeros are skipped).
 *
 * @param ec census object that collects NOGA types and aggregation values.
 */
private final void readHectareAggregations(EnterpriseCensus ec) {

    log.info("Reading the hectare aggregation file...");

    String separator = ",";
    String filename = inputHectareAggregationFile;
    File file = new File(filename);

    LineIterator it = null;
    int lineCounter = 0, skip = 1;

    try {
        // Open and iterate inside the same try: the original opened in a
        // separate try/catch and then dereferenced a null iterator (NPE)
        // whenever the open failed.
        it = FileUtils.lineIterator(file, "UTF-8");
        while (it.hasNext()) {
            String line = it.nextLine();
            String[] tokens = line.split(separator);

            if (lineCounter == 0) {
                log.info("Processing header line...");
                for (String token : tokens) {
                    ec.addhectareAggregationNOGAType(token.replaceAll("\"", ""));
                }
                log.info("Processing header line...done.");
            } else {
                String reli = tokens[0];
                // Record every non-zero aggregation value for this hectare.
                for (int pos = 0; pos < tokens.length; pos++) {
                    if (!Pattern.matches("0", tokens[pos])) {
                        ec.addHectareAggregationInformation(reli, ec.getHectareAggregationNOGAType(pos),
                                Double.parseDouble(tokens[pos]));
                    }
                }
            }

            lineCounter++;
            // Log progress at exponentially growing intervals.
            if (lineCounter % skip == 0) {
                log.info("Processed hectares: " + Integer.toString(lineCounter));
                skip *= 2;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        LineIterator.closeQuietly(it);
    }
    log.info("Processed hectares: " + Integer.toString(lineCounter));
    log.info("Reading the hectare aggregation file...done.");
}

From source file:playground.meisterk.org.matsim.enterprisecensus.EnterpriseCensusParser.java

/**
 * Reads the presence code file (path and separator taken from the config)
 * into the given census. The first line is a header of NOGA type codes; each
 * following line is a hectare id ("reli") followed by 0/1 presence flags.
 *
 * @param ec     census object that collects NOGA types and presence codes.
 * @param config supplies the file path and separator parameters.
 */
private final void readPresenceCodes(EnterpriseCensus ec, final Config config) {

    log.info("Reading the presence code file...");

    int lineCounter = 0;
    int skip = 1;

    String filename = config.getParam(EnterpriseCensus.EC_MODULE, EnterpriseCensus.EC_PRESENCECODEFILE);
    String separator = config.getParam(EnterpriseCensus.EC_MODULE, EnterpriseCensus.EC_PRESENCECODESEPARATOR);

    File file = new File(filename);

    LineIterator it = null;

    try {
        // Open and iterate inside the same try: the original opened in a
        // separate try/catch and then dereferenced a null iterator (NPE)
        // whenever the open failed.
        it = FileUtils.lineIterator(file, "UTF-8");
        while (it.hasNext()) {
            String line = it.nextLine();
            String[] tokens = line.split(separator);

            if (lineCounter == 0) {
                log.info("Processing header line...");
                for (String token : tokens) {
                    ec.addPresenceCodeNOGAType(token.replaceAll("\"", ""));
                }
                log.info("Processing header line...done.");
            } else {
                String reli = tokens[0];
                // Record a presence code for every column flagged "1".
                for (int pos = 0; pos < tokens.length; pos++) {
                    if (Pattern.matches("1", tokens[pos])) {
                        ec.addPresenceCode(reli, ec.getPresenceCodeNOGAType(pos));
                    }
                }
            }

            lineCounter++;
            // Log progress at exponentially growing intervals.
            if (lineCounter % skip == 0) {
                log.info("Processed hectares: " + Integer.toString(lineCounter));
                skip *= 2;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        LineIterator.closeQuietly(it);
    }

    log.info("Processed hectares: " + Integer.toString(lineCounter));

    log.info("Reading the presence code file...done.");

}

From source file:playground.meisterk.org.matsim.enterprisecensus.EnterpriseCensusParser.java

/**
 * Reads the hectare aggregation file (path and separator taken from the
 * config) into the given census. The first line is a header of NOGA type
 * codes; each following line is a hectare id ("reli") followed by numeric
 * counts (zeros are skipped).
 *
 * @param ec     census object that collects NOGA types and aggregation values.
 * @param config supplies the file path and separator parameters.
 */
private final void readHectareAggregations(EnterpriseCensus ec, Config config) {

    log.info("Reading the hectare aggregation file...");

    String filename = config.getParam(EnterpriseCensus.EC_MODULE,
            EnterpriseCensus.EC_INPUTHECTAREAGGREGATIONFILE);
    String separator = config.getParam(EnterpriseCensus.EC_MODULE,
            EnterpriseCensus.EC_INPUTHECTAREAGGREGATIONSEPARATOR);
    File file = new File(filename);

    LineIterator it = null;
    int lineCounter = 0, skip = 1;

    try {
        // Open and iterate inside the same try: the original opened in a
        // separate try/catch and then dereferenced a null iterator (NPE)
        // whenever the open failed.
        it = FileUtils.lineIterator(file, "UTF-8");
        while (it.hasNext()) {
            String line = it.nextLine();
            String[] tokens = line.split(separator);

            if (lineCounter == 0) {
                log.info("Processing header line...");
                for (String token : tokens) {
                    ec.addhectareAggregationNOGAType(token.replaceAll("\"", ""));
                }
                log.info("Processing header line...done.");
            } else {
                String reli = tokens[0];
                // Record every non-zero aggregation value for this hectare.
                for (int pos = 0; pos < tokens.length; pos++) {
                    if (!Pattern.matches("0", tokens[pos])) {
                        ec.addHectareAggregationInformation(reli, ec.getHectareAggregationNOGAType(pos),
                                Double.parseDouble(tokens[pos]));
                    }
                }
            }

            lineCounter++;
            // Log progress at exponentially growing intervals.
            if (lineCounter % skip == 0) {
                log.info("Processed hectares: " + Integer.toString(lineCounter));
                skip *= 2;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        LineIterator.closeQuietly(it);
    }

    log.info("Processed hectares: " + Integer.toString(lineCounter));

    log.info("Reading the hectare aggregation file...done.");

}

From source file:playground.staheale.preprocess.AgentInteractionEnterpriseCensusParser.java

/**
 * Reads the semicolon-separated presence code file into the given census.
 * The first line is a header of NOGA type codes; each following line is a
 * hectare id ("reli") followed by 0/1 presence flags.
 *
 * @param ec census object that collects NOGA types and presence codes.
 */
private final void readPresenceCodes(AgentInteractionEnterpriseCensus ec) {

    log.info("Reading the presence code file...");

    int lineCounter = 0;
    int skip = 1;

    String filename = presenceCodeFile;
    String separator = ";";

    File file = new File(filename);

    LineIterator it = null;

    try {
        // Open and iterate inside the same try: the original opened in a
        // separate try/catch and then dereferenced a null iterator (NPE)
        // whenever the open failed.
        it = FileUtils.lineIterator(file, "UTF-8");
        while (it.hasNext()) {
            String line = it.nextLine();
            String[] tokens = line.split(separator);

            if (lineCounter == 0) {
                log.info("Processing header line...");
                for (String token : tokens) {
                    ec.addPresenceCodeNOGAType(token.replaceAll("\"", ""));
                }
                log.info("Processing header line...done.");
            } else {
                String reli = tokens[0];
                // Record a presence code for every column flagged "1".
                for (int pos = 0; pos < tokens.length; pos++) {
                    if (Pattern.matches("1", tokens[pos])) {
                        ec.addPresenceCode(reli, ec.getPresenceCodeNOGAType(pos));
                    }
                }
            }

            lineCounter++;
            // Log progress at exponentially growing intervals.
            if (lineCounter % skip == 0) {
                log.info("Processed hectares: " + Integer.toString(lineCounter));
                skip *= 2;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        LineIterator.closeQuietly(it);
    }

    log.info("Processed hectares: " + Integer.toString(lineCounter));

    log.info("Reading the presence code file...done.");

}