Example usage for org.apache.commons.io FileUtils lineIterator

List of usage examples for org.apache.commons.io FileUtils lineIterator

Introduction

On this page you can find usage examples for org.apache.commons.io FileUtils lineIterator.

Prototype

public static LineIterator lineIterator(File file, String encoding) throws IOException 

Source Link

Document

Returns an Iterator for the lines in a File.

Usage

From source file:de.tum.i13.ConvertCsvToProtobuf.java

/**
 * Converts a comma-separated input file into a sequence of delimited
 * {@code Debs2015Protos.Taxitrip} messages.
 *
 * <p>Each input line is split on {@code ','} and its first 17 columns are copied, in order,
 * into a {@code Taxitrip} builder; the built message is appended to the output file with
 * {@code writeDelimitedTo}. Input and output paths are hard-coded, and the output file is
 * opened in append mode.
 *
 * <p>Any exception (I/O failure, a line with fewer than 17 columns, an unparsable number)
 * aborts the conversion and is printed to standard error. Both the line iterator and the
 * output stream are released whether or not the conversion succeeds.
 *
 * @param args ignored
 */
public static void main(String args[]) {
    try {
        LineIterator it = FileUtils.lineIterator(new File("/Users/manit/Projects/sdcbenchmark/Dataset/debscsv"),
                "UTF-8");
        try (FileOutputStream out = new FileOutputStream(
                "/Users/manit/Projects/sdcbenchmark/Dataset/debsprotobuf", true)) {

            while (it.hasNext()) {
                // The iterator already decoded the line as UTF-8; re-encoding it through the
                // platform default charset would corrupt non-ASCII text on non-UTF-8 systems.
                String line = it.nextLine();
                Debs2015Protos.Taxitrip.Builder builder = Debs2015Protos.Taxitrip.newBuilder();
                String[] splitted = line.split(",");

                builder.setMedallion(splitted[0]);
                builder.setHackLicense(splitted[1]);
                builder.setPickupDatetime(splitted[2]);
                builder.setDropoffDatetime(splitted[3]);
                builder.setTripTimeInSecs(Integer.parseInt(splitted[4]));
                builder.setTripDistance(Float.parseFloat(splitted[5]));
                builder.setPickupLongitude(Float.parseFloat(splitted[6]));
                builder.setPickupLatitude(Float.parseFloat(splitted[7]));
                builder.setDropoffLongitude(Float.parseFloat(splitted[8]));
                builder.setDropoffLatitude(Float.parseFloat(splitted[9]));
                builder.setPaymentType(splitted[10]);
                builder.setFareAmount(Float.parseFloat(splitted[11]));
                builder.setSurcharge(Float.parseFloat(splitted[12]));
                builder.setMtaTax(Float.parseFloat(splitted[13]));
                builder.setTipAmount(Float.parseFloat(splitted[14]));
                builder.setTollsAmount(Float.parseFloat(splitted[15]));
                builder.setTotalAmount(Float.parseFloat(splitted[16]));

                builder.build().writeDelimitedTo(out);
            }
        } finally {
            // Release the input file handle even when a line fails to convert.
            LineIterator.closeQuietly(it);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:de.tu.darmstadt.lt.ner.preprocessing.SentenceToCRFWriter.java

/**
 * Collects the second tab-separated column of every line of the input file and passes the
 * collected values to {@code GermaNERMain.sentenceToCRFFormat}.
 *
 * <p>Each line is trimmed before it is split on tab characters. Lines whose second column is
 * empty are skipped. A line that has no second column after trimming raises
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param args {@code args[0]} is the path of the UTF-8 input file; {@code args[1]} is
 *             forwarded as the second argument of {@code sentenceToCRFFormat}
 * @throws UIMAException propagated from {@code GermaNERMain.sentenceToCRFFormat}
 * @throws IllegalArgumentException propagated from the called code
 * @throws IOException if the input file cannot be opened or read
 */
public static void main(String[] args) throws UIMAException, IllegalArgumentException, IOException {
    List<String> sentences = new ArrayList<String>();
    LineIterator sentIt = FileUtils.lineIterator(new File(args[0]), "UTF-8");
    try {
        while (sentIt.hasNext()) {
            String line = sentIt.nextLine().trim().split("\t")[1];
            if (line.equals("")) {
                continue;
            }
            sentences.add(line);
        }
    } finally {
        // The iterator holds an open file handle; release it even if a line is malformed.
        LineIterator.closeQuietly(sentIt);
    }
    GermaNERMain.sentenceToCRFFormat(sentences, args[1], "de");
}

From source file:eu.annocultor.converters.geonames.GeonamesDumpToRdf.java

/**
 * Splits the combined dump {@code input_source/all-geonames-rdf.txt} into one RDF file per
 * country, stored as {@code input_source/<continent>/<country>.rdf}.
 *
 * <p>The country-to-continent mapping is loaded from the classpath resource
 * {@code /country-to-continent.properties}; one output writer is opened for every key of that
 * mapping. Lines starting with {@code http://sws.geonames} are skipped. For every other line
 * the country code is taken from its {@code inCountry} element (or the literal key
 * {@code "null"} when none is found), the closing {@code </rdf:RDF>} tag is stripped, and,
 * for all but the first record of a country, the opening {@code <rdf:RDF ...>} tag is stripped
 * as well. A single closing tag is appended to every file that received at least one record.
 *
 * <p>All writers are closed when the method exits, including those for countries that never
 * received a record and including the case where processing fails part-way.
 *
 * @param args ignored
 * @throws Exception if a line refers to a country missing from the mapping, or on any I/O
 *                   or parsing failure
 */
public static void main(String[] args) throws Exception {
    File root = new File("input_source");

    // load country-continent match
    java.io.InputStream mapping = (new GeonamesDumpToRdf()).getClass()
            .getResourceAsStream("/country-to-continent.properties");
    try {
        countryToContinent.load(mapping);
    } finally {
        // Properties.load leaves the stream open, so close it here.
        if (mapping != null) {
            mapping.close();
        }
    }

    // creating files
    Map<String, BufferedWriter> files = new HashMap<String, BufferedWriter>();
    Map<String, Boolean> started = new HashMap<String, Boolean>();

    try {
        for (Object string : countryToContinent.keySet()) {
            String continent = countryToContinent.getProperty(string.toString());
            File dir = new File(root, continent);
            if (!dir.exists()) {
                dir.mkdir();
            }
            files.put(string.toString(), new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(new File(root, continent + "/" + string + ".rdf")), "UTF-8")));
            System.out.println(continent + "/" + string + ".rdf");
            started.put(string.toString(), false);
        }

        System.out.println(started);

        Pattern countryPattern = Pattern
                .compile("<inCountry rdf\\:resource\\=\"http\\://www\\.geonames\\.org/countries/\\#(\\w\\w)\"/>");
        long counter = 0;
        LineIterator it = FileUtils.lineIterator(new File(root, "all-geonames-rdf.txt"), "UTF-8");
        try {
            while (it.hasNext()) {
                String text = it.nextLine();
                if (text.startsWith("http://sws.geonames"))
                    continue;

                // progress
                counter++;
                if (counter % 100000 == 0) {
                    System.out.print("*");
                }
                // get country
                String country = null;
                Matcher matcher = countryPattern.matcher(text);
                if (matcher.find()) {
                    country = matcher.group(1);
                }
                if (country == null)
                    country = "null";
                text = text.replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><rdf:RDF",
                        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><rdf:RDF");
                if (started.get(country) == null)
                    throw new Exception("Unknow country " + country);
                if (started.get(country).booleanValue()) {
                    // remove RDF opening
                    text = text.substring(text.indexOf("<rdf:RDF "));
                    text = text.substring(text.indexOf(">") + 1);
                }
                // remove RDF ending
                text = text.substring(0, text.indexOf("</rdf:RDF>"));
                files.get(country).append(text + "\n");
                started.put(country, true);
            }
        } finally {
            LineIterator.closeQuietly(it);
        }

        // terminate every file that received at least one record
        for (Object string : countryToContinent.keySet()) {
            boolean hasStarted = started.get(string.toString()).booleanValue();
            if (hasStarted) {
                BufferedWriter bf = files.get(string.toString());
                bf.append("</rdf:RDF>");
                bf.flush();
                bf.close();
            }
        }
    } finally {
        // Release every remaining file handle: writers of countries without records, and all
        // writers when processing failed. Closing an already-closed writer has no effect.
        for (BufferedWriter writer : files.values()) {
            try {
                writer.close();
            } catch (java.io.IOException ignored) {
                // best-effort cleanup; do not mask an exception that is already propagating
            }
        }
    }
}

From source file:com.cirro.jsonjoin.utils.FileManager.java

/**
 * Reads a UTF-8 file line by line and converts every line into a row of the requested type.
 *
 * @param file      file to read
 * @param valueType row class handed to {@code convertToRow} for every line
 * @param <T>       concrete row type
 * @return the converted rows, in file order
 * @throws IOException if the file cannot be opened or read
 * @throws ClassCastException if {@code convertToRow} yields an object that is not a
 *                            {@code valueType} instance
 */
public static <T extends Row> List<T> loadFile(File file, Class<T> valueType) throws IOException {
    List<T> rowList = new ArrayList<>();
    LineIterator it = FileUtils.lineIterator(file, "UTF-8");
    try {
        while (it.hasNext()) {
            String line = it.nextLine();
            // Checked cast keeps the list type-safe instead of relying on a raw List.
            rowList.add(valueType.cast(convertToRow(line, valueType)));
        }
    } finally {
        // The iterator owns an open file handle; release it even when conversion fails.
        LineIterator.closeQuietly(it);
    }
    return rowList;
}

From source file:com.cirro.jsonjoin.utils.FileManager.java

/**
 * Reads a UTF-8 file line by line, converts every line into a row of the requested type and
 * returns the rows as a stream.
 *
 * <p>The whole file is read and converted before the stream is returned, so the file handle
 * is already released by the time the caller consumes the stream.
 *
 * @param file      file to read
 * @param valueType row class handed to {@code convertToRow} for every line
 * @param <T>       concrete row type
 * @return a sequential stream over the converted rows, in file order
 * @throws IOException if the file cannot be opened or read
 * @throws ClassCastException if {@code convertToRow} yields an object that is not a
 *                            {@code valueType} instance
 */
public static <T extends Row> Stream<T> loadFileStream(File file, Class<T> valueType) throws IOException {
    List<T> rowList = new ArrayList<>();
    LineIterator it = FileUtils.lineIterator(file, "UTF-8");
    try {
        while (it.hasNext()) {
            String line = it.nextLine();
            // Checked cast keeps the list type-safe instead of relying on a raw List.
            rowList.add(valueType.cast(convertToRow(line, valueType)));
        }
    } finally {
        // The iterator owns an open file handle; release it even when conversion fails.
        LineIterator.closeQuietly(it);
    }
    return rowList.stream();
}

From source file:net.orzo.data.PositionAwareLineIterator.java

/**
 * Builds a {@link PositionAwareLineIterator} over the lines of the file at the given path.
 *
 * @param path     path of the file to read
 * @param encoding encoding name handed to {@code FileUtils.lineIterator}
 * @return a new {@link PositionAwareLineIterator} wrapping the file's line iterator
 * @throws IOException propagated from {@code FileUtils.lineIterator}
 */
public static PositionAwareLineIterator create(String path, String encoding) throws IOException {
    File source = new File(path);
    return new PositionAwareLineIterator(
            FileUtils.lineIterator(source, encoding));
}

From source file:net.orzo.data.PositionAwareLineIterator.java

/**
 * Creates a {@link PositionAwareLineIterator} wrapping the line iterator that
 * {@code FileUtils.lineIterator} returns for the given file.
 *
 * @param file     file whose lines are iterated
 * @param encoding encoding name handed to {@code FileUtils.lineIterator}
 * @return a new {@link PositionAwareLineIterator} over the file's lines
 * @throws IOException propagated from {@code FileUtils.lineIterator}
 */
public static PositionAwareLineIterator create(File file, String encoding) throws IOException {
    return new PositionAwareLineIterator(FileUtils.lineIterator(file, encoding));
}

From source file:net.orzo.data.PositionAwareLineIterator.java

/**
 * Creates a {@link PositionAwareLineIterator} over the lines of the given file, read as
 * UTF-8.
 *
 * @param file file whose lines are iterated
 * @return a new {@link PositionAwareLineIterator} over the file's lines
 * @throws IOException propagated from {@code FileUtils.lineIterator}
 */
public static PositionAwareLineIterator create(File file) throws IOException {
    return new PositionAwareLineIterator(FileUtils.lineIterator(file, "UTF-8"));
}

From source file:com.comcast.cats.monitor.util.FileSearchUtil.java

/**
 * Counts the lines of a file in which a regular expression matches at least once.
 *
 * <p>A line contributes one hit regardless of how many matches it contains. If the file does
 * not exist the result is {@code 0}. The expression is compiled before the existence check,
 * so an invalid expression fails even for a missing file.
 *
 * @param filePath   path of the file to scan; must be neither {@code null} nor empty
 * @param expression regular expression to search for; must be neither {@code null} nor empty
 * @return number of matching lines
 * @throws IllegalArgumentException if either argument is {@code null} or empty
 * @throws IOException propagated from {@code FileUtils.lineIterator}
 */
public static Integer countHitsByRegex(String filePath, String expression) throws IOException {
    boolean expressionMissing = (expression == null) || expression.isEmpty();
    boolean pathMissing = (filePath == null) || filePath.isEmpty();
    if (expressionMissing || pathMissing) {
        throw new IllegalArgumentException("Expression/FilePath is NULL of EMPTY !!!");
    }

    File target = new File(filePath);
    Pattern compiled = Pattern.compile(expression);
    if (!target.exists()) {
        return 0;
    }

    LineIterator lines = FileUtils.lineIterator(target, UTF_8_ENCODING);
    LOGGER.info("Expression under search = " + expression);

    int matchedLines = 0;
    try {
        while (lines.hasNext()) {
            if (compiled.matcher(lines.nextLine()).find()) {
                matchedLines++;
            }
        }
    } finally {
        // always release the underlying file handle
        LineIterator.closeQuietly(lines);
    }
    return matchedLines;
}

From source file:net.orzo.data.PositionAwareLineIterator.java

/**
 * Builds a {@link PositionAwareLineIterator} over the lines of the file at the given path,
 * read as UTF-8.
 *
 * @param path path of the file to read
 * @return a new {@link PositionAwareLineIterator} wrapping the file's line iterator
 * @throws IOException propagated from {@code FileUtils.lineIterator}
 */
public static PositionAwareLineIterator create(String path) throws IOException {
    File source = new File(path);
    return new PositionAwareLineIterator(
            FileUtils.lineIterator(source, "UTF-8"));
}