List of usage examples for org.apache.commons.io FileUtils lineIterator
public static LineIterator lineIterator(File file, String encoding) throws IOException
File
From source file:de.tum.i13.ConvertCsvToProtobuf.java
/**
 * Converts the DEBS taxi-trip CSV dump into length-delimited
 * {@code Debs2015Protos.Taxitrip} messages appended to the protobuf output file.
 *
 * <p>Each input line is split on {@code ','} and columns 0..16 are mapped, in order, onto the
 * builder's setters. Any failure (missing file, short row, unparsable number) is printed via
 * {@code printStackTrace()} and aborts the conversion; both the input iterator and the output
 * stream are closed in every case.
 *
 * @param args unused
 */
public static void main(String args[]) {
    try {
        LineIterator it = FileUtils.lineIterator(
                new File("/Users/manit/Projects/sdcbenchmark/Dataset/debscsv"), "UTF-8");
        // try-with-resources closes the output even when a row fails to parse;
        // the finally clause releases the reader behind the line iterator.
        try (FileOutputStream out = new FileOutputStream(
                "/Users/manit/Projects/sdcbenchmark/Dataset/debsprotobuf", true)) {
            while (it.hasNext()) {
                // The iterator already decodes the file as UTF-8. Do not round-trip the line
                // through getBytes(): that uses the platform charset and can corrupt text.
                String line = it.nextLine();
                String[] splitted = line.split(",");

                Debs2015Protos.Taxitrip.Builder builder = Debs2015Protos.Taxitrip.newBuilder();
                builder.setMedallion(splitted[0]);
                builder.setHackLicense(splitted[1]);
                builder.setPickupDatetime(splitted[2]);
                builder.setDropoffDatetime(splitted[3]);
                builder.setTripTimeInSecs(Integer.parseInt(splitted[4]));
                builder.setTripDistance(Float.parseFloat(splitted[5]));
                builder.setPickupLongitude(Float.parseFloat(splitted[6]));
                builder.setPickupLatitude(Float.parseFloat(splitted[7]));
                builder.setDropoffLongitude(Float.parseFloat(splitted[8]));
                builder.setDropoffLatitude(Float.parseFloat(splitted[9]));
                builder.setPaymentType(splitted[10]);
                builder.setFareAmount(Float.parseFloat(splitted[11]));
                builder.setSurcharge(Float.parseFloat(splitted[12]));
                builder.setMtaTax(Float.parseFloat(splitted[13]));
                builder.setTipAmount(Float.parseFloat(splitted[14]));
                builder.setTollsAmount(Float.parseFloat(splitted[15]));
                builder.setTotalAmount(Float.parseFloat(splitted[16]));
                builder.build().writeDelimitedTo(out);
            }
        } finally {
            LineIterator.closeQuietly(it);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:de.tu.darmstadt.lt.ner.preprocessing.SentenceToCRFWriter.java
public static void main(String[] args) throws UIMAException, IllegalArgumentException, IOException { LineIterator sentIt = FileUtils.lineIterator(new File(args[0]), "UTF-8"); List<String> sentences = new ArrayList<String>(); StringBuilder sb = new StringBuilder(); int index = 0; while (sentIt.hasNext()) { String line = sentIt.nextLine().toString().trim().split("\t")[1]; if (line.equals("")) { continue; }// w ww . jav a2s . c o m sentences.add(line); } GermaNERMain.sentenceToCRFFormat(sentences, args[1], "de"); }
From source file:eu.annocultor.converters.geonames.GeonamesDumpToRdf.java
public static void main(String[] args) throws Exception { File root = new File("input_source"); // load country-continent match countryToContinent//from w w w . j ava 2 s . com .load((new GeonamesDumpToRdf()).getClass().getResourceAsStream("/country-to-continent.properties")); // creating files Map<String, BufferedWriter> files = new HashMap<String, BufferedWriter>(); Map<String, Boolean> started = new HashMap<String, Boolean>(); for (Object string : countryToContinent.keySet()) { String continent = countryToContinent.getProperty(string.toString()); File dir = new File(root, continent); if (!dir.exists()) { dir.mkdir(); } files.put(string.toString(), new BufferedWriter(new OutputStreamWriter( new FileOutputStream(new File(root, continent + "/" + string + ".rdf")), "UTF-8"))); System.out.println(continent + "/" + string + ".rdf"); started.put(string.toString(), false); } System.out.println(started); Pattern countryPattern = Pattern .compile("<inCountry rdf\\:resource\\=\"http\\://www\\.geonames\\.org/countries/\\#(\\w\\w)\"/>"); long counter = 0; LineIterator it = FileUtils.lineIterator(new File(root, "all-geonames-rdf.txt"), "UTF-8"); try { while (it.hasNext()) { String text = it.nextLine(); if (text.startsWith("http://sws.geonames")) continue; // progress counter++; if (counter % 100000 == 0) { System.out.print("*"); } // System.out.println(counter); // get country String country = null; Matcher matcher = countryPattern.matcher(text); if (matcher.find()) { country = matcher.group(1); } // System.out.println(country); if (country == null) country = "null"; text = text.replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><rdf:RDF", "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><rdf:RDF"); if (started.get(country) == null) throw new Exception("Unknow country " + country); if (started.get(country).booleanValue()) { // remove RDF opening text = text.substring(text.indexOf("<rdf:RDF ")); text = text.substring(text.indexOf(">") + 1); } // 
remove RDF ending text = text.substring(0, text.indexOf("</rdf:RDF>")); files.get(country).append(text + "\n"); if (!started.get(country).booleanValue()) { // System.out.println("Started with country " + country); } started.put(country, true); } } finally { LineIterator.closeQuietly(it); } for (Object string : countryToContinent.keySet()) { boolean hasStarted = started.get(string.toString()).booleanValue(); if (hasStarted) { BufferedWriter bf = files.get(string.toString()); bf.append("</rdf:RDF>"); bf.flush(); bf.close(); } } return; }
From source file:com.cirro.jsonjoin.utils.FileManager.java
public static <T extends Row> List<T> loadFile(File file, Class<T> valueType) throws IOException { List rowList = new ArrayList(); LineIterator it = FileUtils.lineIterator(file, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); Row row = convertToRow(line, valueType); rowList.add(row);//from ww w. j a v a 2 s.c o m } return rowList; }
From source file:com.cirro.jsonjoin.utils.FileManager.java
public static <T extends Row> Stream<T> loadFileStream(File file, Class<T> valueType) throws IOException { List rowList = new ArrayList(); LineIterator it = FileUtils.lineIterator(file, "UTF-8"); while (it.hasNext()) { String line = it.nextLine(); Row row = convertToRow(line, valueType); rowList.add(row);// w w w . j ava 2 s . c o m } return rowList.stream(); }
From source file:net.orzo.data.PositionAwareLineIterator.java
/**
 * Creates a {@code PositionAwareLineIterator} over the file at the given path.
 *
 * @param path     path of the file to iterate
 * @param encoding character encoding handed to {@code FileUtils.lineIterator}
 * @return a new iterator wrapping the line iterator opened on the file
 * @throws IOException if the line iterator cannot be opened
 */
public static PositionAwareLineIterator create(String path, String encoding) throws IOException {
    File source = new File(path);
    return new PositionAwareLineIterator(FileUtils.lineIterator(source, encoding));
}
From source file:net.orzo.data.PositionAwareLineIterator.java
/** * * @param encoding/* w ww . j a v a 2 s .co m*/ * @return * @throws IOException */ public static PositionAwareLineIterator create(File file, String encoding) throws IOException { return new PositionAwareLineIterator(FileUtils.lineIterator(file, encoding)); }
From source file:net.orzo.data.PositionAwareLineIterator.java
/**
 * Creates a {@code PositionAwareLineIterator} over the given file, read as UTF-8.
 *
 * @param file the file to iterate
 * @return a new iterator wrapping the line iterator opened on the file
 * @throws IOException if the line iterator cannot be opened
 */
public static PositionAwareLineIterator create(File file) throws IOException {
    final String encoding = "UTF-8";
    return new PositionAwareLineIterator(FileUtils.lineIterator(file, encoding));
}
From source file:com.comcast.cats.monitor.util.FileSearchUtil.java
/**
 * Counts the lines of a file that contain at least one match of a regular expression.
 *
 * <p>The expression is compiled before the file is checked for existence. A file that does
 * not exist yields a count of zero.
 *
 * @param filePath   path of the file to scan; must be non-null and non-empty
 * @param expression regular expression to search for; must be non-null and non-empty
 * @return the number of lines in which the expression is found
 * @throws IllegalArgumentException if either argument is null or empty
 * @throws IOException if the file cannot be opened or read
 */
public static Integer countHitsByRegex(String filePath, String expression) throws IOException {
    boolean expressionMissing = (expression == null) || expression.isEmpty();
    boolean pathMissing = (filePath == null) || filePath.isEmpty();
    if (expressionMissing || pathMissing) {
        throw new IllegalArgumentException("Expression/FilePath is NULL of EMPTY !!!");
    }

    File target = new File(filePath);
    Pattern compiled = Pattern.compile(expression);
    if (!target.exists()) {
        return 0;
    }

    int matchingLines = 0;
    LineIterator lines = FileUtils.lineIterator(target, UTF_8_ENCODING);
    LOGGER.info("Expression under search = " + expression);
    try {
        while (lines.hasNext()) {
            if (compiled.matcher(lines.nextLine()).find()) {
                matchingLines++;
            }
        }
    } finally {
        LineIterator.closeQuietly(lines);
    }
    return matchingLines;
}
From source file:net.orzo.data.PositionAwareLineIterator.java
/** * *///from w w w .ja v a2s . com public static PositionAwareLineIterator create(String path) throws IOException { return new PositionAwareLineIterator(FileUtils.lineIterator(new File(path), "UTF-8")); }