List of usage examples for org.apache.commons.io FileUtils lineIterator
public static LineIterator lineIterator(File file, String encoding) throws IOException
File
From source file: eu.annocultor.converters.geonames.GeonamesCsvToRdf.java
void collectParents() throws Exception { System.out.println("Loading parents"); LineIterator it = FileUtils.lineIterator(new File(root, "hierarchy.txt"), "UTF-8"); try {//from w w w . j a va2 s. c o m while (it.hasNext()) { String text = it.nextLine(); String[] fields = text.split("\t"); String parent = NS_GEONAMES_INSTANCES + fields[0] + "/"; String child = NS_GEONAMES_INSTANCES + fields[1] + "/"; broader.put(child, parent); } } finally { LineIterator.closeQuietly(it); } }
From source file:eu.annocultor.converters.geonames.GeonamesCsvToRdf.java
/**
 * Parses the GeoNames {@code allCountries.txt} dump and writes RDF triples
 * (preferred/alternative labels, population, coordinates, feature division,
 * country code and broader relations) for every feature whose country belongs
 * to the continent being converted; finally flushes and closes all per-country
 * output writers.
 *
 * @throws Exception if a data row does not have the expected 19 fields, or on
 *                   any read/write failure
 */
void features() throws Exception {
    System.out.println("Parsing features");
    // Load the country-code -> continent mapping bundled on the classpath.
    countryToContinent.load(
            (new GeonamesCsvToRdf("EU")).getClass().getResourceAsStream("/country-to-continent.properties"));
    createDirsForContinents();
    long counter = 0;
    LineIterator it = FileUtils.lineIterator(new File(root, "allCountries.txt"), "UTF-8");
    try {
        while (it.hasNext()) {
            String text = it.nextLine();
            String[] fields = text.split("\t");
            // The GeoNames dump is tab-separated with exactly 19 columns.
            if (fields.length != 19) {
                throw new Exception("Field names mismatch on " + text);
            }
            // Progress indicator: one '*' per 100000 rows.
            counter++;
            if (counter % 100000 == 0) {
                System.out.print("*");
            }
            String country = fields[countryCode];
            String continent = countryToContinent.getProperty(country);
            // Only convert features of countries on the requested continent.
            if (continent != null && continent.startsWith(continentToConvert)) {
                String id = fields[geonameid];
                String uri = NS_GEONAMES_INSTANCES + id + "/";
                String featureCodeField = fields[featureClass] + "." + fields[featureCode];
                String populationValue = fields[population];
                if (includeRecordInConversion(featureCodeField, populationValue)) {
                    // Rows whose feature code starts with A.PCLI are treated as
                    // country descriptions by the write(...) calls below.
                    boolean isDescriptionOfCountry = featureCodeField.startsWith("A.PCLI");
                    if (!fields[name].isEmpty()) {
                        write(country, new Triple(uri, SKOS.LABEL_PREFERRED, new LiteralValue(fields[name]), null),
                                isDescriptionOfCountry);
                    }
                    // String altLabels[] = fields[alternatenames].split(",");
                    // for (String altLabel : altLabels) {
                    //     write(country, new Triple(uri, SKOS.LABEL_ALT, new LiteralValue(altLabel), null));
                    // }
                    Collection<LiteralValue> altLabelCollection = altLabels.getCollection(id);
                    if (altLabelCollection != null) {
                        for (LiteralValue xmlValue : altLabelCollection) {
                            write(country, new Triple(uri, SKOS.LABEL_ALT, xmlValue, null), isDescriptionOfCountry);
                        }
                        // Drop this id's alt labels once they have been written.
                        altLabels.remove(id);
                    }
                    Collection<String> linkCollection = links.getCollection(id);
                    if (linkCollection != null) {
                        for (String link : linkCollection) {
                            // write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "link"), new LiteralValue(link), null));
                        }
                        // NOTE(review): this removes one value from the id's link
                        // collection rather than the id entry itself — asymmetric
                        // with altLabels.remove(id) above; confirm intent.
                        linkCollection.remove(fields[geonameid]);
                    }
                    // Only multi-character population values are written —
                    // presumably to skip "0"; confirm.
                    if (fields[population].length() > 1) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "population"),
                                new LiteralValue(fields[population]), null), isDescriptionOfCountry);
                    }
                    if (!fields[longitude].isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_WGS_SCHEMA + "long"),
                                new LiteralValue(fields[longitude]), null), isDescriptionOfCountry);
                    }
                    if (!fields[latitude].isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_WGS_SCHEMA + "lat"),
                                new LiteralValue(fields[latitude]), null), isDescriptionOfCountry);
                    }
                    if (!featureCodeField.isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "division"),
                                new ResourceValue(NS_GEONAMES_ONTOLOGY + featureCodeField), null),
                                isDescriptionOfCountry);
                    }
                    if (!country.isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "country"),
                                new LiteralValue(country), null), isDescriptionOfCountry);
                    }
                    // Alt label as country code (A.PCL covers the broader family
                    // of political-entity codes than A.PCLI above).
                    if (featureCodeField.startsWith("A.PCL")) {
                        write(country, new Triple(uri, SKOS.LABEL_ALT, new LiteralValue(country), null),
                                isDescriptionOfCountry);
                    }
                    for (String broaderUri : allParents(uri, country)) {
                        write(country, new Triple(uri, Concepts.DCTEMRS.IS_PART_OF, new ResourceValue(broaderUri),
                                null), isDescriptionOfCountry);
                    }
                    // if (!fields[admin1code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin1"), new LiteralValue(fields[admin1code]), null), isDescriptionOfCountry);
                    // }
                    // if (!fields[admin2code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin2"), new LiteralValue(fields[admin2code]), null), isDescriptionOfCountry);
                    // }
                    // if (!fields[admin3code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin3"), new LiteralValue(fields[admin3code]), null), isDescriptionOfCountry);
                    // }
                    // if (!fields[admin4code].isEmpty()) {
                    //     write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin4"), new LiteralValue(fields[admin4code]), null), isDescriptionOfCountry);
                    // }
                }
            }
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
    System.out.println("Finished conversion, flushing and closing output files");
    System.out.flush();
    // Close every per-country writer that was actually opened.
    for (Object country : countryToContinent.keySet()) {
        SesameWriter bf = files.get(country.toString());
        if (bf != null) {
            bf.endRDF();
        }
    }
    if (allCountries != null) {
        allCountries.endRDF();
    }
}
From source file:com.rodaxsoft.mailgun.message.tools.MailgunSender.java
/** * Sends message to the recipients specified by the <code>-R</code> option. * @param cmd Command line arguments/*from w w w . ja v a 2 s . c o m*/ * @param text Plain text email content * @param html HTML email content * @throws ContextedRuntimeException if the recipients option or -R is omitted. */ private static void sendMessageToRecipientsInFile(CommandLine cmd, String text, String html) { if (cmd.hasOption(RECIPIENTS_FILE_OPT)) { LineIterator it = null; try { it = FileUtils.lineIterator(new File(cmd.getOptionValue(RECIPIENTS_FILE_OPT)), "UTF-8"); while (it.hasNext()) { final String to = it.nextLine(); //Build the email request object final EmailRequest er = makeEmailRequest(cmd, text, html, to); LOG.trace(er); sendMessage(cmd, er); } } catch (IOException e) { LOG.error("Error occurre while sending from recipients file", e); } finally { LineIterator.closeQuietly(it); } } else { final String msg = "Option must be a recipients file"; handleOmittedOptionError(cmd, msg); } }
From source file:egovframework.rte.fdl.filehandling.FilehandlingServiceTest.java
/**
 * Verifies that {@code FileUtils.lineIterator} reads {@code pom.xml} line by
 * line, in order, by comparing every line against the expected content.
 *
 * <p>Fix: the original wrapped everything in {@code catch (Exception e) {
 * log.error(e.getCause()); }}, which swallowed any {@link IOException} (and
 * logged a usually-null cause), letting the test pass even when the file could
 * not be read. The method already declares {@code throws Exception}, so
 * failures now propagate to the test runner.</p>
 *
 * @throws Exception if the file cannot be read
 */
@Test
public void testLineIterator() throws Exception {
    String[] string = {
            "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"",
            " xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd\">",
            " <parent>", " <groupId>egovframework.rte</groupId>",
            " <artifactId>egovframework.rte.root</artifactId>", " <version>1.0.0-SNAPSHOT</version>",
            " </parent>", " <modelVersion>4.0.0</modelVersion>", " <groupId>egovframework.rte</groupId>",
            " <artifactId>egovframework.rte.fdl.filehandling</artifactId>", " <packaging>jar</packaging>",
            " <version>1.0.0-SNAPSHOT</version>", " <name>egovframework.rte.fdl.filehandling</name>",
            " <url>http://maven.apache.org</url>", " <dependencies>", " <dependency>",
            " <groupId>junit</groupId>", " <artifactId>junit</artifactId>", " <version>4.4</version>",
            " <scope>test</scope>", " </dependency>", " <dependency>", " <groupId>commons-vfs</groupId>",
            " <artifactId>commons-vfs</artifactId>", " <version>1.0</version>", " </dependency>",
            " <dependency>", " <groupId>commons-io</groupId>", " <artifactId>commons-io</artifactId>",
            " <version>1.4</version>", " </dependency>", " <!-- egovframework.rte -->", " <dependency>",
            " <groupId>egovframework.rte</groupId>", " <artifactId>egovframework.rte.fdl.string</artifactId>",
            " <version>1.0.0-SNAPSHOT</version>", " </dependency>", " </dependencies>", "</project>" };
    File file = new File("pom.xml");
    LineIterator it = FileUtils.lineIterator(file, "UTF-8");
    try {
        log.debug("############################# LineIterator ###############################");
        // Compare each line read from disk with the expected fixture, in order.
        for (int i = 0; it.hasNext(); i++) {
            String line = it.nextLine();
            log.info(line);
            assertEquals(string[i], line);
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
}
From source file:com.seniorproject.semanticweb.services.WebServices.java
/**
 * Reads a SPARQL/RDF result file line by line, strips some full predicate
 * URIs, rewrites well-known namespace URIs to their short prefixes
 * (owl:, xsd:, rdfs:, rdf:, foaf:, ...), removes leftover angle brackets,
 * and returns the rewritten lines.
 *
 * <p>Fix: the five patterns that ended in {@code "\n"} could never match,
 * because {@code LineIterator.nextLine()} (backed by
 * {@code BufferedReader.readLine()}) returns lines <em>without</em> their
 * line terminator. The terminators have been dropped so those predicate
 * removals now take effect.</p>
 *
 * @param filepath path of the file to read (UTF-8)
 * @return the rewritten lines, in file order
 * @throws IOException if the file cannot be read
 */
public ArrayList<String> replaceWithPrefix(String filepath) throws IOException {
    File file = new File(filepath);
    LineIterator it = FileUtils.lineIterator(file, "UTF-8");
    ArrayList<String> results = new ArrayList<>();
    try {
        while (it.hasNext()) {
            String content = it.nextLine();
            content = content.replace("^^<http://www.w3.org/2001/XMLSchema#int>", "");
            // Remove these predicates entirely (must run before the prefix
            // rewrites below, which would otherwise shorten them instead).
            content = content.replace("<http://xmlns.com/foaf/0.1/page>", "");
            content = content.replace("<http://www.w3.org/2002/07/owl#sameAs>", "");
            content = content.replace("<http://www.w3.org/2000/01/rdf-schema#label>", "");
            content = content.replace("<http://dbpedia.org/property/hasPhotoCollection>", "");
            content = content.replace("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>", "");
            // Namespace URI -> prefix rewrites.
            content = content.replace("<http://www.w3.org/2002/07/owl#", "owl:");
            content = content.replace("<http://www.w3.org/2001/XMLSchema#", "xsd:");
            content = content.replace("<http://www.w3.org/2000/01/rdf-schema#", "rdfs:");
            content = content.replace("<http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf:");
            content = content.replace("<http://xmlns.com/foaf/0.1/", "foaf:");
            content = content.replace("<http://data.linkedmdb.org/resource/oddlinker/", "oddlinker:");
            content = content.replace("<file:/C:/d2r-server-0.4/mapping.n3#", "map:");
            content = content.replace("<http://data.linkedmdb.org/resource/movie/", "movie:");
            content = content.replace("<http://data.linkedmdb.org/resource/", "db:");
            content = content.replace("<http://dbpedia.org/property/", "dbpedia:");
            content = content.replace("<http://www.w3.org/2004/02/skos/core#", "skos:");
            content = content.replace("<http://purl.org/dc/terms/", "dc:");
            // Strip any remaining angle brackets.
            content = content.replace(">", "");
            content = content.replace("<", "");
            results.add(content);
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
    return results;
}
From source file:com.ipcglobal.fredimport.process.ProcessReadmeSeriesId.java
/** * Read readme series id./*from w w w . j a va 2 s. c om*/ * * @return the list * @throws Exception the exception */ public List<SeriesIdItem> readReadmeSeriesId() throws Exception { List<SeriesIdItem> seriesIdItems = new ArrayList<SeriesIdItem>(); boolean isHeaderRows = true; boolean isFooterRows = false; LineIterator it = FileUtils.lineIterator(new File(inputPathFredData + "README_SERIES_ID_SORT.txt"), "UTF-8"); int numLines = 0; try { while (it.hasNext()) { String line = it.nextLine(); numLines++; if (isHeaderRows) { if (line.startsWith("File")) isHeaderRows = false; } else if (isFooterRows) { } else { if (line.length() == 0) { isFooterRows = true; continue; } // Data row // File;Title; Units; Frequency; Seasonal Adjustment; Last Updated // Bypass all (DISCONTINUED SERIES) rows; if (line.indexOf("(DISCONTINUED SERIES)") > -1 || line.indexOf("(DISCONTINUED)") > -1 || line.indexOf("(Discontinued Series)") > -1) continue; String[] fields = splitFields(line); seriesIdItems.add(new SeriesIdItem().setCsvFileName(fields[0]) .setTitle(fields[1].replace("", "")).setUnits(fields[2]).setFrequency(fields[3]) .setSeasonalAdj(fields[4]).setLastUpdated(fields[5])); } if ((numLines % 25000) == 0) log.info("readReadmeSeriesId: read lines: " + numLines); } } finally { LineIterator.closeQuietly(it); } return seriesIdItems; }
From source file:gov.nih.nci.ncicb.tcga.dcc.qclive.loader.levelthree.LevelThreeLoader.java
/**
 * Loads BROAD SNP6, MSKCC and HUDSONALPHA center files.
 *
 * @param segmentFile
 *            the segment file to load
 * @param extractNameIndex
 *            index of "EXTRACT_NAME" column in SDRF
 * @param sdrfNavigator
 *            a data structure containing SDRF
 * @param dataSetId
 *            id of the data set associated with the load
 * @param platform
 *            platform associated with the load. NOTE(review): not referenced
 *            in this method body — confirm whether it is still needed.
 * @param center
 *            center associated with the load
 * @throws LoaderException
 *             if CNA load resulted in an error
 */
public void loadCnaValues(File segmentFile, Integer extractNameIndex, TabDelimitedContentNavigator sdrfNavigator,
        Integer dataSetId, String platform, String center) throws LoaderException {
    logger.debug(String.format(fileToDbLoadInfoPlaceholder, segmentFile, "cna_value"));
    SegmentRecord segmentRecord = getSegmentRecordForCenter(center);
    String prevHybridizationRefName = "";
    Integer recordNumber = 0;
    List<CnaValue> cnaValues = new ArrayList<CnaValue>();
    // Records whose hybridization ref id could not be resolved (presumably
    // control samples, per the variable name), keyed by record number; used
    // only for debug logging below.
    Map<Integer, String> controlSampleRecords = new HashMap<Integer, String>();
    LineIterator segmentLineIterator = null;
    try {
        segmentLineIterator = FileUtils.lineIterator(segmentFile, CharEncoding.UTF_8);
        // The first line is validated with assertRecord(true) but not loaded —
        // presumably the header row; confirm against SegmentRecord.
        String segmentFileRecord = segmentLineIterator.nextLine();
        String[] recordValues = segmentFileRecord.split(SEGMENT_RECORD_DELIMITER);
        segmentRecord.setRecordValues(Arrays.asList(recordValues));
        segmentRecord.assertRecord(true);
        ++recordNumber;
        while (segmentLineIterator.hasNext()) {
            segmentFileRecord = segmentLineIterator.nextLine();
            recordValues = segmentFileRecord.split(SEGMENT_RECORD_DELIMITER);
            segmentRecord.setRecordValues(Arrays.asList(recordValues));
            segmentRecord.setRecordNumber(recordNumber);
            segmentRecord.assertRecord(false);
            CnaValue cnaValue = segmentRecord.getCnaValue();
            cnaValue.setDataSetId(dataSetId);
            resolveHybridizationRefId(cnaValue, prevHybridizationRefName, sdrfNavigator, extractNameIndex,
                    dataSetId);
            validate(cnaValue, recordNumber);
            if (cnaValue.getHybridizationRefId() != null) {
                // A copy is stored — presumably getCnaValue() reuses one
                // instance across rows; confirm.
                cnaValues.add(cnaValue.copy());
            } else {
                controlSampleRecords.put(recordNumber, cnaValue.getHybridizationRefName());
            }
            // Persist in fixed-size batches.
            if (cnaValues.size() == getBatchSize()) {
                if (logger.isDebugEnabled()) {
                    printCnaLoadBatchInfo(controlSampleRecords);
                }
                persistCnaValues(cnaValues);
                cnaValues.clear();
            }
            prevHybridizationRefName = cnaValue.getHybridizationRefName();
            ++recordNumber;
        }
        // Flush the final partial batch, if any.
        if (cnaValues.size() > 0) {
            if (logger.isDebugEnabled()) {
                printCnaLoadBatchInfo(controlSampleRecords);
            }
            persistCnaValues(cnaValues);
            cnaValues.clear();
        }
    } catch (IOException ioe) {
        throw new LoaderException(ioe.getMessage(), ioe);
    } finally {
        LineIterator.closeQuietly(segmentLineIterator);
    }
}
From source file:net.stargraph.core.DocumentIterator.java
/**
 * Creates an iterator over the documents of the given knowledge base, opening
 * a UTF-8 line iterator over the backing file and parsing the first entry.
 *
 * @throws StarGraphException if the document file cannot be read
 */
public DocumentIterator(Stargraph core, KBId kbId) {
    this.core = Objects.requireNonNull(core);
    this.kbId = Objects.requireNonNull(kbId);
    this.mapper = ObjectSerializer.createMapper(kbId);
    File dataFile = getFilePath(kbId.getId()).toFile();
    try {
        this.lineIt = FileUtils.lineIterator(dataFile, "UTF-8");
        // Parse the first entry up front — presumably priming the iterator;
        // confirm against parseNext().
        parseNext();
    } catch (IOException e) {
        logger.error(marker, "Failed to load documents from file {}.", dataFile);
        throw new StarGraphException(e);
    }
}
From source file:net.tachtler.browscap4j.Browscap4jFileReader.java
/**
 * Initialize the Browscap4jDataBean from the given browscap.csv file.
 *
 * <p>Builds (a) one large string containing every csv line concatenated
 * without separators, and (b) a LinkedHashMap from the compiled user-agent
 * regex of each line's first column to a Browscap4jPositionBean holding that
 * line's position inside the large string.</p>
 *
 * @param csvFile the browscap.csv file to read (UTF-8)
 * @return Browscap4jDataBean
 * @throws IllegalStateException if csvFile is null
 * @throws FileNotFoundException
 * @throws IOException
 */
public static Browscap4jDataBean initBrowscap4jData(File csvFile)
        throws IllegalStateException, FileNotFoundException, IOException {
    log.debug("*csvFile : " + csvFile);
    Browscap4jDataBean browscap4jDataBean = new Browscap4jDataBean();
    // Insertion-ordered map keyed by the regex pattern built from the original
    // userAgentString column; values locate each line in the big string.
    // NOTE(review): the choice of LinkedHashMap suggests csv order matters for
    // matching precedence — confirm.
    LinkedHashMap<Pattern, Browscap4jPositionBean> browscap4jMap = new LinkedHashMap<Pattern, Browscap4jPositionBean>();
    // All csv lines concatenated into a single string (filled in below).
    String browscap4jString = null;
    // StringBuilder for fast concatenation.
    StringBuilder stringBuilder = new StringBuilder();
    if (null == csvFile) {
        throw new IllegalStateException("Argument csvFile is null (NOT set).");
    }
    LineIterator lineIterator = FileUtils.lineIterator(csvFile, "UTF-8");
    try {
        int offset = 0;
        String[] col = null;
        while (lineIterator.hasNext()) {
            String line = lineIterator.nextLine();
            // Position of this line inside the concatenated string: start
            // offset and (presumably) end offset — lines are appended without
            // separators, so the next line starts at offset + line.length().
            Browscap4jPositionBean browscap4jPositionBean = new Browscap4jPositionBean(offset,
                    offset + line.length());
            offset += line.length();
            // The first quoted csv column is the user-agent pattern; drop its
            // leading quote and compile it to a regex key.
            col = line.split("\",\"");
            browscap4jMap.put(Pattern.compile(convertToRegex(col[0].substring(1))), browscap4jPositionBean);
            stringBuilder.append(line);
        }
    } finally {
        LineIterator.closeQuietly(lineIterator);
    }
    browscap4jString = stringBuilder.toString();
    // Log only the classes, not the contents — the map and string are huge;
    // this merely shows they are non-null in debug mode.
    log.debug("*browscap4jMap : " + browscap4jMap.getClass());
    log.debug("*browscap4jString : " + browscap4jString.getClass());
    browscap4jDataBean.setBrowscap4jMap(browscap4jMap);
    browscap4jDataBean.setBrowscap4jString(browscap4jString);
    return browscap4jDataBean;
}
From source file:nl.knaw.huygens.timbuctoo.tools.importer.neww.WomenWritersImporter.java
/**
 * Opens a UTF-8 line iterator over the named file inside {@code inputDir}.
 * The caller is responsible for closing the returned iterator.
 *
 * @param filename name of the file within the input directory
 * @return an open iterator over the file's lines
 * @throws IOException if the file cannot be opened
 */
private LineIterator getLineIterator(String filename) throws IOException {
    return FileUtils.lineIterator(new File(inputDir, filename), "UTF-8");
}