Example usage for org.apache.commons.io FileUtils lineIterator

List of usage examples for org.apache.commons.io FileUtils lineIterator

Introduction

This page collects usage examples for the lineIterator method of org.apache.commons.io.FileUtils.

Prototype

public static LineIterator lineIterator(File file, String encoding) throws IOException 

Source Link

Document

Returns an Iterator for the lines in a File.

Usage

From source file:eu.annocultor.converters.geonames.GeonamesCsvToRdf.java

/**
 * Reads the tab-separated {@code hierarchy.txt} file located under {@code root} and
 * registers, for every line, the first column as the parent of the second column in
 * {@code broader}. Both ids are turned into URIs by prefixing
 * {@code NS_GEONAMES_INSTANCES} and appending a trailing slash.
 *
 * @throws Exception if the file cannot be opened or read
 */
void collectParents() throws Exception {
    System.out.println("Loading parents");
    final File hierarchyFile = new File(root, "hierarchy.txt");
    final LineIterator lines = FileUtils.lineIterator(hierarchyFile, "UTF-8");
    try {
        while (lines.hasNext()) {
            // column 0 = parent id, column 1 = child id
            final String[] columns = lines.nextLine().split("\t");
            final String parentUri = NS_GEONAMES_INSTANCES + columns[0] + "/";
            final String childUri = NS_GEONAMES_INSTANCES + columns[1] + "/";
            broader.put(childUri, parentUri);
        }
    } finally {
        // always release the underlying reader, even when a line is malformed
        LineIterator.closeQuietly(lines);
    }
}

From source file:eu.annocultor.converters.geonames.GeonamesCsvToRdf.java

/**
 * Converts the tab-separated {@code allCountries.txt} file under {@code root} into triples.
 *
 * <p>Steps, in order: load the country-to-continent mapping from the classpath resource
 * {@code /country-to-continent.properties}, create the per-continent directories, stream the
 * input file line by line and call {@code write(...)} for every accepted record, then call
 * {@code endRDF()} on every writer found in {@code files} and on {@code allCountries}.
 *
 * <p>A record is accepted when its country maps to a continent that starts with
 * {@code continentToConvert} and {@code includeRecordInConversion(...)} returns true.
 *
 * @throws Exception if a line does not split into exactly 19 tab-separated fields, or if
 *                   reading the input fails
 */
void features() throws Exception {
    System.out.println("Parsing features");
    // load country-continent match
    countryToContinent.load(
            (new GeonamesCsvToRdf("EU")).getClass().getResourceAsStream("/country-to-continent.properties"));

    createDirsForContinents();

    long counter = 0;
    LineIterator it = FileUtils.lineIterator(new File(root, "allCountries.txt"), "UTF-8");
    try {
        while (it.hasNext()) {
            String text = it.nextLine();

            // every record must have exactly 19 columns; anything else aborts the conversion
            String[] fields = text.split("\t");
            if (fields.length != 19) {
                throw new Exception("Field names mismatch on " + text);
            }

            // progress: one "*" per 100000 input lines
            counter++;
            if (counter % 100000 == 0) {
                System.out.print("*");
            }
            String country = fields[countryCode];
            String continent = countryToContinent.getProperty(country);
            // skip countries that are unknown or belong to another continent
            if (continent != null && continent.startsWith(continentToConvert)) {

                String id = fields[geonameid];
                String uri = NS_GEONAMES_INSTANCES + id + "/";
                String featureCodeField = fields[featureClass] + "." + fields[featureCode];
                String populationValue = fields[population];
                if (includeRecordInConversion(featureCodeField, populationValue)) {

                    // feature codes starting with "A.PCLI" are flagged and passed to every write(...)
                    boolean isDescriptionOfCountry = featureCodeField.startsWith("A.PCLI");

                    // preferred label
                    if (!fields[name].isEmpty()) {
                        write(country,
                                new Triple(uri, SKOS.LABEL_PREFERRED, new LiteralValue(fields[name]), null),
                                isDescriptionOfCountry);
                    }
                    //            String altLabels[] = fields[alternatenames].split(",");
                    //            for (String altLabel : altLabels) {
                    //               write(country, new Triple(uri, SKOS.LABEL_ALT, new LiteralValue(altLabel), null));
                    //            }
                    // alternative labels come from the altLabels collection keyed by id;
                    // the entry is removed once it has been written
                    Collection<LiteralValue> altLabelCollection = altLabels.getCollection(id);
                    if (altLabelCollection != null) {
                        for (LiteralValue xmlValue : altLabelCollection) {
                            write(country, new Triple(uri, SKOS.LABEL_ALT, xmlValue, null),
                                    isDescriptionOfCountry);
                        }
                        altLabels.remove(id);
                    }
                    Collection<String> linkCollection = links.getCollection(id);
                    if (linkCollection != null) {
                        // NOTE(review): the loop body is commented out, so links are currently not written
                        for (String link : linkCollection) {
                            // write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "link"), new LiteralValue(link), null));
                        }
                        // NOTE(review): this removes an element equal to the id from the collection
                        // of links, whereas the altLabels branch removes the whole entry for the id
                        // (altLabels.remove(id)) - confirm which one is intended
                        linkCollection.remove(fields[geonameid]);
                    }
                    // population is written only when the field is longer than one character
                    if (fields[population].length() > 1) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "population"),
                                new LiteralValue(fields[population]), null), isDescriptionOfCountry);
                    }
                    if (!fields[longitude].isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_WGS_SCHEMA + "long"),
                                new LiteralValue(fields[longitude]), null), isDescriptionOfCountry);
                    }
                    if (!fields[latitude].isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_WGS_SCHEMA + "lat"),
                                new LiteralValue(fields[latitude]), null), isDescriptionOfCountry);
                    }
                    // featureCodeField always contains at least the "." separator, so this
                    // condition holds for every record
                    if (!featureCodeField.isEmpty()) {
                        write(country,
                                new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "division"),
                                        new ResourceValue(NS_GEONAMES_ONTOLOGY + featureCodeField), null),
                                isDescriptionOfCountry);
                    }
                    if (!country.isEmpty()) {
                        write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "country"),
                                new LiteralValue(country), null), isDescriptionOfCountry);
                    }
                    // alt label as country code
                    if (featureCodeField.startsWith("A.PCL")) {
                        write(country, new Triple(uri, SKOS.LABEL_ALT, new LiteralValue(country), null),
                                isDescriptionOfCountry);

                    }

                    // one IS_PART_OF triple per URI returned by allParents(uri, country)
                    for (String broaderUri : allParents(uri, country)) {
                        write(country, new Triple(uri, Concepts.DCTEMRS.IS_PART_OF,
                                new ResourceValue(broaderUri), null), isDescriptionOfCountry);
                    }
                    //                        if (!fields[admin1code].isEmpty()) {
                    //                            write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin1"), new LiteralValue(fields[admin1code]), null), isDescriptionOfCountry);
                    //                        }
                    //                        if (!fields[admin2code].isEmpty()) {
                    //                            write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin2"), new LiteralValue(fields[admin2code]), null), isDescriptionOfCountry);
                    //                        }
                    //                        if (!fields[admin3code].isEmpty()) {
                    //                            write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin3"), new LiteralValue(fields[admin3code]), null), isDescriptionOfCountry);
                    //                        }
                    //                        if (!fields[admin4code].isEmpty()) {
                    //                            write(country, new Triple(uri, new Property(NS_EUROPEANA_SCHEMA + "admin4"), new LiteralValue(fields[admin4code]), null), isDescriptionOfCountry);
                    //                        }

                }
            }

        }
    } finally {
        // release the input reader whether or not the conversion succeeded
        LineIterator.closeQuietly(it);
    }
    System.out.println("Finished conversion, flushing and closing output files");
    System.out.flush();
    // finish the RDF output of every per-country writer that exists in files
    for (Object country : countryToContinent.keySet()) {
        SesameWriter bf = files.get(country.toString());
        if (bf != null) {
            bf.endRDF();
        }
    }
    if (allCountries != null) {
        allCountries.endRDF();
    }
}

From source file:com.rodaxsoft.mailgun.message.tools.MailgunSender.java

/**
 * Sends message to the recipients specified by the <code>-R</code> option.
 * @param cmd Command line arguments/*from   w  w  w  . ja v a 2 s  .  c o m*/
 * @param text Plain text email content
 * @param html HTML email content
 * @throws ContextedRuntimeException if the recipients option or -R is omitted.
 */
private static void sendMessageToRecipientsInFile(CommandLine cmd, String text, String html) {

    if (cmd.hasOption(RECIPIENTS_FILE_OPT)) {
        LineIterator it = null;
        try {
            it = FileUtils.lineIterator(new File(cmd.getOptionValue(RECIPIENTS_FILE_OPT)), "UTF-8");

            while (it.hasNext()) {
                final String to = it.nextLine();
                //Build the email request object
                final EmailRequest er = makeEmailRequest(cmd, text, html, to);
                LOG.trace(er);

                sendMessage(cmd, er);
            }

        } catch (IOException e) {
            LOG.error("Error occurre while sending from recipients file", e);

        } finally {
            LineIterator.closeQuietly(it);
        }
    } else {

        final String msg = "Option must be a recipients file";
        handleOmittedOptionError(cmd, msg);
    }
}

From source file:egovframework.rte.fdl.filehandling.FilehandlingServiceTest.java

/**
 * Reads {@code pom.xml} line by line through {@code FileUtils.lineIterator} and compares
 * every line with the expected content.
 *
 * <p>Exceptions are deliberately not caught here: a missing file, a read error or a file
 * with more lines than expected must fail the test instead of being logged and ignored.
 *
 * @throws Exception if {@code pom.xml} cannot be read or contains more lines than expected
 */
@Test
public void testLineIterator() throws Exception {

    String[] string = {
            "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"",
            "  xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd\">",
            "  <parent>", "     <groupId>egovframework.rte</groupId>",
            "     <artifactId>egovframework.rte.root</artifactId>", "     <version>1.0.0-SNAPSHOT</version>",
            "  </parent>", "  <modelVersion>4.0.0</modelVersion>", "  <groupId>egovframework.rte</groupId>",
            "  <artifactId>egovframework.rte.fdl.filehandling</artifactId>", "  <packaging>jar</packaging>",
            "  <version>1.0.0-SNAPSHOT</version>", "  <name>egovframework.rte.fdl.filehandling</name>",
            "  <url>http://maven.apache.org</url>", "  <dependencies>", "    <dependency>",
            "      <groupId>junit</groupId>", "      <artifactId>junit</artifactId>",
            "      <version>4.4</version>", "      <scope>test</scope>", "    </dependency>",
            "    <dependency>", "      <groupId>commons-vfs</groupId>",
            "      <artifactId>commons-vfs</artifactId>", "      <version>1.0</version>", "    </dependency>",
            "    <dependency>", "      <groupId>commons-io</groupId>",
            "      <artifactId>commons-io</artifactId>", "      <version>1.4</version>", "    </dependency>",
            "    <!-- egovframework.rte -->", "    <dependency>", "      <groupId>egovframework.rte</groupId>",
            "      <artifactId>egovframework.rte.fdl.string</artifactId>",
            "      <version>1.0.0-SNAPSHOT</version>", "    </dependency>", "  </dependencies>", "</project>" };

    File file = new File("pom.xml");

    LineIterator it = FileUtils.lineIterator(file, "UTF-8");

    try {
        log.debug("############################# LineIterator ###############################");

        // i tracks the position in the expected array while the iterator walks the file
        for (int i = 0; it.hasNext(); i++) {
            String line = it.nextLine();
            log.info(line);

            assertEquals(string[i], line);
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
}

From source file:com.seniorproject.semanticweb.services.WebServices.java

/**
 * Reads the given file as UTF-8 and returns its lines after applying a fixed, ordered list
 * of textual substitutions: selected URIs are dropped, known namespace URIs are shortened to
 * their prefix, and any remaining angle brackets are removed.
 *
 * @param filepath path of the file to read
 * @return one rewritten string per input line, in file order
 * @throws IOException if the file cannot be opened
 */
public ArrayList<String> replaceWithPrefix(String filepath) throws IOException {
    // Ordered {target, replacement} pairs. The order matters: longer URIs must be handled
    // before their shorter prefixes, and the bare "<" / ">" clean-up has to come last.
    // NOTE(review): the targets ending in "\n" can only match a line that itself contains a
    // newline character - confirm whether they are still needed.
    final String[][] substitutions = {
            { "^^<http://www.w3.org/2001/XMLSchema#int>", "" },
            { "<http://xmlns.com/foaf/0.1/page>\n", "" },
            { "<http://www.w3.org/2002/07/owl#sameAs>\n", "" },
            { "<http://www.w3.org/2000/01/rdf-schema#label>\n", "" },
            { "<http://dbpedia.org/property/hasPhotoCollection>\n", "" },
            { "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>\n", "" },
            { "<http://www.w3.org/2002/07/owl#", "owl:" },
            { "<http://www.w3.org/2001/XMLSchema#", "xsd:" },
            { "<http://www.w3.org/2000/01/rdf-schema#", "rdfs:" },
            { "<http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf:" },
            { "<http://xmlns.com/foaf/0.1/", "foaf:" },
            { "<http://data.linkedmdb.org/resource/oddlinker/", "oddlinker:" },
            { "<file:/C:/d2r-server-0.4/mapping.n3#", "map:" },
            { "<http://data.linkedmdb.org/resource/movie/", "movie:" },
            { "<http://data.linkedmdb.org/resource/", "db:" },
            { "<http://dbpedia.org/property/", "dbpedia:" },
            { "<http://www.w3.org/2004/02/skos/core#", "skos:" },
            { "<http://purl.org/dc/terms/", "dc:" },
            { ">", "" },
            { "<", "" } };

    LineIterator lines = FileUtils.lineIterator(new File(filepath), "UTF-8");
    ArrayList<String> results = new ArrayList<>();
    try {
        while (lines.hasNext()) {
            String rewritten = lines.nextLine();
            for (String[] pair : substitutions) {
                rewritten = rewritten.replace(pair[0], pair[1]);
            }
            results.add(rewritten);
        }
    } finally {
        LineIterator.closeQuietly(lines);
    }
    return results;
}

From source file:com.ipcglobal.fredimport.process.ProcessReadmeSeriesId.java

/**
 * Read readme series id./*from w  w w  . j a  va  2 s.  c  om*/
 *
 * @return the list
 * @throws Exception the exception
 */
public List<SeriesIdItem> readReadmeSeriesId() throws Exception {
    List<SeriesIdItem> seriesIdItems = new ArrayList<SeriesIdItem>();
    boolean isHeaderRows = true;
    boolean isFooterRows = false;
    LineIterator it = FileUtils.lineIterator(new File(inputPathFredData + "README_SERIES_ID_SORT.txt"),
            "UTF-8");
    int numLines = 0;
    try {
        while (it.hasNext()) {
            String line = it.nextLine();
            numLines++;
            if (isHeaderRows) {
                if (line.startsWith("File"))
                    isHeaderRows = false;
            } else if (isFooterRows) {

            } else {
                if (line.length() == 0) {
                    isFooterRows = true;
                    continue;
                }
                // Data row
                // File;Title; Units; Frequency; Seasonal Adjustment; Last Updated
                // Bypass all (DISCONTINUED SERIES) rows;
                if (line.indexOf("(DISCONTINUED SERIES)") > -1 || line.indexOf("(DISCONTINUED)") > -1
                        || line.indexOf("(Discontinued Series)") > -1)
                    continue;
                String[] fields = splitFields(line);

                seriesIdItems.add(new SeriesIdItem().setCsvFileName(fields[0])
                        .setTitle(fields[1].replace("", "")).setUnits(fields[2]).setFrequency(fields[3])
                        .setSeasonalAdj(fields[4]).setLastUpdated(fields[5]));
            }

            if ((numLines % 25000) == 0)
                log.info("readReadmeSeriesId: read lines: " + numLines);
        }
    } finally {
        LineIterator.closeQuietly(it);
    }
    return seriesIdItems;
}

From source file:gov.nih.nci.ncicb.tcga.dcc.qclive.loader.levelthree.LevelThreeLoader.java

/**
 * Loads BROAD SNP6, MSKCC and HUDSONALPHA center files.
 *
 * <p>The segment file is read line by line as UTF-8. The first line is checked separately
 * from the remaining lines; each remaining line is converted to a {@code CnaValue},
 * validated and persisted in batches of {@code getBatchSize()} values.
 *
 * @param segmentFile
 *            the file to load
 * @param extractNameIndex
 *            index of "EXTRACT_NAME" column in SDRF
 * @param sdrfNavigator
 *            a data structure containing SDRF
 * @param dataSetId
 *            id of the data set associated with the load
 * @param platform
 *            platform associated with the load
 * @param center
 *            center associated with the load
 * @throws LoaderException
 *             if CNA load resulted in an error
 */
public void loadCnaValues(File segmentFile, Integer extractNameIndex,
        TabDelimitedContentNavigator sdrfNavigator, Integer dataSetId, String platform, String center)
        throws LoaderException {

    logger.debug(String.format(fileToDbLoadInfoPlaceholder, segmentFile, "cna_value"));

    // the record parser is selected by center and reused for every line
    SegmentRecord segmentRecord = getSegmentRecordForCenter(center);
    String prevHybridizationRefName = "";
    Integer recordNumber = 0;

    // values waiting to be persisted in the current batch
    List<CnaValue> cnaValues = new ArrayList<CnaValue>();
    // record number -> hybridization ref name for records without a hybridization ref id
    Map<Integer, String> controlSampleRecords = new HashMap<Integer, String>();
    LineIterator segmentLineIterator = null;
    try {
        segmentLineIterator = FileUtils.lineIterator(segmentFile, CharEncoding.UTF_8);
        // First line: read without a hasNext() check and asserted with 'true'.
        // NOTE(review): presumably this is the header row - confirm; an empty file makes
        // nextLine() fail here.
        String segmentFileRecord = segmentLineIterator.nextLine();
        String[] recordValues = segmentFileRecord.split(SEGMENT_RECORD_DELIMITER);
        segmentRecord.setRecordValues(Arrays.asList(recordValues));
        segmentRecord.assertRecord(true);
        ++recordNumber;

        while (segmentLineIterator.hasNext()) {
            segmentFileRecord = segmentLineIterator.nextLine();
            recordValues = segmentFileRecord.split(SEGMENT_RECORD_DELIMITER);
            segmentRecord.setRecordValues(Arrays.asList(recordValues));
            segmentRecord.setRecordNumber(recordNumber);
            segmentRecord.assertRecord(false);

            CnaValue cnaValue = segmentRecord.getCnaValue();
            cnaValue.setDataSetId(dataSetId);

            // the previous record's ref name is passed along with the current value
            resolveHybridizationRefId(cnaValue, prevHybridizationRefName, sdrfNavigator, extractNameIndex,
                    dataSetId);

            validate(cnaValue, recordNumber);

            if (cnaValue.getHybridizationRefId() != null) {
                // a copy is stored rather than the instance returned by getCnaValue()
                cnaValues.add(cnaValue.copy());
            } else {
                controlSampleRecords.put(recordNumber, cnaValue.getHybridizationRefName());
            }

            // flush a full batch
            if (cnaValues.size() == getBatchSize()) {
                if (logger.isDebugEnabled()) {
                    printCnaLoadBatchInfo(controlSampleRecords);
                }

                persistCnaValues(cnaValues);
                cnaValues.clear();
            }

            prevHybridizationRefName = cnaValue.getHybridizationRefName();
            ++recordNumber;
        }

        // flush the last, partially filled batch
        if (cnaValues.size() > 0) {
            if (logger.isDebugEnabled()) {
                printCnaLoadBatchInfo(controlSampleRecords);
            }

            persistCnaValues(cnaValues);
            cnaValues.clear();
        }
    } catch (IOException ioe) {
        throw new LoaderException(ioe.getMessage(), ioe);
    } finally {
        // closeQuietly accepts null, which covers a failure while opening the file
        LineIterator.closeQuietly(segmentLineIterator);
    }
}

From source file:net.stargraph.core.DocumentIterator.java

/**
 * Creates an iterator over the documents stored in the file resolved from the given
 * knowledge base id. The file is opened as UTF-8 and {@code parseNext()} is called once
 * before the constructor returns.
 *
 * @param core the Stargraph instance; must not be null
 * @param kbId the knowledge base id; must not be null
 * @throws StarGraphException if the file cannot be opened
 */
public DocumentIterator(Stargraph core, KBId kbId) {
    this.core = Objects.requireNonNull(core);
    this.kbId = Objects.requireNonNull(kbId);
    this.mapper = ObjectSerializer.createMapper(kbId);

    final File source = getFilePath(kbId.getId()).toFile();
    try {
        this.lineIt = FileUtils.lineIterator(source, "UTF-8");
        parseNext();
    } catch (IOException e) {
        logger.error(marker, "Failed to load documents from file {}.", source);
        throw new StarGraphException(e);
    }
}

From source file:net.tachtler.browscap4j.Browscap4jFileReader.java

/**
 * Builds the Browscap4jDataBean from the given CSV file.
 *
 * <p>Every line of the file is appended (without line terminators) to one large string. For
 * each line, the first column - with its leading character removed - is converted to a
 * regular expression and stored as key of an insertion-ordered map; the value records the
 * start and end offset of that line inside the concatenated string.
 *
 * @param csvFile the CSV file to read (read as UTF-8)
 * @return Browscap4jDataBean holding the pattern map and the concatenated string
 * @throws IllegalStateException if {@code csvFile} is null
 * @throws FileNotFoundException declared for callers
 * @throws IOException if the file cannot be opened
 */
public static Browscap4jDataBean initBrowscap4jData(File csvFile)
        throws IllegalStateException, FileNotFoundException, IOException {

    log.debug("*csvFile                                : " + csvFile);

    // Reject a missing argument before anything is allocated.
    if (csvFile == null) {
        throw new IllegalStateException("Argument csvFile is null (NOT set).");
    }

    final Browscap4jDataBean result = new Browscap4jDataBean();

    // Pattern of the first CSV column -> position of the whole line in the big string.
    final LinkedHashMap<Pattern, Browscap4jPositionBean> positionsByPattern = new LinkedHashMap<Pattern, Browscap4jPositionBean>();

    // Collects all lines back to back.
    final StringBuilder allLines = new StringBuilder();

    final LineIterator lines = FileUtils.lineIterator(csvFile, "UTF-8");
    try {
        int start = 0;
        while (lines.hasNext()) {
            final String line = lines.nextLine();
            final int end = start + line.length();

            final Browscap4jPositionBean position = new Browscap4jPositionBean(start, end);
            start = end;

            // Columns are separated by "," including the surrounding quotes; the first
            // column therefore still carries its opening quote, which substring(1) drops.
            final String[] columns = line.split("\",\"");
            final String firstColumn = columns[0].substring(1);

            positionsByPattern.put(Pattern.compile(convertToRegex(firstColumn)), position);

            allLines.append(line);
        }
    } finally {
        LineIterator.closeQuietly(lines);
    }

    final String concatenated = allLines.toString();

    // Only the classes are logged: the map and the string are too large to print, and the
    // class is enough to see that neither of them is null.
    log.debug("*browscap4jMap                          : " + positionsByPattern.getClass());
    log.debug("*browscap4jString                       : " + concatenated.getClass());

    result.setBrowscap4jMap(positionsByPattern);
    result.setBrowscap4jString(concatenated);

    return result;
}

From source file:nl.knaw.huygens.timbuctoo.tools.importer.neww.WomenWritersImporter.java

/**
 * Opens a UTF-8 line iterator on the named file inside {@code inputDir}.
 * The caller is responsible for closing the returned iterator.
 *
 * @param filename name of the file, relative to {@code inputDir}
 * @return an iterator over the lines of the file
 * @throws IOException if the file cannot be opened
 */
private LineIterator getLineIterator(String filename) throws IOException {
    return FileUtils.lineIterator(new File(inputDir, filename), "UTF-8");
}