Example usage for the org.apache.commons.io.LineIterator constructor LineIterator(Reader)

List of usage examples for the org.apache.commons.io.LineIterator constructor LineIterator(Reader)

Introduction

On this page you can find example usages of the org.apache.commons.io.LineIterator constructor LineIterator(Reader).

Prototype

public LineIterator(final Reader reader) throws IllegalArgumentException 

Source Link

Document

Constructs an iterator of the lines for a Reader.

Usage

From source file:de.tudarmstadt.lt.seg.sentence.SentenceSplitterTest.java

/**
 * Feeds every line of {@code TEST_TEXT} to {@code Segmenter.split_and_tokenize} using a
 * {@code RuleSplitter} and an {@code EmptySpaceTokenizer}, collecting the output in memory and
 * printing it to standard out once all lines have been handled.
 */
@Test
public void ruleSplitterLineTest() {
    final ISentenceSplitter splitter = new RuleSplitter();
    final ITokenizer spaceTokenizer = new EmptySpaceTokenizer();
    final StringWriter buffer = new StringWriter();
    final PrintWriter out = new PrintWriter(buffer);

    final LineIterator lines = new LineIterator(new BufferedReader(new StringReader(TEST_TEXT)));
    long lineNumber = 0;
    while (lines.hasNext()) {
        lineNumber++;
        // progress marker on stderr every 1000 lines
        if (lineNumber % 1000 == 0) {
            System.err.format("Processing line %d %n", lineNumber);
        }

        final String line = lines.next();
        // each line is labelled "TEST_TEXT:<1-based line number>"
        final String sourceId = String.format("%s:%d", "TEST_TEXT", lineNumber);
        Segmenter.split_and_tokenize(new StringReader(line), sourceId, splitter, spaceTokenizer, 2, 0, false,
                false, "\n", "\n", "\n", out);
    }
    System.out.println(buffer.toString());
}

From source file:net.mikaboshi.intra_mart.tools.log_stats.parser.LogFileReader.java

/**
 * Opens the log file found at position {@code iLogFiles} of {@code logFiles}: registers it with
 * the log parser, wraps its reader in a fresh {@code LineIterator} and resets {@code iLine} to 0.
 *
 * @throws IllegalArgumentException     declared by the original signature; presumably raised by
 *                                      {@code getFileReader} or the iterator constructor
 * @throws UnsupportedEncodingException declared by the original signature; presumably raised by
 *                                      {@code getFileReader}
 * @throws FileNotFoundException        declared by the original signature; presumably raised by
 *                                      {@code getFileReader}
 */
private void openLogFile()
        throws IllegalArgumentException, UnsupportedEncodingException, FileNotFoundException {
    final File current = this.logFiles.get(this.iLogFiles);

    // the parser is told about the file before the reader is opened
    this.logParser.setLogFile(current);
    this.lineIterator = new LineIterator(getFileReader(current));
    logger.info("Open log file : " + current.getPath());

    // presumably the per-file line counter; restarted for each newly opened file
    this.iLine = 0;
}

From source file:eu.eexcess.sourceselection.redde.indexer.topterm.TopTermToWNDomain.java

/**
 * Populates {@code wnDomainTree} from the file {@code wordnetCSVTreeFile} and returns it.
 * <p>
 * Every line is split with {@code tokenDelimiter}; the column index of the last non-empty token
 * is the node's depth below the root. The method keeps track of the current root-to-node branch
 * and, for each line, attaches the part of that branch which is not yet present in the tree to
 * the deepest ancestor that is.
 * <p>
 * The underlying reader is closed even when reading or tree construction fails.
 *
 * @return the tree held in {@code wnDomainTree}
 * @throws FileNotFoundException if {@code wordnetCSVTreeFile} cannot be opened
 */
TreeNode<String> inflateDomainTree() throws FileNotFoundException {
    LineIterator iterator = new LineIterator(new FileReader(wordnetCSVTreeFile));
    try {
        // slot 0 is the root, slots 1..4 hold the nodes at depth 0..3 of the current line
        // NOTE(review): a non-empty token in column 4 or beyond would overflow this array -
        // confirm the input file never nests deeper than four columns
        String[] currentBranch = new String[5];
        currentBranch[0] = rootNodeName;

        while (iterator.hasNext()) {

            // read current node and store its parents
            String line = iterator.nextLine();
            String[] tokensInLine = line.split(tokenDelimiter);

            int depth = -1;
            for (int i = 0; i < tokensInLine.length; i++) {
                tokensInLine[i] = tokensInLine[i].trim();
                if (!tokensInLine[i].isEmpty()) {
                    depth = i;
                    currentBranch[1 + depth] = tokensInLine[i];
                }
            }
            // clear tail: entries below the current node belong to a previous line
            for (int tail = depth + 2; tail < currentBranch.length; tail++) {
                currentBranch[tail] = null;
            }

            // reconstruct and append the missing branch according to the
            // current tree, walking from the deepest node up towards the root
            ValueTreeNode<String> branch = null;
            for (int branchDepth = currentBranch.length; branchDepth > 0; branchDepth--) {
                String nodeName = currentBranch[branchDepth - 1];
                if (nodeName == null) {
                    continue;
                }

                Set<TreeNode<String>> result = new HashSet<TreeNode<String>>();
                ValueTreeNode.findFirstNode(nodeName, wnDomainTree, result);
                TreeNode<String> nodeInTree = result.isEmpty() ? null : result.iterator().next();

                if (nodeInTree != null) {
                    // node is already in the tree -> mount the pending branch (if any) below it
                    if (branch != null) {
                        nodeInTree.addChild(branch);
                        branch = null;
                    }
                    break;
                } else {
                    // node is not in the tree yet -> it becomes the new top of the pending
                    // branch; keep climbing until a mount point is found
                    ValueTreeNode<String> newParent = new ValueTreeNode<String>();
                    newParent.setName(nodeName);

                    if (branch != null) {
                        newParent.addChild(branch);
                    }
                    branch = newParent;
                }
            }
        }
    } finally {
        // release the file handle even if parsing fails part-way
        iterator.close();
    }
    return wnDomainTree;
}

From source file:de.tudarmstadt.lt.lm.app.GenerateNgrams.java

/**
 * Writes all n-grams of cardinality {@code from_cardinality..to_cardinality} found in the
 * {@code .txt} files of {@code src_dir} to the gzip-compressed, UTF-8 encoded file
 * {@code <src_dir>/<src_dir name>.ngrams.txt.gz}, one space-joined n-gram per line.
 * <p>
 * Input files are read as UTF-8, line by line; each line is split into sentences and tokenized
 * by {@code prvdr}. A sentence that cannot be turned into n-grams is logged and skipped, and a
 * file that cannot be read is logged and skipped; neither aborts the run.
 *
 * @param src_dir          directory containing the {@code .txt} input files; also receives the output
 * @param prvdr            provider used for sentence splitting and tokenization
 * @param from_cardinality smallest n-gram cardinality to generate (inclusive)
 * @param to_cardinality   largest n-gram cardinality to generate (inclusive)
 * @param overwrite        if {@code true} an existing output file is deleted and regenerated,
 *                         otherwise the existing file is returned untouched
 * @return the n-gram file, or {@code null} if the directory could not be listed or the output
 *         file could not be opened for writing
 */
public static File generateNgrams(File src_dir, AbstractStringProvider prvdr, int from_cardinality,
        int to_cardinality, boolean overwrite) {
    final File ngram_file = new File(src_dir, String.format("%s.%s", src_dir.getName(), "ngrams.txt.gz"));
    if (ngram_file.exists()) {
        LOG.info("Output file already exists: '{}'.", ngram_file.getAbsolutePath());
        if (!overwrite) {
            return ngram_file;
        }
        ngram_file.delete();
        LOG.info("Overwriting file: '{}'.", ngram_file.getAbsolutePath());
    }

    File[] src_files = src_dir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File f) {
            return f.isFile() && f.getName().endsWith(".txt") && (!f.equals(ngram_file));
        }
    });
    // File.listFiles returns null when src_dir is not a directory or an I/O error occurs
    if (src_files == null) {
        LOG.error("Could not list files in directory: '{}'.", src_dir.getAbsolutePath());
        return null;
    }

    String[] basenames = new String[src_files.length];
    for (int i = 0; i < basenames.length; i++) {
        basenames[i] = src_files[i].getName();
    }

    LOG.info(String.format("Reading txt files from dir: '%s'; Files: %s.", src_dir.getAbsolutePath(),
            StringUtils.abbreviate(Arrays.toString(basenames), 200)));
    LOG.info(String.format("Writing ngrams to file: '%s'.", ngram_file.getAbsolutePath()));

    final PrintWriter pw;
    try {
        pw = new PrintWriter(
                new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(ngram_file)), "UTF-8"));
    } catch (IOException e) {
        LOG.error("Could not open writer for file: '{}'.", ngram_file.getAbsolutePath(), e);
        return null;
    }

    long num_ngrams = 0L;
    try {
        for (int i = 0; i < src_files.length; i++) {
            File src_file = src_files[i];
            LOG.info("Processing file {} / {} ('{}')", i + 1, src_files.length, src_file.getAbsolutePath());
            long num_ngrams_f = 0L;

            try {
                LineIterator liter = new LineIterator(
                        new BufferedReader(new InputStreamReader(new FileInputStream(src_file), "UTF-8")));
                try {
                    int lc = 0;
                    while (liter.hasNext()) {
                        if (++lc % 1000 == 0) {
                            LOG.debug("Processing line {} ({})", lc, src_file);
                        }
                        String line = liter.next();
                        for (String sentence : prvdr.splitSentences(line)) {
                            for (int n = from_cardinality; n <= to_cardinality; n++) {
                                List<String>[] ngrams;
                                try {
                                    List<String> tokens = prvdr.tokenizeSentence(sentence);
                                    if (tokens.isEmpty()) {
                                        continue;
                                    }
                                    ngrams = AbstractLanguageModel.getNgramSequence(tokens, n);
                                } catch (Exception e) {
                                    // keep the cause so the failure can be diagnosed from the log
                                    LOG.warn(
                                            "Could not get ngram of cardinality {} from String '{}' in line '{}' from file '{}'.",
                                            n, StringUtils.abbreviate(line, 100), lc, src_file.getAbsolutePath(),
                                            e);
                                    continue;
                                }
                                for (List<String> ngram : ngrams) {
                                    pw.println(StringUtils.join(ngram, " "));
                                }
                                pw.flush();
                                num_ngrams_f += ngrams.length;
                            }
                        }
                    }
                } finally {
                    // close the input file even if processing a line failed
                    liter.close();
                }
            } catch (Exception e) {
                LOG.warn("Could not read file '{}'.", src_file.getAbsolutePath(), e);
            }
            LOG.debug("Generated {} ngrams from file {}.", num_ngrams_f, src_file);
            num_ngrams += num_ngrams_f;
        }
    } finally {
        // always finish the gzip stream, otherwise the output file is left truncated
        pw.close();
    }

    LOG.info("Generated {} ngrams.", num_ngrams);

    return ngram_file;
}

From source file:au.org.ala.names.search.ALANameIndexer.java

/**
 * Initialises the indexer: creates the {@code TaxonNameSoundEx} instance and loads the known
 * homonyms and the blacklist from the classpath.
 * <p>
 * Homonyms are read from {@code au/org/ala/propertystore/known_homonyms.txt} (ISO-8859-1); each
 * line is trimmed and stored upper-cased in {@code knownHomonyms}. Blacklist entries are read
 * from {@code blacklist.txt}; blank lines and lines starting with {@code #} are skipped.
 * <p>
 * Failures while opening either resource propagate to the caller. Failures while reading are
 * printed to standard error and otherwise ignored. Both iterators are closed in all cases.
 *
 * @throws Exception if either resource cannot be opened
 */
public void init() throws Exception {

    tnse = new TaxonNameSoundEx();

    LineIterator lines = null;
    LineIterator blines = null;
    try {
        // init the known homonyms
        lines = new LineIterator(
                new BufferedReader(new InputStreamReader(this.getClass().getClassLoader()
                        .getResource("au/org/ala/propertystore/known_homonyms.txt").openStream(), "ISO-8859-1")));
        // NOTE(review): the blacklist is decoded with the platform default charset - confirm
        // whether it should use an explicit charset like the homonym file does
        blines = new LineIterator(new BufferedReader(
                new InputStreamReader(this.getClass().getClassLoader().getResource("blacklist.txt").openStream())));
        try {
            //load known homonyms
            while (lines.hasNext()) {
                String line = lines.nextLine().trim();
                knownHomonyms.add(line.toUpperCase());
            }
            //load the blacklist
            while (blines.hasNext()) {
                String line = blines.nextLine().trim();
                if (!line.startsWith("#") && StringUtils.isNotBlank(line))
                    blacklist.add(line);
            }
        } catch (Exception e) {
            // read errors are reported but do not abort initialisation
            e.printStackTrace();
        }
    } finally {
        // close whatever was opened, even if the second resource could not be opened
        try {
            if (lines != null) {
                lines.close();
            }
        } finally {
            if (blines != null) {
                blines.close();
            }
        }
    }
}

From source file:com.shopzilla.hadoop.testing.hdfs.DFSCluster.java

/**
 * Hands every line of the data found under {@code path} to {@code lineProcessor}.
 * <p>
 * Which files are visited is decided by {@code processPaths}; each visited file is opened on
 * the mini DFS cluster's file system and read line by line. The file is closed once it has been
 * read, including when the processor or the read fails.
 *
 * @param path          location passed on to {@code processPaths}
 * @param lineProcessor callback applied to each line; its return value is ignored
 * @throws IOException declared by the original signature; presumably raised by {@code processPaths}
 * @throws RuntimeException wrapping any exception raised while opening, reading or processing a file
 */
public void processData(final Path path, final Function<String, Void> lineProcessor) throws IOException {
    final Function<Path, Void> pathProcessor = new Function<Path, Void>() {
        @Override
        public Void apply(Path file) {
            try {
                final FSDataInputStream in = miniDFSCluster.getFileSystem().open(file);
                // NOTE(review): the stream is decoded with the platform default charset - confirm
                final LineIterator lineIterator = new LineIterator(new InputStreamReader(in));
                try {
                    while (lineIterator.hasNext()) {
                        lineProcessor.apply(lineIterator.next());
                    }
                } finally {
                    // release the stream even if a line could not be processed
                    lineIterator.close();
                }
                return null;
            } catch (final Exception ex) {
                throw new RuntimeException(ex);
            }
        }
    };
    processPaths(path, pathProcessor);
}

From source file:de.tudarmstadt.lt.lm.app.FilterLines.java

/**
 * Reads {@code r} line by line and passes each line to {@code processLine}, logging progress
 * every 5000 lines. The reader is not closed by this method.
 *
 * @param r source of the lines to process
 */
void runSequential(Reader r) {
    final LineIterator lines = new LineIterator(r);
    long count = 0;
    while (lines.hasNext()) {
        count++;
        if (count % 5000 == 0) {
            LOG.info("processing line {}.", count);
        }
        processLine(lines.next());
    }
}

From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContext.java

/**
 * Hands every line of the data found under {@code path} to {@code lineProcessor}.
 * <p>
 * Which files are visited is decided by {@code processPaths}; each visited file is opened on
 * the mini DFS cluster's file system and read line by line. The file is closed once it has been
 * read, including when the processor or the read fails.
 *
 * @param path          location passed on to {@code processPaths}
 * @param lineProcessor callback applied to each line; its return value is ignored
 * @throws IOException declared by the original signature; presumably raised by {@code processPaths}
 * @throws RuntimeException wrapping any exception raised while opening, reading or processing a file
 */
public void processData(final Path path, final Function<String, Void> lineProcessor) throws IOException {
    final Function<Path, Void> pathProcessor = new Function<Path, Void>() {
        @Override
        public Void apply(Path file) {
            try {
                FSDataInputStream in = miniDFSCluster.getFileSystem().open(file);
                // NOTE(review): the stream is decoded with the platform default charset - confirm
                LineIterator lineIterator = new LineIterator(new InputStreamReader(in));
                try {
                    while (lineIterator.hasNext()) {
                        lineProcessor.apply(lineIterator.next());
                    }
                } finally {
                    // release the stream even if a line could not be processed
                    lineIterator.close();
                }
            } catch (Exception ex) {
                throw new RuntimeException(ex);
            }
            return null;
        }
    };
    processPaths(path, pathProcessor);
}

From source file:com.shopzilla.hadoop.testing.hdfs.DFSCluster.java

/**
 * Hands every line of the data found under {@code path} to {@code lineProcessor}, delegating
 * the traversal to {@code processPathsRecursive}.
 * <p>
 * Each visited file is opened on the mini DFS cluster's file system and read line by line. The
 * file is closed once it has been read, including when the processor or the read fails.
 *
 * @param path          location passed on to {@code processPathsRecursive}
 * @param lineProcessor callback applied to each line; its return value is ignored
 * @throws IOException declared by the original signature; presumably raised by
 *                     {@code processPathsRecursive}
 * @throws RuntimeException wrapping any exception raised while opening, reading or processing a file
 */
public void processDataRecursive(final Path path, final Function<String, Void> lineProcessor)
        throws IOException {
    final Function<Path, Void> pathProcessor = new Function<Path, Void>() {
        @Override
        public Void apply(Path file) {
            try {
                final FSDataInputStream in = miniDFSCluster.getFileSystem().open(file);
                // NOTE(review): the stream is decoded with the platform default charset - confirm
                final LineIterator lineIterator = new LineIterator(new InputStreamReader(in));
                try {
                    while (lineIterator.hasNext()) {
                        lineProcessor.apply(lineIterator.next());
                    }
                } finally {
                    // release the stream even if a line could not be processed
                    lineIterator.close();
                }
                return null;
            } catch (final Exception ex) {
                throw new RuntimeException(ex);
            }
        }
    };
    processPathsRecursive(path, pathProcessor);
}

From source file:csv.to.sql.parser.mainMenu.java

/**
 * Handler for the "Parse" button: converts the selected CSV file into a single SQL
 * {@code INSERT} statement written to a newly created file.
 * <p>
 * The first CSV line supplies the column list (double quotes replaced by back-ticks); every
 * following line becomes one value tuple, separated by {@code ",\n"} and terminated by
 * {@code ";"}. The table name is the CSV file name without {@code ".csv"}. Nothing is written
 * if the target file already exists. Afterwards a dialog reports success or failure and the
 * form is reset to its "no file selected" state.
 *
 * @param evt the action event (unused)
 */
private void btnParseActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_btnParseActionPerformed
    String filePath = this.selectedFile.getPath();
    // NOTE(review): ".csv" is replaced by a single space here, not removed - confirm intended
    filePath = filePath.replace(".csv", " ");
    File resultFile = new File(this.validFilePath(filePath + "csvTo.sql"));

    // start from "failed" so a result left over from a previous run is never reported
    this.resultOk = false;

    BufferedReader br = null;
    BufferedWriter bw = null;
    try {

        if (resultFile.createNewFile()) {

            br = new BufferedReader(new FileReader(this.selectedFile));
            bw = new BufferedWriter(new FileWriter(resultFile));
            bw.write("INSERT INTO " + this.selectedFile.getName().replace(".csv", "") + "  "
                    + this.formatFields(br.readLine()).replace('"', '`') + " VALUES\n");

            LineIterator it = new LineIterator(br);
            while (it.hasNext()) {
                String currLine = it.next();
                // the last tuple ends the statement, all others are followed by a comma
                bw.write(this.formatFields(currLine) + (it.hasNext() ? ",\n" : ";"));
            }

            // force buffered output to disk so a write failure is reported as an error
            bw.flush();
            this.resultOk = true;
        }
    } catch (IOException ex) {
        this.resultOk = false;
        System.out.println("Error al crear el archivo: " + ex.getMessage());
    } finally {
        // close each stream on its own so one failure cannot leak the other
        if (br != null) {
            try {
                br.close();
            } catch (IOException ex) {
                Logger.getLogger(mainMenu.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        if (bw != null) {
            try {
                bw.close();
            } catch (IOException ex) {
                Logger.getLogger(mainMenu.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        JOptionPane.showMessageDialog(this, "Parse " + (this.resultOk ? "Successful!" : "Error!"));

        this.selectedFile = null;
        this.lblFile.setText("No File Selected!");
        this.btnOpenFile.setEnabled(true);
        this.btnParse.setEnabled(false);

    }

}