List of usage examples for org.apache.commons.io FileUtils readLines
public static List readLines(File file, String encoding) throws IOException
From source file:de.unidue.ltl.evalita.feat.IsName.java
private void init() throws TextClassificationException { if (namelist != null) { return;//from ww w .ja v a 2 s .c o m } namelist = new HashSet<String>(); for (File file : folder.listFiles()) { if (file.isHidden()) { continue; } if (file.isDirectory()) { throw new TextClassificationException("Did not expect that namelists are stored in subfolders"); } List<String> readLines = null; try { readLines = FileUtils.readLines(file, "utf-8"); } catch (IOException e) { throw new TextClassificationException(e); } for (String l : readLines) { if (l.startsWith("#")) { continue; } if (lowerCase) { l = l.toLowerCase(); } namelist.add(l); } } }
From source file:com.github.cereda.arara.langchecker.LanguageUtils.java
/** * Reads a list of files and returns a list of reports of each language. * @param files A list of files.//from w w w .ja v a 2s. co m * @return A list of reports of each language. */ public static List<LanguageReport> readLanguages(List<File> files) { // check if the provided list is empty if (files.isEmpty()) { // print error message System.err.println(WordUtils.wrap("Fatal exception: I could not find any languages " + "in the provided directory. I am afraid I won't be " + "be able to continue. Please make sure the " + "provided directory contains at least one language " + "to be analyzed. The application will halt now.", 60)); } // the resulting list List<LanguageReport> reports = new ArrayList<>(); // read each file of the list and extract // each task found for (File file : files) { try { // read each file into a list // of strings List<String> lines = FileUtils.readLines(file, "UTF-8"); // get the line analysis LanguageReport report = analyze(lines); // set the file reference report.setReference(file); // add to the list reports.add(report); } catch (IOException exception) { // print error message System.err.println(WordUtils.wrap("Fatal exception: an error was raised while " + "trying to read one of the languages. Please " + "make sure all languages in the provided " + "directory have read permission. I won't be " + "able to continue. The application will halt " + "now.", 60)); System.exit(1); } } // return the list of // analyzed languages return reports; }
From source file:edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.wikipedia.WikiCorpusStatistics.java
private void extractDirectory2(File file) { File[] listFiles = file.listFiles(); if (listFiles != null) { for (int i = 0; i < file.listFiles().length; i++) { File fil = listFiles[i]; if (fil != null) { boolean start = true; String sb = new String(); String line;/*w w w.j av a 2 s. c o m*/ List<String> readLines = null; try { readLines = FileUtils.readLines(fil, StandardCharsets.UTF_8.name()); } catch (IOException ex) { Logger.getLogger(WikiCorpusStatistics.class.getName()).log(Level.SEVERE, null, ex); } boolean star = false; for (int o = 0; o < readLines.size(); o++) { line = readLines.get(o); if (line.matches("<doc .*?>.*?")) { String[] split = line.split("<.*>"); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: if (split.length == 2) { sb = sb + split[1]; } } else if (line.matches("</doc>")) { numDocs++; int l = sb.toString().split("\\s+").length; numWords = l; sb = new String(); System.out.println(l); } else { sb = sb + (line); } } } } } }
From source file:com.github.stagirs.lingvo.build.MorphStateMachineBuilder.java
private static Map<String, List<WordForm[]>> getRaw2WordForms() throws IOException { Map<String, List<WordForm[]>> result = new HashMap<String, List<WordForm[]>>(); for (String line : FileUtils.readLines(new File("dict.opcorpora.plain"), "utf-8")) { if (line.isEmpty()) { continue; }// w ww. j a v a 2s . c o m Lemma lemma = Lemma.parse(line); WordForm norm = lemma.getNorm(); for (WordForm raw : lemma.getItems()) { add(result, raw, norm); } } return result; }
From source file:ee.ria.xroad.common.util.SystemMetrics.java
/** * @return a snapshot of current network statistics *///from ww w. j av a 2 s . co m public static NetStats getNetStats() { long bytesReceived = 0; long bytesTransmitted = 0; try { List<String> lines = FileUtils.readLines(new File(SystemProperties.getNetStatsFile()), StandardCharsets.UTF_8); for (String eachLine : lines) { String trimmedLine = eachLine.trim(); Pattern pattern = Pattern.compile("^eth[01]:[\\s\\d]*$"); Matcher matcher = pattern.matcher(trimmedLine); if (matcher.find()) { String[] parts = trimmedLine.split("\\s+"); // Indices according to format of /proc/net/dev bytesReceived += Long.parseLong(parts[BYTES_RECEIVED_IDX]); bytesTransmitted += Long.parseLong(parts[BYTES_TRANSMITTED_IDX]); } } return new NetStats(bytesReceived, bytesTransmitted); } catch (IOException e) { log.error("Did not manage to collect network statistics", e); return null; } }
From source file:jmupen.MyListSelectionListener.java
/**
 * Removes the line at the given index from the file and rewrites it.
 *
 * @param removeLine zero-based index of the line to delete
 * @param text       the file to modify in place
 * @throws IOException if the file cannot be read or written
 */
private void removeLines(int removeLine, File text) throws IOException {
    List<String> textLines = FileUtils.readLines(text, StandardCharsets.UTF_8);
    textLines.remove(removeLine);
    StringBuilder builder = new StringBuilder();
    for (String line : textLines) {
        builder.append(line).append(System.lineSeparator());
    }
    // Write back with the same charset we read with; the deprecated
    // two-argument overload used the platform default charset, corrupting
    // non-ASCII content on non-UTF-8 platforms.
    FileUtils.writeStringToFile(text, builder.toString(), StandardCharsets.UTF_8);
}
From source file:eu.crydee.alignment.aligner.cr.MetricsCR.java
/**
 * Validates and loads the whitelist file, then prepares an iterator over
 * the cartesian product of its lines for the collection reader.
 *
 * @param context the UIMA context
 * @throws ResourceInitializationException if the whitelist file is missing,
 *         unreadable, or cannot be parsed
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    whitelistFile = new File(whitelistFilePath);
    List<String> problems = new ArrayList<>();
    if (!whitelistFile.isFile()) {
        problems.add("The run file doesn't resolve to a file.");
    } else if (!whitelistFile.canRead()) {
        problems.add("The run file can't be read.");
    }
    if (!problems.isEmpty()) {
        logger.error(String.join("\n", problems));
        throw new ResourceInitializationException();
    }
    try {
        // Deduplicate the lines, then pair every line with every line.
        Set<String> whitelist = new HashSet<>(FileUtils.readLines(whitelistFile, StandardCharsets.UTF_8));
        it = Sets.cartesianProduct(whitelist, whitelist).iterator();
    } catch (FileNotFoundException e) {
        logger.error("Couldn't find the run file.", e);
        throw new ResourceInitializationException(e);
    } catch (IOException e) {
        logger.error("Couldn't read the whitelist file.", e);
        throw new ResourceInitializationException(e);
    }
    currentIndex = 0;
}
From source file:com.legstar.cob2xsd.Cob2XsdIOTest.java
/**
 * Verifies that the XML Schema file generated from a small COBOL fragment
 * carries the configured UTF-8 encoding: the non-ASCII VALUE literal must
 * survive the round trip into the cobolName="B" element.
 */
@Test
public void testFileOutputEncoding() {
    try {
        configProps.put(Cob2XsdConfig.XSD_ENCODING, "UTF-8");
        configProps.put(Cob2XsdConfig.ADD_LEGSTAR_ANNOTATIONS, Boolean.toString(true));
        Cob2XsdIO cob2xsd = new Cob2XsdIO(new Cob2XsdConfig(configProps));

        // Write a minimal COBOL copybook to a throwaway temp file.
        File cobolSource = File.createTempFile("test", ".cob");
        cobolSource.deleteOnExit();
        FileUtils.write(cobolSource, " 01 A.\n 02 B PIC G(4) VALUE '?'.", "UTF8");

        File xmlSchema = cob2xsd.translate(cobolSource, "UTF8", tempDir, "http://www.mycompany.com/test",
                null);
        for (String schemaLine : FileUtils.readLines(xmlSchema, "UTF8")) {
            if (schemaLine.contains("cobolName=\"B\"")) {
                assertTrue(schemaLine.contains("value=\"?\""));
            }
        }
        xmlSchema.deleteOnExit();
    } catch (Exception e) {
        e.printStackTrace();
        fail();
    }
}
From source file:com.karumi.marvelapiclient.ApiClientTest.java
/**
 * Loads a classpath resource and returns its content with line breaks
 * stripped (lines are concatenated without separators).
 *
 * @param fileName resource name, resolved against the classpath root
 * @return the concatenated file content
 * @throws IOException if the resource cannot be read
 */
private String getContentFromFile(String fileName) throws IOException {
    String resourcePath = getClass().getResource("/" + fileName).getFile();
    List<String> lines = FileUtils.readLines(new File(resourcePath), FILE_ENCODING);
    StringBuilder content = new StringBuilder();
    for (String line : lines) {
        content.append(line);
    }
    return content.toString();
}
From source file:de.unidue.ltl.pos.trainmodel.tc.PostPosUpdateTask.java
/**
 * Post-processes the id2outcome report of a POS-tagging experiment: the
 * predicted labels are written back onto the CASes as POS annotations, a
 * name-list based post-processing tagger is run over each CAS, and an
 * updated id2outcome report (with possibly extended label mapping) plus an
 * accuracy file are written to the task context.
 *
 * NOTE(review): the id2outcome line format is assumed to be
 * "key_seq_tok=predictionId;goldId" with two header lines — inferred from
 * the regex and head/body handling below; confirm against the writer side.
 */
@Override
public void execute(TaskContext aContext) throws Exception {
    // Load the raw id2outcome report produced by the classifier.
    File file = aContext.getFile(Constants.ID_OUTCOME_KEY, AccessMode.READONLY);
    List<String> readLines = FileUtils.readLines(file, "UTF-8");
    // Bidirectional mapping between numeric label ids and label strings.
    Map<String, String> id2label = getId2LabelMapping(readLines);
    Map<String, String> label2id = getLabel2IdMapping(readLines);
    // Group outcome lines per CAS and normalize sequence/token ids.
    Map<String, List<String>> groupByCas = sortByCas(readLines);
    Map<String, List<String>> normSeqGroupByCas = normalizeSequenceId(groupByCas);
    Map<String, List<String>> normTokGroupByCas = normalizeTokenId(normSeqGroupByCas);
    // The first two report lines form the header; they are kept and may be
    // extended with new label ids later on.
    StringBuilder head = new StringBuilder();
    head.append(readLines.get(0));
    head.append("\n");
    head.append(readLines.get(1));
    StringBuilder body = new StringBuilder();
    AnalysisEngine postProcessingEngine = AnalysisEngineFactory.createEngine(PostprocessTagger.class,
            PostprocessTagger.PARAM_NAMELIST_FOLDER, namedEntitiyFolder,
            PostprocessTagger.PARAM_NAMELIST_LOWER_CASE, true);
    for (String key : normTokGroupByCas.keySet()) {
        // Re-load the binary CAS this group of outcomes belongs to.
        File testBinCasFolder = aContext.getFolder(InitTask.OUTPUT_KEY_TEST, AccessMode.READONLY);
        CollectionReader binCasReader = CollectionReaderFactory.createReader(BinaryCasReader.class,
                BinaryCasReader.PARAM_SOURCE_LOCATION, testBinCasFolder, BinaryCasReader.PARAM_PATTERNS,
                key + "_0.bin");
        JCas jcas = JCasFactory.createJCas();
        binCasReader.getNext(jcas.getCas());
        p.configure(jcas.getCas());
        List<TextClassificationSequence> sequences = JCasUtil.selectCovering(jcas,
                TextClassificationSequence.class, 0, jcas.getDocumentText().length());
        List<String> list = normTokGroupByCas.get(key);
        // First pass: materialize the classifier's prediction for every
        // token as a POS annotation in the CAS.
        for (String e : list) {
            int[] seqTokId = getSeqTokId(e);
            int seqid = seqTokId[0];
            int tokid = seqTokId[1];
            // group(1) = predicted label id, group(2) = gold label id.
            Pattern pattern = Pattern.compile("=([0-9]+);([0-9]+)");
            Matcher m = pattern.matcher(e);
            m.find();
            String prediction = id2label.get(m.group(1));
            TextClassificationSequence sequence = sequences.get(seqid);
            List<Token> tokens = JCasUtil.selectCovered(jcas, Token.class, sequence.getBegin(),
                    sequence.getEnd());
            Token token = tokens.get(tokid);
            Type posTag = p.getTagType(prediction);
            POS pos = (POS) jcas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd());
            pos.setPosValue(prediction);
            pos.addToIndexes();
            token.setPos(pos);
            // prediction = updatePrediction(prediction, pos.getPosValue());
            // evaluate(prediction, gold);
            //
            // sb.append(key + "_" + seq + "_" + tok + "=" +
            // label2id.get(prediction) + ";"
            // + label2id.get(gold) + "\n");
        }
        // Let the name-list tagger overwrite predictions where applicable.
        postProcessingEngine.process(jcas);
        // Second pass: read the (possibly changed) POS values back, score
        // them against gold, and rebuild the report body.
        for (String e : list) {
            int[] seqTokId = getSeqTokId(e);
            int seqid = seqTokId[0];
            int tokid = seqTokId[1];
            Pattern pattern = Pattern.compile("=([0-9]+);([0-9]+)");
            Matcher m = pattern.matcher(e);
            m.find();
            String gold = id2label.get(m.group(2));
            TextClassificationSequence sequence = sequences.get(seqid);
            List<Token> tokens = JCasUtil.selectCovered(jcas, Token.class, sequence.getBegin(),
                    sequence.getEnd());
            POS pos = tokens.get(tokid).getPos();
            evaluate(pos.getPosValue(), gold);
            String predictedLabel = label2id.get(pos.getPosValue());
            String goldLabel = label2id.get(gold);
            if (predictedLabel == null) {
                // The post-processor introduced a label unknown to the
                // original mapping: assign it the next free numeric id and
                // append the new pair to the header line.
                List<String> keySet = new ArrayList<String>(id2label.keySet());
                int max = -1;
                for (String id : keySet) {
                    int c = Integer.valueOf(id);
                    if (c > max) {
                        max = c;
                    }
                }
                max++;
                label2id.put(pos.getPosValue(), "" + max);
                id2label.put("" + max, pos.getPosValue());
                head.append(" " + max + "=" + pos.getPosValue());
                predictedLabel = label2id.get(pos.getPosValue());
            }
            body.append(key + "_" + seqid + "_" + tokid + "=" + predictedLabel + ";" + goldLabel + "\n");
        }
        // writeUpdatedJcas(aContext, jcas);
    }
    writeAccuracyFile(aContext);
    String out = head.toString() + "\n" + body.toString();
    writeUpdatedId2OutcomeReport(aContext, out);
}