Example usage for org.apache.commons.io FileUtils readLines

List of usage examples for org.apache.commons.io FileUtils readLines

Introduction

On this page you can find example usages of org.apache.commons.io.FileUtils.readLines.

Prototype

public static List<String> readLines(File file, String encoding) throws IOException

Source Link

Document

Reads the contents of a file line by line to a List of Strings.

Usage

From source file:com.bright.utils.rmDuplicateLines.java

/**
 * Removes duplicate lines from a {@code .txt} file in place, keeping the first occurrence of
 * every line and the original line order.
 *
 * <p>The file is read and written as UTF-8. Paths that do not denote an existing regular file
 * whose name ends with {@code .txt} are ignored. I/O failures are reported on standard error.
 *
 * @param args path of the file to de-duplicate
 */
public static void main(String args) {
    File monfile = new File(args);

    if (!monfile.isFile() || !monfile.getName().endsWith(".txt")) {
        return;
    }

    try {
        List<String> content = FileUtils.readLines(monfile, Charset.forName("UTF-8"));

        // A LinkedHashSet drops repeated lines while preserving first-seen order.
        Set<String> userIdSet = new LinkedHashSet<String>(content);

        // Write back with the same charset used for reading; a plain FileWriter would use the
        // platform default and could corrupt non-ASCII text. writeLines terminates each line
        // with the system line separator and closes the stream even when writing fails.
        FileUtils.writeLines(monfile, "UTF-8", userIdSet);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:edu.cuhk.hccl.TripRealRatingsApp.java

/**
 * Converts a directory of review files into one tab-separated ratings file.
 *
 * <p>Each input file is scanned line by line. Lines starting with {@code <Author>},
 * {@code <Content>} and {@code <Rating>} are interpreted; everything else is ignored. For each
 * {@code <Rating>} line with exactly eight tab-separated values whose review has non-empty
 * content, one output row is appended: author, hotel id (second {@code _}-separated token of the
 * file name), then the overall, location, room, service, value and cleanliness ratings. Missing
 * ratings ({@code -1}) are written as {@code 0}.
 *
 * @param args {@code args[0]} is the input directory, {@code args[1]} the output file (deleted
 *             first if it exists)
 * @throws IOException if the input directory cannot be listed or a file cannot be read/written
 */
public static void main(String[] args) throws IOException {
    File dir = new File(args[0]);
    File outFile = new File(args[1]);
    outFile.delete();

    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] files = dir.listFiles();
    if (files == null) {
        throw new IOException("Not a readable directory: " + dir.getAbsolutePath());
    }

    StringBuilder buffer = new StringBuilder();

    for (File file : files) {
        if (!file.isFile()) {
            continue;
        }

        String[] nameParts = file.getName().split("_");
        if (nameParts.length < 2) {
            System.out.printf("[ERROR] Cannot extract hotel ID from file name %s\n", file.getName());
            continue;
        }
        String hotelID = nameParts[1];

        List<String> lines = FileUtils.readLines(file, "UTF-8");
        String author = null;
        boolean noContent = false;
        for (String line : lines) {
            if (line.startsWith("<Author>")) {
                try {
                    author = line.split(">")[1].trim();
                } catch (ArrayIndexOutOfBoundsException e) {
                    System.out.println("[ERROR] An error occured on this line:");
                    System.out.println(line);
                    continue;
                }
            } else if (line.startsWith("<Content>")) {
                // Re-evaluate the flag for every review so that one empty review does not
                // suppress all following ratings of the same file. A bare "<Content>" line
                // yields no second part and therefore counts as empty instead of crashing.
                String[] parts = line.split(">", 2);
                noContent = parts.length < 2 || parts[1].trim().isEmpty();
            } else if (line.startsWith("<Rating>")) {
                String[] parts = line.split(">");
                if (noContent || parts.length < 2) {
                    continue;
                }

                String[] rates = parts[1].trim().split("\t");
                if (rates.length != 8) {
                    continue;
                }

                // Change missing rating from -1 to 0
                for (int i = 0; i < rates.length; i++) {
                    if (rates[i].equals("-1")) {
                        rates[i] = "0";
                    }
                }

                buffer.append(author).append('\t');
                buffer.append(hotelID).append('\t');
                buffer.append(rates[0]).append('\t'); // overall
                buffer.append(rates[3]).append('\t'); // location
                buffer.append(rates[2]).append('\t'); // room
                buffer.append(rates[6]).append('\t'); // service
                buffer.append(rates[1]).append('\t'); // value
                buffer.append(rates[4]).append('\t'); // cleanliness
                buffer.append('\n');
            }
        }

        // Append once per input file, using the same charset the input was read with.
        FileUtils.writeStringToFile(outFile, buffer.toString(), "UTF-8", true);

        buffer.setLength(0);
        System.out.printf("[INFO] Finished processing %s\n", file.getName());
    }
    System.out.println("[INFO] All processinig are finished!");
}

From source file:com.github.stagirs.lingvo.build.SyntaxStatisticsBuilder.java

/**
 * Builds unigram and bigram counts over the type sequences of an annotated corpus.
 *
 * <p>Every line of {@code annot.opcorpora.no_ambig.plain} is parsed and mapped to a list of
 * types. Each type is counted on its own and together with its predecessor; the sequence start
 * is represented by a leading space and the sequence end by a trailing space. The resulting
 * {@code key<TAB>count} lines are written to {@code src/main/resources/SyntaxStatistics}.
 *
 * @param args unused
 * @throws Exception if the corpus cannot be read or the statistics cannot be written
 */
public static void main(String[] args) throws Exception {
    TObjectIntHashMap<String> counts = new TObjectIntHashMap<String>();
    File corpus = new File("annot.opcorpora.no_ambig.plain");

    for (String sentence : FileUtils.readLines(corpus, "utf-8")) {
        List<String> tags = types(Annotation.parse(sentence));
        if (!tags.isEmpty()) {
            // An empty predecessor makes the first bigram key " <type>", marking the start.
            String previous = "";
            for (String tag : tags) {
                counts.adjustOrPutValue(tag, 1, 1);
                counts.adjustOrPutValue(previous + " " + tag, 1, 1);
                previous = tag;
            }
            // "<type> " marks the end of the sequence.
            counts.adjustOrPutValue(previous + " ", 1, 1);
        }
    }

    final List<String> rows = new ArrayList<String>();
    TObjectIntProcedure<String> collector = new TObjectIntProcedure<String>() {
        @Override
        public boolean execute(String key, int count) {
            rows.add(key + "\t" + count);
            // Returning true keeps the iteration going over all entries.
            return true;
        }
    };
    map(counts, collector);

    File target = new File("src/main/resources/SyntaxStatistics");
    FileUtils.writeLines(target, "utf-8", rows);
}

/** Feeds every entry of {@code counts} to {@code collector}. */
private static void map(TObjectIntHashMap<String> counts, TObjectIntProcedure<String> collector) {
    counts.forEachEntry(collector);
}

From source file:com.gargoylesoftware.htmlunit.source.TestCaseCreator.java

/**
 * The entry point. Prints to standard output the source of a test method that loads the
 * content of the given HTML file as a Java string.
 *
 * <p>Each line of the file becomes one string literal terminated by {@code \n}; the literals
 * are concatenated into a single {@code html} variable. Backslashes and double quotes are
 * escaped so that the generated literal reproduces the file content.
 *
 * @param args the arguments; {@code args[0]} is the location of the HTML file
 * @throws IOException if an error occurs
 */
public static void main(final String[] args) throws IOException {
    if (args.length == 0) {
        System.out.println("HTML file location is not provided");
        return;
    }

    final File file = new File(args[0]);
    if (!file.exists()) {
        System.out.println("File does not exist " + file.getAbsolutePath());
        // Nothing to generate; continuing would only fail while reading the file.
        return;
    }

    System.out.println("        /**");
    System.out.println("         * @throws Exception if an error occurs");
    System.out.println("         */");
    System.out.println("        @Test");
    System.out.println("        @Alerts()");
    System.out.println("        public void test() throws Exception {");

    final List<String> lines = FileUtils.readLines(file, TextUtil.DEFAULT_CHARSET);
    if (lines.isEmpty()) {
        // Still declare the variable so that the generated method compiles.
        System.out.println("            final String html = \"\";");
    }
    final int last = lines.size() - 1;
    for (int i = 0; i <= last; i++) {
        // Escape backslashes first so the backslashes added for quotes are not doubled.
        final String escaped = lines.get(i).replace("\\", "\\\\").replace("\"", "\\\"");
        final String prefix = i == 0 ? "            final String html = " : "                + ";
        // The statement terminator belongs to the last literal, even if it is also the first.
        final String suffix = i == last ? ";" : "";
        System.out.println(prefix + "\"" + escaped + "\\n\"" + suffix);
    }
    System.out.println("            loadPageWithAlerts2(html);");
    System.out.println("        }");
}

From source file:com.sankalp.characterreader.CharacterReader.java

/**
 * Trains a neural network on image samples and then classifies images chosen interactively.
 *
 * <p>The network has a 28*28 input layer, one hidden layer of 50 units and a 10-unit output
 * layer. It is trained for 30 epochs; after each epoch the accuracy on the test set is printed
 * and, except after the last epoch, the training data is shuffled and reset. Afterwards image
 * numbers are read from standard input, one per line, and the network output for
 * {@code test-images/<number>.jpg} is printed. Input ends with {@code -1} or end of stream.
 *
 * @param args unused
 * @throws IOException if reading from standard input fails
 */
public static void main(String[] args) throws IOException {

    double learningRate = 0.55;
    List<NeuralNetwork.Layer> hiddenLayerList = new ArrayList<NeuralNetwork.Layer>();
    hiddenLayerList.add(new NeuralNetwork.Layer(50));
    NeuralNetwork neuralNetwork = new NeuralNetwork(new NeuralNetwork.Layer(28 * 28),
            new NeuralNetwork.Layer(10), hiddenLayerList, learningRate);

    int trainingSampleCount = 60000;
    TrainingData trainingData = new TrainingData(new File("/home/sankalpkulshrestha/mnist/train-images/"),
            trainingSampleCount, new File("/home/sankalpkulshrestha/mnist/train-labels.csv"));

    int totalEpochs = 30;
    for (int index = 0; index < totalEpochs; index++) {
        System.out.println("---------- EPOCH " + index + " ----------");
        neuralNetwork.train(trainingData);

        // NOTE(review): the test data is rebuilt for every epoch; presumably a TrainingData
        // instance is consumed by a pass over it - confirm before hoisting it out of the loop.
        TrainingData testData = new TrainingData(new File("/home/sankalpkulshrestha/mnist/test-images/"), 10000,
                new File("/home/sankalpkulshrestha/mnist/test-labels.csv"));

        System.out.println("Training over");
        double accuracy = neuralNetwork.measureAccuracy(testData);
        System.out.println(accuracy);
        if (index < totalEpochs - 1) {
            trainingData.shuffle();
            trainingData.reset();
        }
    }
    System.out.println("Model trained. Enter image file names:");
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

    String input;
    // readLine() returns null at end of stream; treat that like the -1 sentinel.
    while ((input = br.readLine()) != null) {
        int number;
        try {
            number = Integer.parseInt(input.trim());
        } catch (NumberFormatException e) {
            // A typo must not abort the session after a long training run.
            System.out.println("Not a number: " + input);
            continue;
        }
        if (number == -1) {
            break;
        }
        int output = neuralNetwork.test(new TrainingData.Sample(
                new File("/home/sankalpkulshrestha/mnist/test-images/" + number + ".jpg"), 0));
        System.out.println(output);
    }
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step5GoldLabelEstimator.java

/**
 * Estimates gold labels and annotator competence for annotated argument pairs using MACE.
 *
 * <p>All argument pairs are loaded from the XML files of the input directory, exported as CSV
 * into a temporary directory and passed to MACE. The predictions (one line per pair) and the
 * tab-separated competence values (one per turker) are read back; each pair gets its non-empty
 * predicted label, and each assignment gets the rank and competence of its turker. Finally the
 * pairs are grouped by their original file name and written to the output directory.
 *
 * @param args {@code args[0]} is the input directory, {@code args[1]} the output directory
 * @throws Exception if loading, running MACE or saving fails
 * @throws IllegalStateException if MACE produced an unexpected number of predictions or
 *                               competence values
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    String sourcePath = args[0];
    File targetDir = new File(args[1]);

    if (!targetDir.exists()) {
        targetDir.mkdirs();
    }

    // Load every pair from every XML file of the input directory.
    List<AnnotatedArgumentPair> pairs = new ArrayList<>();
    for (File xmlFile : IOHelper.listXmlFiles(new File(sourcePath))) {
        List<AnnotatedArgumentPair> loaded = (List<AnnotatedArgumentPair>) XStreamTools.getXStream()
                .fromXML(xmlFile);
        pairs.addAll(loaded);
    }

    // Export the annotations in the CSV form passed to MACE.
    List<String> workerIds = extractAndSortTurkerIDs(pairs);
    String csv = prepareCSV(pairs, workerIds);

    File workDir = Files.createTempDirectory("mace").toFile();
    File maceInput = new File(workDir, "input.csv");
    FileUtils.writeStringToFile(maceInput, csv, "utf-8");

    File predictionsFile = new File(workDir, "predictions.txt");
    File competenceFile = new File(workDir, "competence.txt");

    String[] maceArguments = { "--iterations", "500", "--threshold", String.valueOf(MACE_THRESHOLD),
            "--restarts", "50", "--outputPredictions", predictionsFile.getAbsolutePath(),
            "--outputCompetence", competenceFile.getAbsolutePath(), maceInput.getAbsolutePath() };
    MACE.main(maceArguments);

    // Read the results back and make sure they line up with what was sent in.
    List<String> predictedLabels = FileUtils.readLines(predictionsFile, "utf-8");
    if (pairs.size() != predictedLabels.size()) {
        throw new IllegalStateException("Wrong size of the predicted file; expected " + pairs.size()
                + " lines but was " + predictedLabels.size());
    }

    String[] competenceValues = FileUtils.readFileToString(competenceFile, "utf-8").split("\t");
    if (workerIds.size() != competenceValues.length) {
        throw new IllegalStateException(
                "Expected " + workerIds.size() + " competence number, got " + competenceValues.length);
    }

    // Competence values are positional: the i-th value belongs to the i-th turker ID.
    Map<String, Double> competenceByWorker = new TreeMap<>();
    int position = 0;
    for (String workerId : workerIds) {
        competenceByWorker.put(workerId, Double.valueOf(competenceValues[position]));
        position++;
    }

    // Ranking input: turkers ordered by competence, highest first.
    Map<String, Double> rankedCompetences = IOHelper.sortByValue(competenceByWorker, false);
    System.out.println("Sorted turker competences: " + rankedCompetences);

    // Attach the predicted label to each pair and rank/competence to each assignment.
    int pairIndex = 0;
    for (AnnotatedArgumentPair pair : pairs) {
        String label = predictedLabels.get(pairIndex).trim();
        pairIndex++;

        // The prediction may be empty; in that case the pair keeps its current label.
        if (label.length() > 0) {
            pair.setGoldLabel(label);
        }

        for (AnnotatedArgumentPair.MTurkAssignment assignment : pair.mTurkAssignments) {
            String workerId = assignment.getTurkID();

            assignment.setTurkRank(getTurkerRank(workerId, rankedCompetences));

            double competence = competenceByWorker.get(workerId);
            assignment.setTurkCompetence(competence);
        }
    }

    // Regroup the pairs by the file name derived from their metadata and stance.
    Map<String, List<AnnotatedArgumentPair>> pairsByFileName = new HashMap<>();
    for (AnnotatedArgumentPair pair : pairs) {
        String name = IOHelper.createFileName(pair.getDebateMetaData(), pair.getArg1().getStance());

        List<AnnotatedArgumentPair> group = pairsByFileName.get(name);
        if (group == null) {
            group = new ArrayList<AnnotatedArgumentPair>();
            pairsByFileName.put(name, group);
        }
        group.add(pair);
    }

    // Write one XML file per group into the output directory.
    for (Map.Entry<String, List<AnnotatedArgumentPair>> group : pairsByFileName.entrySet()) {
        File target = new File(targetDir, group.getKey());
        List<AnnotatedArgumentPair> members = group.getValue();

        XStreamTools.toXML(members, target);

        System.out.println("Saved " + members.size() + " pairs to " + target);
    }
}

From source file:com.acapulcoapp.alloggiatiweb.FileReader.java

/**
 * Imports check-in records from text files into the person and check-in repositories.
 *
 * <p>Starts the Spring application, reads every {@code .txt} file below a fixed directory,
 * groups the lines into records keyed by arrival date, and then stores the records day by day
 * in ascending date order. The JVM is terminated when the import is done.
 *
 * @param args command line arguments passed on to the Spring application
 * @throws UnknownHostException declared for the application start-up
 * @throws IOException if an input file cannot be read
 */
public static void main(String[] args) throws UnknownHostException, IOException {
    SpringApplication app = new SpringApplication(AcapulcoappApp.class);
    SimpleCommandLinePropertySource source = new SimpleCommandLinePropertySource(args);
    addDefaultProfile(app, source);

    ConfigurableApplicationContext context = app.run(args);

    initBeans(context);

    // TreeMap: records are processed in ascending order of arrival date.
    Map<LocalDate, List<List<String>>> map = new TreeMap<>();

    List<File> files = new ArrayList<>(FileUtils.listFiles(new File("/Users/chiccomask/Downloads/ALLOGGIATI"),
            new String[] { "txt" }, true));

    // NOTE(review): the update logic further down assumes later files carry newer data;
    // listFiles does not obviously guarantee an order, so confirm this reversal is sufficient.
    Collections.reverse(files);

    for (File file : files) {
        // An earlier revision read these files as windows-1252.
        groupRecordsByArrival(FileUtils.readLines(file, "UTF-8"), file, map);
    }

    for (Map.Entry<LocalDate, List<List<String>>> day : map.entrySet()) {
        LocalDate date = day.getKey();

        System.out.println();
        System.out.println("process day " + date);

        for (List<String> record : day.getValue()) {
            storeRecord(date, record);
        }
    }

    context.registerShutdownHook();

    System.exit(0);
}

/**
 * Splits the lines of one file into records and files each record under its arrival date.
 * A type "16" line is a record on its own; a type "17" line is followed by any number of
 * type "19" lines, and a type "18" line by any number of type "20" lines.
 */
private static void groupRecordsByArrival(List<String> allLines, File file,
        Map<LocalDate, List<List<String>>> map) {
    DateTimeFormatter dateFormat = DateTimeFormatter.ofPattern(DATE_PATTERN);

    int i = 0;
    while (i < allLines.size()) {
        String line = allLines.get(i);
        String type = TIPO_ALLOGGIO.parse(line);

        List<String> record = new ArrayList<>();
        record.add(line);
        i++;

        switch (type) {
        case "16":
            break;
        case "17":
            i = appendFollowers(allLines, i, "19", record);
            break;
        case "18":
            i = appendFollowers(allLines, i, "20", record);
            break;
        default:
            // The index has already been advanced, so an unexpected line is skipped instead
            // of being revisited forever, and no record is stored for it.
            System.out.println("skip line of unexpected type " + type + " in " + file.getAbsolutePath());
            continue;
        }

        LocalDate arrived = LocalDate.parse(DATA_ARRIVO.parse(line), dateFormat);
        map.computeIfAbsent(arrived, key -> new ArrayList<>()).add(record);
    }
}

/**
 * Appends the consecutive lines of type {@code followerType} starting at {@code from} to
 * {@code record} and returns the index of the first line that was not consumed.
 */
private static int appendFollowers(List<String> allLines, int from, String followerType,
        List<String> record) {
    int i = from;
    while (i < allLines.size() && followerType.equals(TIPO_ALLOGGIO.parse(allLines.get(i)))) {
        record.add(allLines.get(i));
        i++;
    }
    return i;
}

/**
 * Persists one record: the first line describes the main person and the stay, every further
 * line an accompanying person. The record is skipped when one already exists for the same
 * main person and date.
 */
private static void storeRecord(LocalDate date, List<String> record) {
    System.out.println();
    System.out.println("process record ");
    for (String line : record) {
        System.out.println(line);
    }

    CheckinRecord checkinRecord = new CheckinRecord();

    String firstLine = record.get(0);

    String typeStr = TIPO_ALLOGGIO.parse(firstLine);
    CheckinType cht = checkinTypeRepository.find(typeStr);
    checkinRecord.setCheckinType(cht);

    int days = Integer.parseInt(PERMANENZA.parse(firstLine));
    checkinRecord.setDays(days);
    checkinRecord.setArrived(date);

    List<Person> others = new ArrayList<>();

    for (int k = 0; k < record.size(); k++) {
        Person p = extractPerson(record.get(k));

        if (p.getDistrictOfBirth() == null) {
            System.out.println("district of birth not found " + p);
        }

        p = saveOrReusePerson(p);

        if (k == 0) {
            checkinRecord.setMainPerson(p);
        } else {
            others.add(p);
        }
    }

    checkinRecord.setOtherPeople(new HashSet<>(others));

    if (checkinRecordRepository.alreadyExists(checkinRecord.getMainPerson(), date) != null) {
        System.out.println("already exists " + date + " p " + checkinRecord.getMainPerson());
    } else {
        System.out.println("save record ");
        checkinRecordRepository.saveAndFlush(checkinRecord);
    }
}

/**
 * Stores {@code p}, or merges it with the single stored person that has the same surname,
 * name and date of birth, and returns the person to reference from the check-in record.
 *
 * @throws RuntimeException if more than one stored person matches
 */
private static Person saveOrReusePerson(Person p) {
    List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(),
            p.getDateOfBirth());

    if (duplicates.isEmpty()) {
        System.out.println("add new person " + p.getId() + " " + p);
        personRepository.saveAndFlush(p);
        return p;
    }
    if (duplicates.size() > 1) {
        throw new RuntimeException("More duplicated for " + p.getName());
    }

    Person found = duplicates.get(0);

    if (p.getIdentityDocument() == null && found.getIdentityDocument() != null) {
        // The stored entity carries more data than the file, so it is used unchanged.
        System.out.println("use already saved person " + found.getId() + " " + found);
        return found;
    }

    // Otherwise the file version is assumed to be the newer one and overwrites the entity.
    p.setId(found.getId());
    System.out.println("update person " + p.getId() + " " + p);
    personRepository.saveAndFlush(p);
    return p;
}

From source file:fakedatamaker.util.RandomFileUtil.java

/**
 * Returns one randomly chosen line of a text file.
 *
 * @param filePath path of the file to read
 * @param encoding name of the charset used to decode the file
 * @return a random line of the file, or {@code null} if the file has no lines or could not be
 *         read (the I/O error is printed to standard error)
 */
public static String getRandomFileLine(String filePath, String encoding) {
    String result = null;

    try {
        List<String> cityLines = FileUtils.readLines(new File(filePath), encoding);

        // Random.nextInt(0) throws IllegalArgumentException, so an empty file is answered
        // with null, like any other case in which no line is available.
        if (!cityLines.isEmpty()) {
            Random r = new Random();
            result = cityLines.get(r.nextInt(cityLines.size()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    return result;
}

From source file:com.nubits.nubot.utils.VersionInfo.java

/**
 * Reads the {@code key=value} info file from the working directory.
 *
 * <p>Each line is split at {@code =}; the first part becomes the key and the second part the
 * value. Lines that do not yield at least two parts (for example lines without {@code =}) are
 * skipped.
 *
 * @return the parsed entries, or {@code null} if the file could not be read (the error is
 *         logged)
 */
private static HashMap getInfoFile() {
    String wdir = System.getProperty("user.dir");

    String fp = wdir + "/" + Settings.INFO_FILE;
    File file = new File(fp);
    try {
        List<String> lines = FileUtils.readLines(file, "UTF-8");
        HashMap<String, String> km = new HashMap<String, String>();
        for (String l : lines) {
            String[] a = l.split("=");
            // Check the length explicitly instead of relying on a caught
            // ArrayIndexOutOfBoundsException to skip lines without a key/value pair.
            if (a.length >= 2) {
                km.put(a[0], a[1]);
            }
        }
        return km;
    } catch (Exception e) {
        LOG.error(e.toString());
    }
    return null;
}

From source file:com.github.stagirs.lingvo.syntax.disambiguity.mystem.MyStem.java

/**
 * Runs the external {@code ./mystem} tool on the given text and parses its output.
 *
 * <p>The text is written to the file {@code mystem_input} in the working directory and the
 * tool writes {@code mystem_output}. Every output line is split at spaces; each token becomes
 * an array of items: empty for an empty token, a single item with an empty type for a token
 * without {@code {...}}, otherwise one item per {@code |}-separated alternative inside the
 * braces. All items have a score of 0.
 *
 * <p>NOTE(review): the fixed file names mean concurrent calls would overwrite each other's
 * files - confirm callers never run this in parallel.
 *
 * @param text the lines to analyse
 * @return one token-by-alternative matrix per output line
 * @throws IOException if a file cannot be written/read or the tool exits with a non-zero code
 * @throws InterruptedException if the thread is interrupted while waiting for the tool
 */
public static List<SyntaxItem[][]> process(List<String> text) throws IOException, InterruptedException {
    FileUtils.writeLines(new File("mystem_input"), "utf-8", text);
    Process process = new ProcessBuilder("./mystem", "-c", "-i", "-d", "mystem_input", "mystem_output")
            .redirectErrorStream(true).start();
    InputStream is = process.getInputStream();
    try {
        // Drain the merged stdout/stderr before waiting: a child that fills the pipe buffer
        // would otherwise block forever and waitFor() would never return.
        byte[] sink = new byte[4096];
        while (is.read(sink) != -1) {
            // output is discarded
        }

        int exitCode = process.waitFor();
        if (exitCode != 0) {
            // Do not parse an output file that may be missing or left over from an earlier run.
            throw new IOException("mystem exited with code " + exitCode);
        }

        List<String> lines = FileUtils.readLines(new File("mystem_output"), "utf-8");
        List<SyntaxItem[][]> resultList = new ArrayList<SyntaxItem[][]>();
        for (String line : lines) {
            String[] parts = line.split(" ");
            SyntaxItem[][] result = new SyntaxItem[parts.length][];
            for (int i = 0; i < result.length; i++) {
                result[i] = parseToken(parts[i]);
            }
            resultList.add(result);
        }
        return resultList;
    } finally {
        is.close();
        // Releases the remaining pipes of the child process.
        process.destroy();
    }
}

/** Converts one output token of the form {@code word} or {@code word{a|b|...}} into items. */
private static SyntaxItem[] parseToken(String token) {
    if (token.isEmpty()) {
        return new SyntaxItem[0];
    }
    int open = token.indexOf('{');
    if (open < 0) {
        return new SyntaxItem[] { newItem(token, "") };
    }
    // Tolerate a missing closing brace by reading up to the end of the token.
    int close = token.indexOf('}', open);
    if (close < 0) {
        close = token.length();
    }
    String word = token.substring(0, open);
    String[] forms = token.substring(open + 1, close).split("\\|");
    SyntaxItem[] items = new SyntaxItem[forms.length];
    for (int j = 0; j < items.length; j++) {
        items[j] = newItem(word, forms[j]);
    }
    return items;
}

/** Creates an item with the given name and type and a score of 0. */
private static SyntaxItem newItem(final String word, final String type) {
    return new SyntaxItem() {
        @Override
        public String getName() {
            return word;
        }

        @Override
        public String getType() {
            return type;
        }

        @Override
        public double getScore() {
            return 0;
        }
    };
}