Example usage for org.apache.commons.io FileUtils listFiles

Introduction

On this page you can find example usages of org.apache.commons.io FileUtils listFiles.

Prototype

public static Collection listFiles(File directory, String[] extensions, boolean recursive)

Source Link

Document

Finds files within a given directory (and optionally its subdirectories) which match an array of extensions.

Usage

From source file:com.sangupta.keepwalking.MergeRepo.java

/**
 * Compares an older and a newer directory tree and builds a merged tree:
 * a copy of the older tree with the files that disappeared removed, the new
 * files added and the modified files overwritten by their newer version.
 * The lists of removed, added and modified files are printed to standard output.
 *
 * @param args exactly three paths, in order: the previous (older) repository
 *             directory, the newer repository directory, and the directory in
 *             which the merged result is created (any existing directory at
 *             that path is deleted first)
 * @throws IOException if listing, deleting or copying files fails
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        usage();
        return;
    }

    final File previous = new File(args[0]);
    final File newer = new File(args[1]);
    final File merged = new File(args[2]);

    if (!(previous.exists() && previous.isDirectory())) {
        System.out.println("The previous version does not exists or is not a directory.");
        return;
    }

    if (!(newer.exists() && newer.isDirectory())) {
        System.out.println("The newer version does not exists or is not a directory.");
        return;
    }

    // accept every file, but do not descend into CVS/SVN administrative directories
    final IOFileFilter directoryFilter = FileFilterUtils.makeCVSAware(FileFilterUtils.makeSVNAware(null));

    final Collection<File> olderFiles = FileUtils.listFiles(previous, TrueFileFilter.TRUE, directoryFilter);
    final Collection<File> newerFiles = FileUtils.listFiles(newer, TrueFileFilter.TRUE, directoryFilter);

    // build a list of unique paths, relative to the root of each tree
    System.out.println("Reading files from older version...");
    List<String> olderPaths = new ArrayList<String>();
    for (File oldFile : olderFiles) {
        olderPaths.add(getRelativePath(oldFile, previous));
    }

    System.out.println("Reading files from newer version...");
    List<String> newerPaths = new ArrayList<String>();
    for (File newerFile : newerFiles) {
        newerPaths.add(getRelativePath(newerFile, newer));
    }

    // Hash-based lookups keep the set differences below linear in the number
    // of files; the result lists themselves stay in listing order.
    final Collection<String> olderLookup = new java.util.HashSet<String>(olderPaths);
    final Collection<String> newerLookup = new java.util.HashSet<String>(newerPaths);

    // files present only in the older tree have been removed
    List<String> filesRemoved = new ArrayList<String>(olderPaths);
    filesRemoved.removeAll(newerLookup);
    System.out.println("Files removed in newer version: " + filesRemoved.size());
    for (String removed : filesRemoved) {
        System.out.print("    ");
        System.out.println(removed);
    }

    // files present only in the newer tree have been added
    List<String> filesAdded = new ArrayList<String>(newerPaths);
    filesAdded.removeAll(olderLookup);
    System.out.println("Files added in newer version: " + filesAdded.size());
    for (String added : filesAdded) {
        System.out.print("    ");
        System.out.println(added);
    }

    // find which files are common
    // now check if they have modified or not
    newerPaths.retainAll(olderLookup);
    List<String> modified = checkModifiedFiles(newerPaths, previous, newer);
    System.out.println("Files modified in newer version: " + modified.size());
    for (String modify : modified) {
        System.out.print("    ");
        System.out.println(modify);
    }

    // clean any previous existence of merged repo
    System.out.println("Cleaning any previous merged repositories...");
    if (merged.exists() && merged.isDirectory()) {
        FileUtils.deleteDirectory(merged);
    }

    System.out.println("Merging from newer to older repository...");
    // start from a full copy of the older tree
    FileUtils.copyDirectory(previous, merged);

    // now remove all files that no longer exist in the newer tree
    for (String removed : filesRemoved) {
        File toRemove = new File(merged, removed);
        if (!toRemove.delete()) {
            // report instead of silently leaving a stale file in the merged tree
            System.err.println("Could not remove " + toRemove.getAbsolutePath());
        }
    }

    // now add all files that are new in the newer tree
    for (String added : filesAdded) {
        File toAdd = new File(newer, added);
        File destination = new File(merged, added);
        FileUtils.copyFile(toAdd, destination);
    }

    // now over-write modified files
    for (String changed : modified) {
        File change = new File(newer, changed);
        File destination = new File(merged, changed);
        // best effort: copyFile below replaces the content whether or not this succeeds
        destination.delete();
        FileUtils.copyFile(change, destination);
    }

    System.out.println("Done merging.");
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step10RemoveEmptyDocuments.java

/**
 * Reads every XML query container in the input directory, drops ranked
 * results whose plain text is missing or empty, optionally crops the
 * remaining list to a fixed length, and writes each container to the
 * output directory as {@code <qID>.xml}.
 *
 * @param args {@code args[0]} input directory with XML query containers;
 *             {@code args[1]} output directory (created if absent);
 *             optional {@code args[2]} equal to {@code "crop"} enables cropping
 * @throws IOException if the output directory cannot be created or a file
 *                     cannot be read or written
 */
public static void main(String[] args) throws IOException {
    // input dir - list of xml query containers
    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (!outputDir.exists() && !outputDir.mkdirs() && !outputDir.isDirectory()) {
        throw new IOException("Cannot create output directory " + outputDir);
    }

    boolean crop = args.length >= 3 && "crop".equals(args[2]);

    // Maximum number of zero-sized documents per query. The value is hard-coded;
    // the pass over all query containers that used to compute it is disabled.
    int maxMissing = 7;

    System.out.println("Max zeroLengthDocuments in query: " + maxMissing);
    // max is 7 = we're cut-off at 93
    final int cropSize = 100 - maxMissing;

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        List<QueryResultContainer.SingleRankedResult> nonEmptyDocsList = new ArrayList<>();

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            // collect non-empty documents
            if (rankedResults.plainText != null && !rankedResults.plainText.isEmpty()) {
                nonEmptyDocsList.add(rankedResults);
            }
        }

        System.out.println("Non-empty docs coune: " + nonEmptyDocsList.size());

        if (crop) {
            // Cut at cropSize, but never beyond the end of the list: a query with
            // fewer non-empty documents would otherwise make subList throw.
            int limit = Math.min(cropSize, nonEmptyDocsList.size());
            nonEmptyDocsList = nonEmptyDocsList.subList(0, limit);
            System.out.println("After cropping: " + nonEmptyDocsList.size());
        }
        System.out.println("After cleaning: " + nonEmptyDocsList.size());

        queryResultContainer.rankedResults.clear();
        queryResultContainer.rankedResults.addAll(nonEmptyDocsList);

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

From source file:PlyBounder.java

/**
 * Runs the external {@code ./ply-tool.py} script over every {@code .ply}
 * file of a directory to intersect it with a bounding box, then asks the
 * tool to write the collected result to a single output PLY file.
 *
 * Options: {@code -plyPath <dir>} (defaults to the current directory),
 * {@code -boundingbox <string>} and {@code -outputPlyFile <file>}.
 *
 * @param args command line arguments as described above
 */
public static void main(String[] args) {

    // Get the commandline arguments
    Options options = new Options();
    // Available options
    Option plyPath = OptionBuilder.withArgName("dir").hasArg()
            .withDescription("directory containing input .ply files").create("plyPath");
    Option boundingbox = OptionBuilder.withArgName("string").hasArg()
            .withDescription("bounding box in WKT notation").create("boundingbox");
    Option outputPlyFile = OptionBuilder.withArgName("file").hasArg().withDescription("output PLY file name")
            .create("outputPlyFile");
    options.addOption(plyPath);
    options.addOption(boundingbox);
    options.addOption(outputPlyFile);

    String plydir = ".";
    String boundingboxstr;
    String outputfilename;
    boolean helpPrinted = false;

    CommandLineParser parser = new DefaultParser();
    try {
        // parse the command line arguments
        CommandLine line = parser.parse(options, args);

        boundingboxstr = line.getOptionValue("boundingbox");
        outputfilename = line.getOptionValue("outputPlyFile");

        if (line.hasOption("plyPath")) {
            plydir = line.getOptionValue("plyPath");
            System.out.println("Using plyPath=" + plydir);
        } else {
            // no directory given: show the usage and fall back to the default directory
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("PlyBounder", options);
            helpPrinted = true;
        }
    } catch (ParseException exp) {
        System.err.println("Error getting arguments: " + exp.getMessage());
        // without valid arguments there is nothing sensible to pass to the tool
        return;
    }

    // A missing option value is null and would make Runtime.exec fail with a
    // NullPointerException, so stop early with a usage message instead.
    if (boundingboxstr == null || outputfilename == null) {
        System.err.println("Error getting arguments: boundingbox and outputPlyFile are required");
        if (!helpPrinted) {
            new HelpFormatter().printHelp("PlyBounder", options);
        }
        return;
    }

    // input directory
    // Get list of files (both lower and upper case extension, not recursive)
    File dir = new File(plydir);
    java.util.Collection<File> files = FileUtils.listFiles(dir, new String[] { "ply", "PLY" }, false);
    for (File file : files) {
        try {
            System.out.println("file=" + file.getCanonicalPath());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    String sometempfile = "magweg.wkt";

    // Loop through .ply files in directory
    for (File file : files) {
        try {
            runAndPrint(new String[] { "./ply-tool.py", "intersection", file.getCanonicalPath(),
                    boundingboxstr, sometempfile });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Write new .ply file
    //ply-tool write setfile outputPlyFile
    System.out.println("Running: " + "./ply-tool.py write " + sometempfile + " " + outputfilename);
    // argument array instead of one command string, so a file name containing
    // whitespace is passed to the tool as a single argument
    runAndPrint(new String[] { "./ply-tool.py", "write", sometempfile, outputfilename });

    // Done
    System.out.println("Done");
}

/** Starts the given command and echoes its standard output, then its standard error, to System.out. */
private static void runAndPrint(String[] command) {
    BufferedReader stdInput = null;
    BufferedReader stdError = null;
    try {
        Process p = Runtime.getRuntime().exec(command);

        stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
        stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));

        String s;

        // read the output from the command
        System.out.println("cmdout:\n");
        while ((s = stdInput.readLine()) != null) {
            System.out.println(s);
        }

        // read any errors from the attempted command
        System.out.println("cmderr:\n");
        while ((s = stdError.readLine()) != null) {
            System.out.println(s);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        closeReader(stdInput);
        closeReader(stdError);
    }
}

/** Closes the reader if it was opened; a failure to close is reported but not propagated. */
private static void closeReader(BufferedReader reader) {
    if (reader == null) {
        return;
    }
    try {
        reader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:de.jwi.zip.Zipper.java

/**
 * Zips every file below a hard-coded documentation directory into
 * {@code d:/temp/myfirst.zip}.
 *
 * @param args not used
 * @throws Exception if listing the files or writing the archive fails
 */
public static void main(String[] args) throws Exception {
    ZipOutputStream z = new ZipOutputStream(new FileOutputStream("d:/temp/myfirst.zip"));
    try {
        // accepts every file and every directory, so the whole tree is listed
        IOFileFilter filter = new IOFileFilter() {

            public boolean accept(java.io.File file) {
                return true;
            }

            public boolean accept(java.io.File dir, java.lang.String name) {
                return true;
            }
        };

        Collection c = FileUtils.listFiles(new File("/java/javadocs/j2sdk-1.4.1/docs/tooldocs"), filter, filter);

        new Zipper().zip(z, c, new File("/java/javadocs/j2sdk-1.4.1"));
    } finally {
        // release the archive file handle even when listing or zipping fails
        z.close();
    }

}

From source file:com.ontotext.s4.SBTDemo.Main.java

public static void main(String[] args) {
    /*/*from  ww w  . jav  a  2  s  .  c o m*/
     * Read log4j properties file.
     */
    org.apache.log4j.PropertyConfigurator.configure(args.length >= 1 ? args[1] : DEFAULT_LOG4J_FILE);

    /*
     * Read properties file  List all annotated files. We should
     * use absolute path to the files
     */
    init(args);

    ProcessingDocuments processingDocuments = new ProcessingDocuments(
            programProperties.getProperty(PropertiesNames.S4_API_KEY),
            programProperties.getProperty(PropertiesNames.S4_API_PASS),
            programProperties.getProperty(PropertiesNames.RAW_FOLDER),
            programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER),
            programProperties.getProperty(PropertiesNames.SERVICE),
            programProperties.getProperty(PropertiesNames.MIME_TYPE),
            programProperties.getProperty(PropertiesNames.RESPONSE_FORMAT),
            Integer.parseInt(programProperties.getProperty(PropertiesNames.NUMBER_OF_THREADS)));

    processingDocuments.ProcessData();

    File directory = new File(programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER));
    listOfAllAnnotatedFiles = FileUtils.listFiles(directory, new RegexFileFilter("^(.*?)"),
            DirectoryFileFilter.DIRECTORY);

    RepoManager repoManager = new RepoManager(programProperties.getProperty(PropertiesNames.REPOSITORY_URL));
    JsonToRDF jsonToRdfParser = new JsonToRDF(programProperties.getProperty(PropertiesNames.MIME_TYPE),
            programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER));

    for (File file : listOfAllAnnotatedFiles) {
        String fileContent = null;
        try {
            fileContent = FileUtils.readFileToString(file, "UTF-8");
        } catch (IOException e) {
            logger.error(e);
        }

        Model graph = jsonToRdfParser.wirteDataToRDF(fileContent, file.getName(),
                programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER));
        try {
            repoManager.sendDataTOGraphDB(graph);
        } catch (RepositoryException e) {
            logger.error(e);
        }

    }

    repoManager.close();
}

From source file:de.micromata.genome.gwiki.tools.PatchJavaHeader.java

/**
 * Patches every {@code .java} file found recursively below the given directory.
 *
 * @param args {@code args[0]} is the base directory to scan
 */
public static void main(String[] args) {
    final File root = new File(args[0]);
    final String[] javaOnly = { "java" };
    final Collection<File> sources = FileUtils.listFiles(root, javaOnly, true);

    for (File source : sources) {
        System.out.println("file: " + source.getAbsolutePath());
        patchFile(source);
    }

}

From source file:io.apiman.tools.i18n.TemplateScanner.java

/**
 * Scans all {@code .html} and {@code .include} templates below the given
 * directory for translation strings and writes them to
 * {@code scanner-messages.properties} in the i18n tool's target directory.
 * Exits with status 1 when the template or output directory is not usable.
 *
 * @param args a single element: the path of the template directory
 * @throws IOException if scanning a template or writing the messages fails
 */
public static void main(String[] args) throws IOException {
    final boolean singlePathGiven = args != null && args.length == 1;
    if (!singlePathGiven) {
        System.out.println("Template directory not provided (no path provided).");
        System.exit(1);
    }

    final File templates = new File(args[0]);
    if (!templates.isDirectory()) {
        System.out.println("Template directory not provided (provided path is not a directory).");
        System.exit(1);
    }

    // dash.html acts as a marker that this really is the template directory
    final File marker = new File(templates, "dash.html");
    if (!marker.isFile()) {
        System.out.println("Template directory not provided (dash.html not found).");
        System.exit(1);
    }

    final File targetDir = new File(templates, "../../../../../../tools/i18n/target");
    if (!targetDir.isDirectory()) {
        System.out.println("Output directory not found: " + targetDir);
        System.exit(1);
    }

    // start from a clean output file
    final File messagesFile = new File(targetDir, "scanner-messages.properties");
    if (messagesFile.isFile()) {
        if (!messagesFile.delete()) {
            System.out.println("Couldn't delete the old messages.properties: " + messagesFile);
            System.exit(1);
        }
    }

    System.out.println("Starting scan.");
    System.out.println("Scanning template directory: " + templates.getAbsolutePath());

    final Collection<File> templateFiles = FileUtils.listFiles(templates, new String[] { "html", "include" },
            true);

    // sorted map of the discovered translation strings
    final TreeMap<String, String> discovered = new TreeMap<>();
    for (File template : templateFiles) {
        System.out.println("\tScanning file: " + template);
        scanFile(template, discovered);
    }

    outputMessages(discovered, messagesFile);

    final int scannedCount = templateFiles.size();
    final int stringCount = discovered.size();
    System.out.println("Scan complete.  Scanned " + scannedCount + " files and discovered " + stringCount
            + " translation strings.");
}

From source file:com.acapulcoapp.alloggiatiweb.FileReader.java

/**
 * Starts the Spring application, reads all {@code .txt} files below a
 * hard-coded directory, groups their lines into check-in records keyed by
 * arrival date, and stores the persons and check-in records through the
 * repositories.
 *
 * A record starts with a line of type "16", "17" or "18". A "17" line is
 * followed by any number of "19" lines and an "18" line by any number of
 * "20" lines, which belong to the same record. Lines of any other type that
 * do not follow such a header are reported and skipped.
 *
 * @param args command line arguments, forwarded to the Spring application
 * @throws UnknownHostException declared by the original signature
 * @throws IOException if an input file cannot be read
 */
public static void main(String[] args) throws UnknownHostException, IOException {
    SpringApplication app = new SpringApplication(AcapulcoappApp.class);
    SimpleCommandLinePropertySource source = new SimpleCommandLinePropertySource(args);
    addDefaultProfile(app, source);

    ConfigurableApplicationContext context = app.run(args);

    initBeans(context);

    // sorted map, so the days are processed in ascending date order below
    Map<LocalDate, List<List<String>>> map = new TreeMap<>();

    List<File> files = new ArrayList<>(FileUtils.listFiles(new File("/Users/chiccomask/Downloads/ALLOGGIATI"),
            new String[] { "txt" }, true));

    Collections.reverse(files);

    final DateTimeFormatter arrivalFormat = DateTimeFormatter.ofPattern(DATE_PATTERN);

    for (File file : files) {

        List<String> allLines = FileUtils.readLines(file, "UTF-8");

        int i = 0;
        while (i < allLines.size()) {

            String line = allLines.get(i);
            String type = TIPO_ALLOGGIO.parse(line);

            // type of the lines that continue the record started by this line, if any
            String followerType = null;
            switch (type) {
            case "16":
                break;
            case "17":
                followerType = "19";
                break;
            case "18":
                followerType = "20";
                break;
            default:
                // Not a record header: advance past it. Without the increment the
                // loop would never terminate on such a line.
                System.out.println("skip line with unexpected type " + type + " in " + file.getAbsolutePath());
                i++;
                continue;
            }

            List<String> record = new ArrayList<>();
            record.add(line);
            i++;

            if (followerType != null) {
                // collect the continuation lines; stop at the first line of another type
                while (i < allLines.size()) {
                    String subline = allLines.get(i);
                    if (!followerType.equals(TIPO_ALLOGGIO.parse(subline))) {
                        break;
                    }
                    record.add(subline);
                    i++;
                }
            }

            LocalDate arrived = LocalDate.parse(DATA_ARRIVO.parse(line), arrivalFormat);
            map.computeIfAbsent(arrived, day -> new ArrayList<>()).add(record);
        }
    }

    for (Map.Entry<LocalDate, List<List<String>>> day : map.entrySet()) {
        LocalDate date = day.getKey();

        System.out.println();
        System.out.println("process day " + date);

        for (List<String> record : day.getValue()) {

            System.out.println();
            System.out.println("process record ");
            for (String line : record) {
                System.out.println(line);
            }

            CheckinRecord checkinRecord = new CheckinRecord();

            // type and length of stay are taken from the first line of the record
            String firstLine = record.get(0);

            String typeStr = TIPO_ALLOGGIO.parse(firstLine);
            CheckinType cht = checkinTypeRepository.find(typeStr);
            checkinRecord.setCheckinType(cht);

            int days = Integer.parseInt(PERMANENZA.parse(firstLine));
            checkinRecord.setDays(days);
            checkinRecord.setArrived(date);

            // the first person of a record is the main person, all following ones are "others"
            boolean isMain = true;

            List<Person> others = new ArrayList<>();

            for (String line : record) {
                Person p = extractPerson(line);

                if (p.getDistrictOfBirth() == null) {
                    System.out.println("district of birth not found " + p);
                }

                List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(),
                        p.getDateOfBirth());

                if (duplicates.isEmpty()) {
                    System.out.println("add new person " + p.getId() + " " + p);
                    personRepository.saveAndFlush(p);
                } else if (duplicates.size() == 1) {

                    Person found = duplicates.get(0);

                    if (p.getIdentityDocument() == null && found.getIdentityDocument() != null) {
                        // on db there are more data so I use them.
                        p = found;
                        System.out.println("use already saved person " + p.getId() + " " + p);
                    } else {
                        // we sorted by date so we suppose
                        // the file version is newer so we update the entity
                        p.setId(found.getId());
                        System.out.println("update person " + p.getId() + " " + p);
                        personRepository.saveAndFlush(p);
                    }

                } else {
                    throw new RuntimeException("More duplicated for " + p.getName());
                }

                if (isMain) {
                    checkinRecord.setMainPerson(p);
                    isMain = false;
                } else {
                    others.add(p);
                }
            }

            checkinRecord.setOtherPeople(new HashSet<>(others));

            if (checkinRecordRepository.alreadyExists(checkinRecord.getMainPerson(), date) != null) {
                System.out.println("already exists " + date + " p " + checkinRecord.getMainPerson());
            } else {
                System.out.println("save record ");
                checkinRecordRepository.saveAndFlush(checkinRecord);
            }
        }
    }

    context.registerShutdownHook();

    System.exit(0);
}

From source file:com.l2jserver.model.template.SkillTemplateConverter.java

/**
 * Generates the XML schema for the skill templates, converts every legacy
 * skill XML file into one template XML file per skill below
 * {@code data/templates/skill}, and prints the number of generated
 * templates together with some memory statistics.
 *
 * @param args not used
 * @throws SQLException if the database connection cannot be opened or closed
 * @throws IOException if the schema or a template file cannot be written
 * @throws ClassNotFoundException if the MySQL JDBC driver is not on the class path
 * @throws JAXBException if the JAXB context, (un)marshalling or schema generation fails
 */
public static void main(String[] args) throws SQLException, IOException, ClassNotFoundException, JAXBException {
    Class.forName("com.mysql.jdbc.Driver");

    final File target = new File("data/templates");
    final JAXBContext c = JAXBContext.newInstance(SkillTemplate.class, LegacySkillList.class);
    final Connection conn = DriverManager.getConnection(JDBC_URL, JDBC_USERNAME, JDBC_PASSWORD);

    // everything after opening the connection runs inside the try, so the
    // connection is closed even when schema generation fails
    try {
        System.out.println("Generating template XML files...");
        c.generateSchema(new SchemaOutputResolver() {
            @Override
            public Result createOutput(String namespaceUri, String suggestedFileName) throws IOException {
                return new StreamResult(new File(target, suggestedFileName));
            }
        });

        final Unmarshaller u = c.createUnmarshaller();
        final Marshaller m = c.createMarshaller();
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        m.setProperty(Marshaller.JAXB_NO_NAMESPACE_SCHEMA_LOCATION, "skill");
        m.setProperty(Marshaller.JAXB_SCHEMA_LOCATION, "skill ../skill.xsd");

        Collection<File> files = FileUtils.listFiles(new File(LEGACY_SKILL_FOLDER), new String[] { "xml" },
                true);
        for (final File legacyFile : files) {
            LegacySkillList list = (LegacySkillList) u.unmarshal(legacyFile);
            for (final LegacySkill legacySkill : list.skills) {
                SkillTemplate t = fillSkill(legacySkill);
                final File file = new File(target, "skill/" + t.id.getID()
                        + (t.getName() != null ? "-" + camelCase(t.getName()) : "") + ".xml");
                templates.add(t);

                boolean written = false;
                final FileOutputStream out = new FileOutputStream(file);
                try {
                    m.marshal(t, getXMLSerializer(out));
                    written = true;
                } catch (MarshalException e) {
                    System.err.println(
                            "Could not generate XML template file for " + t.getName() + " - " + t.getID());
                } finally {
                    // one stream is opened per template; release it right away
                    out.close();
                }
                if (!written) {
                    // remove the partial file only after its stream has been closed
                    file.delete();
                }
            }
        }

        System.out.println("Generated " + templates.size() + " templates");

        System.gc();
        final Runtime runtime = Runtime.getRuntime();
        System.out.println("Free: " + FileUtils.byteCountToDisplaySize(runtime.freeMemory()));
        System.out.println("Total: " + FileUtils.byteCountToDisplaySize(runtime.totalMemory()));
        System.out.println("Used: " + FileUtils.byteCountToDisplaySize(
                runtime.totalMemory() - runtime.freeMemory()));
        System.out.println("Max: " + FileUtils.byteCountToDisplaySize(runtime.maxMemory()));
    } finally {
        conn.close();
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java

/**
 * Step 6 (HIT preparation): renders HTML pages from the XML query result containers
 * found in the input directory.
 *
 * <p>For every container, the ranked results that carry an {@code originalXmi} payload are
 * grouped by {@code sentenceCount / 40}. Within each group the results are sorted by rank
 * and at most {@code TOP_RESULTS_PER_GROUP} of them are rendered. Each rendered document is
 * cut into chunks at the collected paragraph boundaries (with extra cut points every 120
 * sentences once more than 120 sentences have been collected), and each chunk is written
 * through the Mustache template into a sub-folder of the output directory named 40, 80 or
 * 120 according to the chunk's sentence count.
 *
 * @param args {@code args[0]} is the input directory with the XML query containers,
 *             {@code args[1]} is the output directory
 * @throws Exception if reading, deserializing or writing any document fails
 */
public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step5-linguistic-annotation/
    System.err.println("Starting step 6 HIT Preparation");

    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (outputDir.exists()) {
        // NOTE: File.delete() only removes an empty directory; a non-empty output
        // directory is left in place and reused.
        outputDir.delete();
    }
    outputDir.mkdir();

    // Optional file-name filter; while it stays empty every container is processed.
    List<String> queries = new ArrayList<>();

    // The template does not depend on the document being rendered, so compile it once
    // instead of once per ranked result.
    MustacheFactory mf = new DefaultMustacheFactory();
    Mustache mustache = mf.compile("template/template.html");

    // iterate over query containers
    int countClueWeb = 0;
    int countSentence = 0;
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        if (!queries.isEmpty() && !queries.contains(f.getName())) {
            continue;
        }

        // groups contain only non-empty documents
        Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>();

        // split to groups according to number of sentences
        for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
            if (rankedResult.originalXmi == null) {
                // Previously this report sat in an unreachable branch and was never printed.
                System.err.println("Empty document: " + rankedResult.clueWebID);
                continue;
            }

            byte[] bytes = new BASE64Decoder()
                    .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
            JCas jCas = JCasFactory.createJCas();
            XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

            Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);

            int groupId = sentences.size() / 40;
            groups.computeIfAbsent(groupId, k -> new ArrayList<>()).add(rankedResult);
            countClueWeb++;
        }

        // The group key itself is not needed below: the output folder is derived from
        // the size of each written chunk, not from the grouping key.
        for (List<QueryResultContainer.SingleRankedResult> rankedResults : groups.values()) {
            // make sure the results are sorted
            Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank));

            // iterate over results for one query and group
            for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) {
                QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i);

                int rank = rankedResult.rank;
                String queryId = queryResultContainer.qID;
                String query = queryResultContainer.query;
                // make the first letter uppercase (an empty query has no first letter)
                if (!query.isEmpty()) {
                    query = query.substring(0, 1).toUpperCase() + query.substring(1);
                }

                List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples;
                List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples;
                byte[] bytes = new BASE64Decoder()
                        .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));

                JCas jCas = JCasFactory.createJCas();
                XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                List<generators.Sentence> sentences = new ArrayList<>();
                // Candidate cut points, expressed as indices into 'sentences'.
                List<Integer> paragraphs = new ArrayList<>();
                paragraphs.add(0);

                for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) {
                    for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) {
                        String sentenceBegin = String.valueOf(s.getBegin());
                        sentences.add(new generators.Sentence(s.getCoveredText(), sentenceBegin));
                        countSentence++;
                    }

                    // Once more than 120 sentences are collected, add extra cut points
                    // every 120 sentences starting from the last known boundary.
                    int sentenceId = paragraphs.get(paragraphs.size() - 1);
                    if (sentences.size() > 120) {
                        while (sentenceId < sentences.size()) {
                            if (!paragraphs.contains(sentenceId)) {
                                paragraphs.add(sentenceId);
                            }
                            sentenceId = sentenceId + 120;
                        }
                    }
                    paragraphs.add(sentences.size());
                }
                System.err.println("Output dir: " + outputDir);
                int startID = 0;
                int endID;

                for (int j = 0; j < paragraphs.size(); j++) {
                    endID = paragraphs.get(j);
                    int sentLength = endID - startID;
                    if (sentLength > 120 || j == paragraphs.size() - 1) {
                        if (sentLength > 120) {
                            // Too long: cut at the previous boundary and revisit this one.
                            endID = paragraphs.get(j - 1);
                            j--;
                        }
                        sentLength = endID - startID;

                        // Output bucket (folder name) by chunk length.
                        int bucket;
                        if (sentLength <= 40) {
                            bucket = 40;
                        } else if (sentLength <= 80) {
                            bucket = 80;
                        } else {
                            bucket = 120;
                        }

                        File folder = new File(outputDir + "/" + bucket);
                        if (!folder.exists()) {
                            System.err.println("creating directory: " + outputDir + "/" + bucket);
                            boolean created = false;
                            try {
                                // mkdir() reports failure through its return value
                                created = folder.mkdir();
                            } catch (SecurityException se) {
                                System.err.println(
                                        "Not allowed to create directory " + folder + ": " + se.getMessage());
                            }
                            if (created) {
                                System.out.println("DIR created");
                            }
                        }

                        String baseName = folder.getAbsolutePath() + "/" + f.getName() + "_"
                                + rankedResult.clueWebID + "_" + sentLength;
                        String newHtmlFile = baseName + ".html";
                        System.err.println("Printing a file: " + newHtmlFile);
                        File newHTML = new File(newHtmlFile);
                        // Never overwrite: append a running counter until the name is free.
                        int t = 0;
                        while (newHTML.exists()) {
                            newHTML = new File(baseName + "." + t + ".html");
                            t++;
                        }

                        // try-with-resources closes the file handle even if rendering fails.
                        try (PrintWriter out = new PrintWriter(new FileWriter(newHTML))) {
                            mustache.execute(out,
                                    new generators(query, relevantInformationExamples,
                                            irrelevantInformationExamples, sentences.subList(startID, endID),
                                            queryId, rank))
                                    .flush();
                        }
                        startID = endID;
                    }
                }
            }
        }
    }
    System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences");
}