Example usage for org.apache.commons.io FileUtils listFiles

List of usage examples for org.apache.commons.io FileUtils listFiles

Introduction

In this page you can find the example usage for org.apache.commons.io FileUtils listFiles.

Prototype

public static Collection listFiles(File directory, String[] extensions, boolean recursive) 

Source Link

Document

Finds files within a given directory (and optionally its subdirectories) which match an array of extensions.

Usage

From source file:com.sangupta.keepwalking.MergeRepo.java

/**
 * @param args//from  ww w  . j  a  v a2s  .  c  om
 * @throws IOException 
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        usage();
        return;
    }

    final String previousRepo = args[0];
    final String newerRepo = args[1];
    final String mergedRepo = args[2];

    final File previous = new File(previousRepo);
    final File newer = new File(newerRepo);
    final File merged = new File(mergedRepo);

    if (!(previous.exists() && previous.isDirectory())) {
        System.out.println("The previous version does not exists or is not a directory.");
        return;
    }

    if (!(newer.exists() && newer.isDirectory())) {
        System.out.println("The newer version does not exists or is not a directory.");
        return;
    }

    final IOFileFilter directoryFilter = FileFilterUtils.makeCVSAware(FileFilterUtils.makeSVNAware(null));

    final Collection<File> olderFiles = FileUtils.listFiles(previous, TrueFileFilter.TRUE, directoryFilter);
    final Collection<File> newerFiles = FileUtils.listFiles(newer, TrueFileFilter.TRUE, directoryFilter);

    // build a list of unique paths
    System.out.println("Reading files from older version...");
    List<String> olderPaths = new ArrayList<String>();
    for (File oldFile : olderFiles) {
        olderPaths.add(getRelativePath(oldFile, previous));
    }

    System.out.println("Reading files from newer version...");
    List<String> newerPaths = new ArrayList<String>();
    for (File newerFile : newerFiles) {
        newerPaths.add(getRelativePath(newerFile, newer));
    }

    // find which files have been removed from Perforce depot
    List<String> filesRemoved = new ArrayList<String>(olderPaths);
    filesRemoved.removeAll(newerPaths);
    System.out.println("Files removed in newer version: " + filesRemoved.size());
    for (String removed : filesRemoved) {
        System.out.print("    ");
        System.out.println(removed);
    }

    // find which files have been added in Perforce depot
    List<String> filesAdded = new ArrayList<String>(newerPaths);
    filesAdded.removeAll(olderPaths);
    System.out.println("Files added in newer version: " + filesAdded.size());
    for (String added : filesAdded) {
        System.out.print("    ");
        System.out.println(added);
    }

    // find which files are common 
    // now check if they have modified or not
    newerPaths.retainAll(olderPaths);
    List<String> modified = checkModifiedFiles(newerPaths, previous, newer);
    System.out.println("Files modified in newer version: " + modified.size());
    for (String modify : modified) {
        System.out.print("    ");
        System.out.println(modify);
    }

    // clean any previous existence of merged repo
    System.out.println("Cleaning any previous merged repositories...");
    if (merged.exists() && merged.isDirectory()) {
        FileUtils.deleteDirectory(merged);
    }

    System.out.println("Merging from newer to older repository...");
    // copy the original SVN repo to merged
    FileUtils.copyDirectory(previous, merged);

    // now remove all files that need to be
    for (String removed : filesRemoved) {
        File toRemove = new File(merged, removed);
        toRemove.delete();
    }

    // now add all files that are new in perforce
    for (String added : filesAdded) {
        File toAdd = new File(newer, added);
        File destination = new File(merged, added);
        FileUtils.copyFile(toAdd, destination);
    }

    // now over-write modified files
    for (String changed : modified) {
        File change = new File(newer, changed);
        File destination = new File(merged, changed);
        destination.delete();
        FileUtils.copyFile(change, destination);
    }

    System.out.println("Done merging.");
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step10RemoveEmptyDocuments.java

public static void main(String[] args) throws IOException {
    // input dir - list of xml query containers
    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();//from w  ww  .j  a  va2  s.  c  o m
    }

    boolean crop = args.length >= 3 && "crop".equals(args[2]);

    // first find the maximum of zero-sized documents
    int maxMissing = 7;

    /*
    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
    QueryResultContainer queryResultContainer = QueryResultContainer
            .fromXML(FileUtils.readFileToString(f, "utf-8"));
            
    // first find the maximum of zero-sized documents in a query
    int missingInQuery = 0;
            
    for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
        // boilerplate removal
        if (rankedResults.plainText == null || rankedResults.plainText.isEmpty()) {
            missingInQuery++;
        }
    }
            
    maxMissing = Math.max(missingInQuery, maxMissing);
    }
    */

    System.out.println("Max zeroLengthDocuments in query: " + maxMissing);
    // max is 7 = we're cut-off at 93

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        List<QueryResultContainer.SingleRankedResult> nonEmptyDocsList = new ArrayList<>();

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            // collect non-empty documents
            if (rankedResults.plainText != null && !rankedResults.plainText.isEmpty()) {
                nonEmptyDocsList.add(rankedResults);
            }
        }

        System.out.println("Non-empty docs coune: " + nonEmptyDocsList.size());

        if (crop) {
            // now cut at 93
            nonEmptyDocsList = nonEmptyDocsList.subList(0, (100 - maxMissing));
            System.out.println("After cropping: " + nonEmptyDocsList.size());
        }
        System.out.println("After cleaning: " + nonEmptyDocsList.size());

        queryResultContainer.rankedResults.clear();
        queryResultContainer.rankedResults.addAll(nonEmptyDocsList);

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

From source file:PlyBounder.java

public static void main(String[] args) {

    // Get the commandline arguments
    Options options = new Options();
    // Available options
    Option plyPath = OptionBuilder.withArgName("dir").hasArg()
            .withDescription("directory containing input .ply files").create("plyPath");
    Option boundingbox = OptionBuilder.withArgName("string").hasArg()
            .withDescription("bounding box in WKT notation").create("boundingbox");
    Option outputPlyFile = OptionBuilder.withArgName("file").hasArg().withDescription("output PLY file name")
            .create("outputPlyFile");
    options.addOption(plyPath);/*from  w  w  w.j av a 2 s  .  co m*/
    options.addOption(boundingbox);
    options.addOption(outputPlyFile);

    String plydir = ".";
    String boundingboxstr = "";
    String outputfilename = "";

    CommandLineParser parser = new DefaultParser();
    try {
        // parse the command line arguments
        CommandLine line = parser.parse(options, args);

        boundingboxstr = line.getOptionValue("boundingbox");
        outputfilename = line.getOptionValue("outputPlyFile");

        if (line.hasOption("plyPath")) {
            // print the value of block-size
            plydir = line.getOptionValue("plyPath");
            System.out.println("Using plyPath=" + plydir);
        } else {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("PlyBounder", options);
        }
        //System.out.println( "plyPath=" + line.getOptionValue( "plyPath" ) );
    } catch (ParseException exp) {
        System.err.println("Error getting arguments: " + exp.getMessage());
    }

    // input directory
    // Get list of files
    File dir = new File(plydir);

    //System.out.println("Getting all files in " + dir.getCanonicalPath());
    List<File> files = (List<File>) FileUtils.listFiles(dir, new String[] { "ply", "PLY" }, false);
    for (File file : files) {
        try {
            System.out.println("file=" + file.getCanonicalPath());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    String sometempfile = "magweg.wkt";
    String s = null;

    // Loop through .ply files in directory
    for (File file : files) {
        try {
            String cmdl[] = { "./ply-tool.py", "intersection", file.getCanonicalPath(), boundingboxstr,
                    sometempfile };
            //System.out.println("Running: " + Arrays.toString(cmdl));
            Process p = Runtime.getRuntime().exec(cmdl);

            BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));

            BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));

            // read the output from the command
            System.out.println("cmdout:\n");
            while ((s = stdInput.readLine()) != null) {
                System.out.println(s);
            }

            // read any errors from the attempted command
            System.out.println("cmderr:\n");
            while ((s = stdError.readLine()) != null) {
                System.out.println(s);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Write new .ply file
    //ply-tool write setfile outputPlyFile
    try {
        String cmdl = "./ply-tool.py write " + sometempfile + " " + outputfilename;
        System.out.println("Running: " + cmdl);
        Process p = Runtime.getRuntime().exec(cmdl);

        BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));

        BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));

        // read the output from the command
        System.out.println("cmdout:\n");
        while ((s = stdInput.readLine()) != null) {
            System.out.println(s);
        }

        // read any errors from the attempted command
        System.out.println("cmderr:\n");
        while ((s = stdError.readLine()) != null) {
            System.out.println(s);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Done
    System.out.println("Done");
}

From source file:de.jwi.zip.Zipper.java

public static void main(String[] args) throws Exception {
    ZipOutputStream z = new ZipOutputStream(new FileOutputStream("d:/temp/myfirst.zip"));

    IOFileFilter filter = new IOFileFilter() {

        public boolean accept(java.io.File file) {
            return true;
        }//from  w w  w  .  ja  v a  2s  .  c om

        public boolean accept(java.io.File dir, java.lang.String name) {
            return true;
        }
    };

    Collection c = FileUtils.listFiles(new File("/java/javadocs/j2sdk-1.4.1/docs/tooldocs"), filter, filter);

    new Zipper().zip(z, c, new File("/java/javadocs/j2sdk-1.4.1"));

    z.close();

}

From source file:com.ontotext.s4.SBTDemo.Main.java

public static void main(String[] args) {
    /*/*from  ww w  . jav  a  2  s  .  c o m*/
     * Read log4j properties file.
     */
    org.apache.log4j.PropertyConfigurator.configure(args.length >= 1 ? args[1] : DEFAULT_LOG4J_FILE);

    /*
     * Read properties file  List all annotated files. We should
     * use absolute path to the files
     */
    init(args);

    ProcessingDocuments processingDocuments = new ProcessingDocuments(
            programProperties.getProperty(PropertiesNames.S4_API_KEY),
            programProperties.getProperty(PropertiesNames.S4_API_PASS),
            programProperties.getProperty(PropertiesNames.RAW_FOLDER),
            programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER),
            programProperties.getProperty(PropertiesNames.SERVICE),
            programProperties.getProperty(PropertiesNames.MIME_TYPE),
            programProperties.getProperty(PropertiesNames.RESPONSE_FORMAT),
            Integer.parseInt(programProperties.getProperty(PropertiesNames.NUMBER_OF_THREADS)));

    processingDocuments.ProcessData();

    File directory = new File(programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER));
    listOfAllAnnotatedFiles = FileUtils.listFiles(directory, new RegexFileFilter("^(.*?)"),
            DirectoryFileFilter.DIRECTORY);

    RepoManager repoManager = new RepoManager(programProperties.getProperty(PropertiesNames.REPOSITORY_URL));
    JsonToRDF jsonToRdfParser = new JsonToRDF(programProperties.getProperty(PropertiesNames.MIME_TYPE),
            programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER));

    for (File file : listOfAllAnnotatedFiles) {
        String fileContent = null;
        try {
            fileContent = FileUtils.readFileToString(file, "UTF-8");
        } catch (IOException e) {
            logger.error(e);
        }

        Model graph = jsonToRdfParser.wirteDataToRDF(fileContent, file.getName(),
                programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER));
        try {
            repoManager.sendDataTOGraphDB(graph);
        } catch (RepositoryException e) {
            logger.error(e);
        }

    }

    repoManager.close();
}

From source file:de.micromata.genome.gwiki.tools.PatchJavaHeader.java

public static void main(String[] args) {
    String baseDir = args[0];//from  w ww  .  ja v a 2s. c  o m
    Collection<File> col = FileUtils.listFiles(new File(baseDir), new String[] { "java" }, true);
    for (File f : col) {
        System.out.println("file: " + f.getAbsolutePath());
        patchFile(f);
    }

}

From source file:io.apiman.tools.i18n.TemplateScanner.java

public static void main(String[] args) throws IOException {
    if (args == null || args.length != 1) {
        System.out.println("Template directory not provided (no path provided).");
        System.exit(1);//from   w  w w  .j a  v a  2s.co  m
    }
    File templateDir = new File(args[0]);
    if (!templateDir.isDirectory()) {
        System.out.println("Template directory not provided (provided path is not a directory).");
        System.exit(1);
    }

    if (!new File(templateDir, "dash.html").isFile()) {
        System.out.println("Template directory not provided (dash.html not found).");
        System.exit(1);
    }

    File outputDir = new File(templateDir, "../../../../../../tools/i18n/target");
    if (!outputDir.isDirectory()) {
        System.out.println("Output directory not found: " + outputDir);
        System.exit(1);
    }
    File outputFile = new File(outputDir, "scanner-messages.properties");
    if (outputFile.isFile() && !outputFile.delete()) {
        System.out.println("Couldn't delete the old messages.properties: " + outputFile);
        System.exit(1);
    }

    System.out.println("Starting scan.");
    System.out.println("Scanning template directory: " + templateDir.getAbsolutePath());

    String[] extensions = { "html", "include" };
    Collection<File> files = FileUtils.listFiles(templateDir, extensions, true);

    TreeMap<String, String> strings = new TreeMap<>();

    for (File file : files) {
        System.out.println("\tScanning file: " + file);
        scanFile(file, strings);
    }

    outputMessages(strings, outputFile);

    System.out.println("Scan complete.  Scanned " + files.size() + " files and discovered " + strings.size()
            + " translation strings.");
}

From source file:com.acapulcoapp.alloggiatiweb.FileReader.java

public static void main(String[] args) throws UnknownHostException, IOException {
    // TODO code application logic here

    SpringApplication app = new SpringApplication(AcapulcoappApp.class);
    SimpleCommandLinePropertySource source = new SimpleCommandLinePropertySource(args);
    addDefaultProfile(app, source);//from  ww  w  .j  av  a 2s  .  co m

    ConfigurableApplicationContext context = app.run(args);

    initBeans(context);

    Map<LocalDate, List<List<String>>> map = new TreeMap<>();

    List<File> files = new ArrayList<>(FileUtils.listFiles(new File("/Users/chiccomask/Downloads/ALLOGGIATI"),
            new String[] { "txt" }, true));

    Collections.reverse(files);

    int count = 0;

    for (File file : files) {

        //            List<String> allLines = FileUtils.readLines(file, "windows-1252");
        List<String> allLines = FileUtils.readLines(file, "UTF-8");

        for (int i = 0; i < allLines.size();) {

            count++;

            List<String> record = new ArrayList<>();

            String line = allLines.get(i);
            String type = TIPO_ALLOGGIO.parse(line);

            switch (type) {
            case "16":
                record.add(line);
                i++;
                break;
            case "17": {
                record.add(line);
                boolean out = false;
                while (!out) {
                    i++;
                    if (i < allLines.size()) {
                        String subline = allLines.get(i);
                        String subtype = TIPO_ALLOGGIO.parse(subline);
                        if (!subtype.equals("19")) {
                            out = true;
                        } else {
                            record.add(subline);
                        }
                    } else {
                        out = true;
                    }
                }
                break;
            }
            case "18": {
                record.add(line);
                boolean out = false;
                while (!out) {
                    i++;
                    if (i < allLines.size()) {
                        String subline = allLines.get(i);
                        String subtype = TIPO_ALLOGGIO.parse(subline);
                        if (!subtype.equals("20")) {
                            out = true;
                        } else {
                            record.add(subline);
                        }
                    } else {
                        out = true;
                    }
                }
                break;
            }
            default:
                break;
            }

            LocalDate arrived = LocalDate.parse(DATA_ARRIVO.parse(line),
                    DateTimeFormatter.ofPattern(DATE_PATTERN));
            if (!map.containsKey(arrived)) {
                map.put(arrived, new ArrayList<>());
            }
            map.get(arrived).add(record);
        }
    }

    for (LocalDate date : map.keySet()) {

        System.out.println();
        System.out.println("process day " + date);

        for (List<String> record : map.get(date)) {

            System.out.println();
            System.out.println("process record ");
            for (String line : record) {
                System.out.println(line);
            }

            CheckinRecord checkinRecord = new CheckinRecord();

            //non lo setto per adesso
            String firstLine = record.get(0);

            String typeStr = TIPO_ALLOGGIO.parse(firstLine);
            CheckinType cht = checkinTypeRepository.find(typeStr);
            checkinRecord.setCheckinType(cht);

            int days = Integer.parseInt(PERMANENZA.parse(firstLine));
            checkinRecord.setDays(days);
            checkinRecord.setArrived(date);

            boolean isMain = true;

            List<Person> others = new ArrayList<>();

            for (String line : record) {
                Person p = extractPerson(line);

                if (p.getDistrictOfBirth() == null) {
                    System.out.println("district of birth not found " + p);
                }

                List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(),
                        p.getDateOfBirth());

                if (duplicates.isEmpty()) {
                    System.out.println("add new person " + p.getId() + " " + p);
                    personRepository.saveAndFlush(p);
                } else if (duplicates.size() == 1) {

                    Person found = duplicates.get(0);

                    if (p.getIdentityDocument() != null) {
                        //we sorted by date so we suppose 
                        //the file version is newer so we update the entity
                        p.setId(found.getId());
                        System.out.println("update person " + p.getId() + " " + p);
                        personRepository.saveAndFlush(p);

                    } else if (found.getIdentityDocument() != null) {
                        //on db there are more data so I use them.
                        p = found;
                        System.out.println("use already saved person " + p.getId() + " " + p);
                    } else {
                        p.setId(found.getId());
                        System.out.println("update person " + p.getId() + " " + p);
                        personRepository.saveAndFlush(p);
                    }

                } else {
                    throw new RuntimeException("More duplicated for " + p.getName());
                }

                if (isMain) {
                    checkinRecord.setMainPerson(p);
                    isMain = false;
                } else {
                    others.add(p);
                }
            }

            checkinRecord.setOtherPeople(new HashSet<>(others));

            if (checkinRecordRepository.alreadyExists(checkinRecord.getMainPerson(), date) != null) {
                System.out.println("already exists " + date + " p " + checkinRecord.getMainPerson());
            } else {
                System.out.println("save record ");
                checkinRecordRepository.saveAndFlush(checkinRecord);
            }
        }
    }

    //
    //            if (type.equals("16")) {
    //                List<String> record = new ArrayList<>();
    //                record.add(line);
    //                keepOpen = false;
    //            }
    //
    //            map.get(arrived).add(record);
    //        map.values().forEach((list) -> {
    //
    //            for (String line : list) {
    //
    //                Person p = null;
    //
    //                try {
    //
    //                    p = extractPerson(line);
    //
    //                    List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(), p.getDateOfBirth());
    //
    //                    if (duplicates.isEmpty()) {
    //                        personRepository.saveAndFlush(p);
    //
    //                    } else if (duplicates.size() > 1) {
    //                        System.out.println();
    //                        System.out.println("MULIPLE DUPLICATED");
    //
    //                        for (Person dd : duplicates) {
    //                            System.out.println(dd);
    //                        }
    //                        System.out.println("* " + p);
    //                        throw new RuntimeException();
    //                    } else {
    //
    ////                        if (!duplicates.get(0).getDistrictOfBirth().equals(p.getDistrictOfBirth())) {
    ////                        int index = 0;
    ////
    ////                        System.out.println();
    ////                        System.out.println("DUPLICATED");
    ////
    ////                        for (Person dd : duplicates) {
    ////                            System.out.println(dd);
    ////                            index++;
    ////                        }
    ////                        System.out.println("* " + p);
    ////                        System.out.println(file.getAbsolutePath() + " " + p);
    ////
    ////                        System.out.println();
    ////                        System.out.println();
    ////                        }
    ////                        duplicates.remove(0);
    ////                        personRepository.deleteInBatch(duplicates);
    ////                System.out.println();
    ////                System.out.println("Seleziona scelta");
    ////                Scanner s = new Scanner(System.in);
    ////                int selected;
    ////                try {
    ////                    selected = s.nextInt();
    ////                } catch (InputMismatchException e) {
    ////                    selected = 0;
    ////                }
    ////
    ////                if (duplicates.size() <= selected) {
    ////                    personRepository.deleteInBatch(duplicates);
    ////                    personRepository.saveAndFlush(p);
    ////                } else {
    ////                    duplicates.remove(selected);
    ////                    personRepository.deleteInBatch(duplicates);
    ////                }
    //                    }
    //
    //                } catch (Exception e) {
    //
    //                    System.out.println();
    ////                    System.out.println("ERROR READING lineCount=" + allLines.indexOf(line) + " line=" + line);
    ////                    System.out.println(file.getAbsolutePath());
    //                    System.out.println(p);
    //                    e.printStackTrace();
    //                    System.out.println();
    //                }
    //            }
    //        });
    context.registerShutdownHook();

    System.exit(0);
}

From source file:com.l2jserver.model.template.SkillTemplateConverter.java

public static void main(String[] args) throws SQLException, IOException, ClassNotFoundException, JAXBException {
    Class.forName("com.mysql.jdbc.Driver");

    final File target = new File("data/templates");
    final JAXBContext c = JAXBContext.newInstance(SkillTemplate.class, LegacySkillList.class);
    final Connection conn = DriverManager.getConnection(JDBC_URL, JDBC_USERNAME, JDBC_PASSWORD);

    System.out.println("Generating template XML files...");
    c.generateSchema(new SchemaOutputResolver() {
        @Override/*w  w  w  .ja  v  a  2 s .  co  m*/
        public Result createOutput(String namespaceUri, String suggestedFileName) throws IOException {
            return new StreamResult(new File(target, suggestedFileName));
        }
    });

    try {
        final Unmarshaller u = c.createUnmarshaller();
        final Marshaller m = c.createMarshaller();
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        m.setProperty(Marshaller.JAXB_NO_NAMESPACE_SCHEMA_LOCATION, "skill");
        m.setProperty(Marshaller.JAXB_SCHEMA_LOCATION, "skill ../skill.xsd");

        Collection<File> files = FileUtils.listFiles(new File(LEGACY_SKILL_FOLDER), new String[] { "xml" },
                true);
        for (final File legacyFile : files) {
            LegacySkillList list = (LegacySkillList) u.unmarshal(legacyFile);
            for (final LegacySkill legacySkill : list.skills) {
                SkillTemplate t = fillSkill(legacySkill);
                final File file = new File(target, "skill/" + t.id.getID()
                        + (t.getName() != null ? "-" + camelCase(t.getName()) : "") + ".xml");
                templates.add(t);

                try {
                    m.marshal(t, getXMLSerializer(new FileOutputStream(file)));
                } catch (MarshalException e) {
                    System.err.println(
                            "Could not generate XML template file for " + t.getName() + " - " + t.getID());
                    file.delete();
                }
            }
        }

        System.out.println("Generated " + templates.size() + " templates");

        System.gc();
        System.out.println("Free: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().freeMemory()));
        System.out.println("Total: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().totalMemory()));
        System.out.println("Used: " + FileUtils.byteCountToDisplaySize(
                Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()));
        System.out.println("Max: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().maxMemory()));
    } finally {
        conn.close();
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java

public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step5-linguistic-annotation/
    System.err.println("Starting step 6 HIT Preparation");

    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (outputDir.exists()) {
        outputDir.delete();/*from   www.  j av a 2s.  c  o m*/
    }
    outputDir.mkdir();

    List<String> queries = new ArrayList<>();

    // iterate over query containers
    int countClueWeb = 0;
    int countSentence = 0;
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        if (queries.contains(f.getName()) || queries.size() == 0) {
            // groups contain only non-empty documents
            Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>();

            // split to groups according to number of sentences
            for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) {
                if (rankedResult.originalXmi != null) {
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));
                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);

                    int groupId = sentences.size() / 40;
                    if (rankedResult.originalXmi == null) {
                        System.err.println("Empty document: " + rankedResult.clueWebID);
                    } else {
                        if (!groups.containsKey(groupId)) {
                            groups.put(groupId, new ArrayList<>());

                        }
                    }
                    //handle it
                    groups.get(groupId).add(rankedResult);
                    countClueWeb++;
                }
            }

            for (Map.Entry<Integer, List<QueryResultContainer.SingleRankedResult>> entry : groups.entrySet()) {
                Integer groupId = entry.getKey();
                List<QueryResultContainer.SingleRankedResult> rankedResults = entry.getValue();

                // make sure the results are sorted
                // DEBUG
                //                for (QueryResultContainer.SingleRankedResult r : rankedResults) {
                //                    System.out.print(r.rank + "\t");
                //                }

                Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank));

                // iterate over results for one query and group
                for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) {
                    QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i);

                    QueryResultContainer.SingleRankedResult r = rankedResults.get(i);
                    int rank = r.rank;
                    MustacheFactory mf = new DefaultMustacheFactory();
                    Mustache mustache = mf.compile("template/template.html");
                    String queryId = queryResultContainer.qID;
                    String query = queryResultContainer.query;
                    // make the first letter uppercase
                    query = query.substring(0, 1).toUpperCase() + query.substring(1);

                    List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples;
                    List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples;
                    byte[] bytes = new BASE64Decoder()
                            .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes()));

                    JCas jCas = JCasFactory.createJCas();
                    XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas());

                    List<generators.Sentence> sentences = new ArrayList<>();
                    List<Integer> paragraphs = new ArrayList<>();
                    paragraphs.add(0);

                    for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) {
                        for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) {

                            String sentenceBegin = String.valueOf(s.getBegin());
                            generators.Sentence sentence = new generators.Sentence(s.getCoveredText(),
                                    sentenceBegin);
                            sentences.add(sentence);
                            countSentence++;
                        }
                        int SentenceID = paragraphs.get(paragraphs.size() - 1);
                        if (sentences.size() > 120)
                            while (SentenceID < sentences.size()) {
                                if (!paragraphs.contains(SentenceID))
                                    paragraphs.add(SentenceID);
                                SentenceID = SentenceID + 120;
                            }
                        paragraphs.add(sentences.size());

                    }
                    System.err.println("Output dir: " + outputDir);
                    int startID = 0;
                    int endID;

                    for (int j = 0; j < paragraphs.size(); j++) {

                        endID = paragraphs.get(j);
                        int sentLength = endID - startID;
                        if (sentLength > 120 || j == paragraphs.size() - 1) {
                            if (sentLength > 120) {

                                endID = paragraphs.get(j - 1);
                                j--;
                            }
                            sentLength = endID - startID;
                            if (sentLength <= 40)
                                groupId = 40;
                            else if (sentLength <= 80 && sentLength > 40)
                                groupId = 80;
                            else if (sentLength > 80)
                                groupId = 120;

                            File folder = new File(outputDir + "/" + groupId);
                            if (!folder.exists()) {
                                System.err.println("creating directory: " + outputDir + "/" + groupId);
                                boolean result = false;

                                try {
                                    folder.mkdir();
                                    result = true;
                                } catch (SecurityException se) {
                                    //handle it
                                }
                                if (result) {
                                    System.out.println("DIR created");
                                }
                            }

                            String newHtmlFile = folder.getAbsolutePath() + "/" + f.getName() + "_"
                                    + rankedResult.clueWebID + "_" + sentLength + ".html";
                            System.err.println("Printing a file: " + newHtmlFile);
                            File newHTML = new File(newHtmlFile);
                            int t = 0;
                            while (newHTML.exists()) {
                                newHTML = new File(folder.getAbsolutePath() + "/" + f.getName() + "_"
                                        + rankedResult.clueWebID + "_" + sentLength + "." + t + ".html");
                                t++;
                            }
                            mustache.execute(new PrintWriter(new FileWriter(newHTML)),
                                    new generators(query, relevantInformationExamples,
                                            irrelevantInformationExamples, sentences.subList(startID, endID),
                                            queryId, rank))
                                    .flush();
                            startID = endID;
                        }
                    }
                }
            }

        }
    }
    System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences");
}