Example usage for java.io OutputStreamWriter OutputStreamWriter

List of usage examples for java.io OutputStreamWriter OutputStreamWriter

Introduction

On this page you can find example usage for the java.io OutputStreamWriter constructor OutputStreamWriter.

Prototype

public OutputStreamWriter(OutputStream out, CharsetEncoder enc) 

Source Link

Document

Creates an OutputStreamWriter that uses the given charset encoder.

Usage

From source file:GIST.IzbirkomExtractor.IzbirkomExtractor.java

/**
 * Command-line entry point: parses the options, sets up file logging, collects
 * the HTML files from the given directory, wires up the result sinks and runs
 * the table extractor over every collected file.
 *
 * <p>Every failure is reported on stderr and ends the JVM with exit status 1.
 *
 * @param args command-line arguments: options followed by exactly one HTML directory
 */
public static void main(String[] args) {

    // process command-line options
    Options options = new Options();
    options.addOption("n", "noaddr", false, "do not do any address matching (for testing)");
    options.addOption("i", "info", false, "create and populate address information table");
    options.addOption("h", "help", false, "this message");

    // database connection
    options.addOption("s", "server", true, "database server to connect to");
    options.addOption("d", "database", true, "OSM database name");
    options.addOption("u", "user", true, "OSM database user name");
    options.addOption("p", "pass", true, "OSM database password");

    // logging options
    options.addOption("l", "logdir", true, "log file directory (default './logs')");
    options.addOption("e", "loglevel", true, "log level (default 'FINEST')");

    // automatically generate the help statement
    HelpFormatter help_formatter = new HelpFormatter();

    // database URI for connection (kept outside the try so the SQL error handler can report it)
    String dburi = null;

    // Information message for help screen
    String info_msg = "IzbirkomExtractor [options] <html_directory>";

    try {
        CommandLineParser parser = new GnuParser();
        CommandLine cmd = parser.parse(options, args);

        if (cmd.hasOption('h') || cmd.getArgs().length != 1) {
            help_formatter.printHelp(info_msg, options);
            System.exit(1);
        }

        /* prohibit n and i together */
        if (cmd.hasOption('n') && cmd.hasOption('i')) {
            System.err.println("Options 'n' and 'i' cannot be used together.");
            System.exit(1);
        }

        /* prohibit database arguments together with -n */
        if (cmd.hasOption('n')
                && (cmd.hasOption('s') || cmd.hasOption('d') || cmd.hasOption('u') || cmd.hasOption('p'))) {
            System.err.println("Option 'n' does not need any database parameters.");
            System.exit(1);
        }

        /* require all 4 database options to be used together */
        if (!cmd.hasOption('n')
                && !(cmd.hasOption('s') && cmd.hasOption('d') && cmd.hasOption('u') && cmd.hasOption('p'))) {
            System.err.println(
                    "For database access all of the following arguments have to be specified: server, database, user, pass");
            System.exit(1);
        }

        /* single timestamp shared by the log file name and the CSV file name */
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd'T'kk:mm");
        String dateString = formatter.format(new Date());

        /* setup logging */
        File logdir = new File(cmd.hasOption('l') ? cmd.getOptionValue('l') : "logs");
        FileUtils.forceMkdir(logdir);
        File log_file_name = new File(
                logdir + "/" + IzbirkomExtractor.class.getName() + "-" + dateString + ".log");
        FileHandler log_file = new FileHandler(log_file_name.getPath());

        /* create "latest" link to currently created log file */
        Path latest_log_link = Paths.get(logdir + "/latest");
        Files.deleteIfExists(latest_log_link);
        // relative link target: the link lives in the same directory as the log file
        Files.createSymbolicLink(latest_log_link, Paths.get(log_file_name.getName()));

        log_file.setFormatter(new SimpleFormatter());
        LogManager.getLogManager().reset(); // prevents logging to console
        logger.addHandler(log_file);
        logger.setLevel(cmd.hasOption('e') ? Level.parse(cmd.getOptionValue('e')) : Level.FINEST);

        // open directory with HTML files and create file list
        File dir = new File(cmd.getArgs()[0]);
        if (!dir.isDirectory()) {
            System.err.println("Unable to find directory '" + cmd.getArgs()[0] + "', exiting");
            System.exit(1);
        }
        // NOTE(review): the embedded spaces in this glob look like an extraction artifact;
        // confirm the intended pattern against the upstream source before relying on it.
        PathMatcher pmatcher = FileSystems.getDefault()
                .getPathMatcher("glob:?  * ?*.html");
        ArrayList<File> html_files = new ArrayList<>();
        // a DirectoryStream holds an open directory handle and must be closed after iteration
        try (java.nio.file.DirectoryStream<Path> dir_entries = Files.newDirectoryStream(dir.toPath())) {
            for (Path file : dir_entries) {
                if (pmatcher.matches(file.getFileName())) {
                    html_files.add(file.toFile());
                }
            }
        }
        if (html_files.size() == 0) {
            System.err.println("No matching HTML files found in '" + dir.getAbsolutePath() + "', exiting");
            System.exit(1);
        }

        // create csvResultSink; the writer is closed (and thereby flushed) when processing ends,
        // whether normally or through an exception
        try (FileOutputStream csvout_file = new FileOutputStream("parsed_addresses-" + dateString + ".csv");
                OutputStreamWriter csvout = new OutputStreamWriter(csvout_file, "UTF-8")) {
            ResultSink csvResultSink = new CSVResultSink(csvout, new CSVStrategy('|', '"', '#'));

            // Connect to DB and osmAddressMatcher
            AddressMatcher osmAddressMatcher;
            DBSink dbSink = null;
            DBInfoSink dbInfoSink = null;
            if (cmd.hasOption('n')) {
                osmAddressMatcher = new DummyAddressMatcher();
            } else {
                dburi = "jdbc:postgresql://" + cmd.getOptionValue('s') + "/" + cmd.getOptionValue('d');
                // NOTE(review): the connection is never closed explicitly here; confirm whether
                // the sinks own it before adding a close.
                Connection con = DriverManager.getConnection(dburi, cmd.getOptionValue('u'),
                        cmd.getOptionValue('p'));
                osmAddressMatcher = new OsmAddressMatcher(con);
                dbSink = new DBSink(con);
                if (cmd.hasOption('i')) {
                    dbInfoSink = new DBInfoSink(con);
                }
            }

            /* create resultsinks */
            SinkMultiplexor sm = SinkMultiplexor.newSinkMultiplexor();
            sm.addResultSink(csvResultSink);
            if (dbSink != null) {
                sm.addResultSink(dbSink);
                if (dbInfoSink != null) {
                    sm.addResultSink(dbInfoSink);
                }
            }

            // create tableExtractor
            TableExtractor te = new TableExtractor(osmAddressMatcher, sm);

            // TODO: printout summary of options: processing date/time, host, directory of HTML files, jdbc uri, command line with parameters

            // iterate through files
            logger.info("Start processing " + html_files.size() + " files in " + dir);
            for (int i = 0; i < html_files.size(); i++) {
                System.err.println("Parsing #" + i + ": " + html_files.get(i));
                te.processHTMLfile(html_files.get(i));
            }

            System.err.println("Processed " + html_files.size() + " HTML files");
            logger.info("Finished processing " + html_files.size() + " files in " + dir);
        }

    } catch (ParseException e1) {
        System.err.println("Failed to parse CLI: " + e1.getMessage());
        help_formatter.printHelp(info_msg, options);
        System.exit(1);
    } catch (IOException e) {
        System.err.println("I/O Exception: " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    } catch (SQLException e) {
        System.err.println("Database '" + dburi + "': " + e.getMessage());
        System.exit(1);
    } catch (ResultSinkException e) {
        System.err.println("Failed to initialize ResultSink: " + e.getMessage());
        System.exit(1);
    } catch (TableExtractorException e) {
        System.err.println("Failed to initialize Table Extractor: " + e.getMessage());
        System.exit(1);
    } catch (CloneNotSupportedException | IllegalAccessException | InstantiationException e) {
        System.err.println("Something really odd happened: " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }
}

From source file:com.joliciel.talismane.terminology.Main.java

/**
 * Command-line entry point for the terminology tool.
 *
 * <p>Reads {@code name=value} arguments via {@code TalismaneConfig.convertArgs},
 * opens the terminology base described by the {@code databaseProperties} file and
 * then either runs term extraction/analysis ({@code extract}, {@code analyse}) or
 * logs the stored terms ({@code list}). Arguments this method does not recognise
 * are passed on to {@code TalismaneConfig}.
 *
 * @param args command-line arguments
 * @throws Exception if a required argument is missing or processing fails
 */
public static void main(String[] args) throws Exception {
    String termFilePath = null;
    String outFilePath = null;
    Command command = Command.extract;
    int depth = -1;
    String databasePropertiesPath = null;
    String projectCode = null;

    Map<String, String> argMap = TalismaneConfig.convertArgs(args);

    String logConfigPath = argMap.get("logConfigFile");
    if (logConfigPath != null) {
        argMap.remove("logConfigFile");
        Properties props = new Properties();
        FileInputStream logConfigStream = new FileInputStream(logConfigPath);
        try {
            props.load(logConfigStream);
        } finally {
            logConfigStream.close();
        }
        PropertyConfigurator.configure(props);
    }

    // arguments consumed here are kept out of innerArgs; everything else is forwarded
    Map<String, String> innerArgs = new HashMap<String, String>();
    for (Entry<String, String> argEntry : argMap.entrySet()) {
        String argName = argEntry.getKey();
        String argValue = argEntry.getValue();

        if (argName.equals("command"))
            command = Command.valueOf(argValue);
        else if (argName.equals("termFile"))
            termFilePath = argValue;
        else if (argName.equals("outFile"))
            outFilePath = argValue;
        else if (argName.equals("depth"))
            depth = Integer.parseInt(argValue);
        else if (argName.equals("databaseProperties"))
            databasePropertiesPath = argValue;
        else if (argName.equals("projectCode"))
            projectCode = argValue;
        else
            innerArgs.put(argName, argValue);
    }
    if (termFilePath == null && databasePropertiesPath == null)
        throw new TalismaneException("Required argument: termFile or databasePropertiesPath");

    // make sure the directory that will hold the term file exists
    if (termFilePath != null) {
        int lastSlash = termFilePath.lastIndexOf("/");
        if (lastSlash >= 0) {
            new File(termFilePath.substring(0, lastSlash)).mkdirs();
        }
    }

    long startTime = new Date().getTime();
    // opened only when an outFile is requested; closed in the finally block below
    Writer writer = null;
    try {
        TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance();
        TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService();
        TerminologyBase terminologyBase = null;

        if (projectCode == null)
            throw new TalismaneException("Required argument: projectCode");

        // the terminology base is always loaded from the database properties, so fail
        // with a clear message instead of a NullPointerException from new File(null)
        if (databasePropertiesPath == null)
            throw new TalismaneException("Required argument: databaseProperties");

        Properties dataSourceProperties = new Properties();
        FileInputStream fis = new FileInputStream(new File(databasePropertiesPath));
        try {
            dataSourceProperties.load(fis);
        } finally {
            fis.close();
        }
        terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties);

        if (command.equals(Command.analyse) || command.equals(Command.extract)) {
            if (depth < 0)
                throw new TalismaneException("Required argument: depth");

            if (command.equals(Command.analyse)) {
                innerArgs.put("command", "analyse");
            } else {
                innerArgs.put("command", "process");
            }

            TalismaneFrench talismaneFrench = new TalismaneFrench();
            TalismaneConfig config = new TalismaneConfig(innerArgs, talismaneFrench);

            PosTagSet tagSet = TalismaneSession.getPosTagSet();
            Charset outputCharset = config.getOutputCharset();

            TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase);
            termExtractor.setMaxDepth(depth);
            termExtractor.setOutFilePath(termFilePath);
            termExtractor.getIncludeChildren().add(tagSet.getPosTag("P"));
            termExtractor.getIncludeChildren().add(tagSet.getPosTag("P+D"));
            termExtractor.getIncludeChildren().add(tagSet.getPosTag("CC"));

            termExtractor.getIncludeWithParent().add(tagSet.getPosTag("DET"));

            if (outFilePath != null) {
                if (outFilePath.lastIndexOf("/") >= 0) {
                    String outFileDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/"));
                    File outFileDir = new File(outFileDirPath);
                    outFileDir.mkdirs();
                }
                File outFile = new File(outFilePath);
                outFile.delete();
                outFile.createNewFile();

                writer = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset));
                TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer);
                termExtractor.addTermObserver(termAnalysisWriter);
            }

            Talismane talismane = config.getTalismane();
            talismane.setParseConfigurationProcessor(termExtractor);
            talismane.process();
        } else if (command.equals(Command.list)) {

            List<Term> terms = terminologyBase.getTermsByFrequency(2);
            for (Term term : terms) {
                LOG.debug("Term: " + term.getText());
                LOG.debug("Frequency: " + term.getFrequency());
                LOG.debug("Heads: " + term.getHeads());
                LOG.debug("Expansions: " + term.getExpansions());
                LOG.debug("Contexts: " + term.getContexts());
            }
        }
    } finally {
        try {
            // closing flushes any buffered output; closing an already-closed writer is a no-op
            if (writer != null)
                writer.close();
        } finally {
            long endTime = new Date().getTime();
            long totalTime = endTime - startTime;
            LOG.info("Total time: " + totalTime);
        }
    }
}

From source file:eu.fbk.utils.lsa.util.Anvur.java

/**
 * Computes pairwise similarities between the records of a text file and writes
 * them, together with the per-record vectors, to a set of output files.
 *
 * <p>Expected arguments (see the usage message): {@code input threshold size dim
 * idf in-file-csv fields}. Output files are named by appending a name built from
 * {@code fields} and a suffix ({@code -bow.txt}, {@code -bow.csv}, {@code -ls.txt},
 * {@code -ls.csv}, {@code -bow+ls.txt}, {@code -bow+ls.csv}, {@code .log}) to
 * {@code in-file-csv}.
 *
 * @param args command-line arguments, exactly seven
 * @throws Exception if an argument cannot be parsed or any I/O step fails
 */
public static void main(String[] args) throws Exception {
    String logConfig = System.getProperty("log-config");
    if (logConfig == null) {
        logConfig = "log-config.txt";
    }

    PropertyConfigurator.configure(logConfig);

    if (args.length != 7) {
        System.out.println(args.length);
        System.out.println(
                "Usage: java -mx2G eu.fbk.utils.lsa.util.Anvur input threshold size dim idf in-file-csv fields\n\n");
        System.exit(1);
    }

    DecimalFormat dec = new DecimalFormat("#.00");

    File Ut = new File(args[0] + "-Ut");
    File Sk = new File(args[0] + "-S");
    File r = new File(args[0] + "-row");
    File c = new File(args[0] + "-col");
    File df = new File(args[0] + "-df");
    // parsed so that a malformed value is rejected up front; not used afterwards in this method
    double threshold = Double.parseDouble(args[1]);
    int size = Integer.parseInt(args[2]);
    int dim = Integer.parseInt(args[3]);
    boolean rescaleIdf = Boolean.parseBoolean(args[4]);

    // column labels of the input, in order
    String[] labels = { "author_check", "authors", "title", "year", "pubtype", "publisher", "journal", "volume",
            "number", "pages", "abstract", "nauthors", "citedby" };
    String name = buildName(labels, args[6]);

    File bwf = new File(args[5] + name + "-bow.txt");
    File bdf = new File(args[5] + name + "-bow.csv");
    File lwf = new File(args[5] + name + "-ls.txt");
    File ldf = new File(args[5] + name + "-ls.csv");
    File blwf = new File(args[5] + name + "-bow+ls.txt");
    File bldf = new File(args[5] + name + "-bow+ls.csv");
    File logf = new File(args[5] + name + ".log");

    PrintWriter bw = null;
    PrintWriter bd = null;
    PrintWriter lw = null;
    PrintWriter ld = null;
    PrintWriter blw = null;
    PrintWriter bld = null;
    PrintWriter log = null;
    try {
        bw = openUtf8Writer(bwf);
        bd = openUtf8Writer(bdf);
        lw = openUtf8Writer(lwf);
        ld = openUtf8Writer(ldf);
        blw = openUtf8Writer(blwf);
        bld = openUtf8Writer(bldf);
        log = openUtf8Writer(logf);

        LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf);
        // NOTE(review): lss is not used below; kept in case the constructor has side effects
        LSSimilarity lss = new LSSimilarity(lsm, size);

        List<String[]> list = readText(new File(args[5]));

        // header row of the three similarity matrices: one column per record
        for (int i = 0; i < list.size(); i++) {
            String[] s1 = list.get(i);
            bw.print("\t");
            lw.print("\t");
            blw.print("\t");
            bw.print(i + "(" + s1[0] + ")");
            lw.print(i + "(" + s1[0] + ")");
            blw.print(i + "(" + s1[0] + ")");
        } // end for i

        bw.print("\n");
        lw.print("\n");
        blw.print("\n");
        for (int i = 0; i < list.size(); i++) {
            logger.info(i + "\t");
            String[] s1 = list.get(i);
            String t1 = buildText(s1, args[6]);
            BOW bow1 = new BOW(t1);
            logger.info(bow1);

            Vector d1 = lsm.mapDocument(bow1);
            d1.normalize();
            log.println("d1:" + d1);

            Vector pd1 = lsm.mapPseudoDocument(d1);
            pd1.normalize();
            log.println("pd1:" + pd1);

            Vector m1 = merge(pd1, d1);
            log.println("m1:" + m1);

            // write the original line
            for (int j = 0; j < s1.length; j++) {
                bd.print(s1[j]);
                bd.print("\t");
                ld.print(s1[j]);
                ld.print("\t");
                bld.print(s1[j]);
                bld.print("\t");

            }
            // write the bow, ls, and bow+ls vectors
            bd.println(d1);
            ld.println(pd1);
            bld.println(m1);

            bw.print(i + "(" + s1[0] + ")");
            lw.print(i + "(" + s1[0] + ")");
            blw.print(i + "(" + s1[0] + ")");
            // only the upper triangle is filled: pad the columns up to and including the diagonal
            for (int j = 0; j < i + 1; j++) {
                bw.print("\t");
                lw.print("\t");
                blw.print("\t");
            } // end for j

            for (int j = i + 1; j < list.size(); j++) {
                logger.info(i + "\t" + j);
                String[] s2 = list.get(j);

                String t2 = buildText(s2, args[6]);
                BOW bow2 = new BOW(t2);

                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t1:" + t1);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") t2:" + t2);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow1:" + bow1);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2);

                Vector d2 = lsm.mapDocument(bow2);
                d2.normalize();
                log.println("d2:" + d2);

                Vector pd2 = lsm.mapPseudoDocument(d2);
                pd2.normalize();
                log.println("pd2:" + pd2);

                Vector m2 = merge(pd2, d2);
                log.println("m2:" + m2);

                // cosine similarity in each of the three representations
                float cosVSM = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2));
                float cosLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2));
                float cosBOWLSM = m1.dotProduct(m2) / (float) Math.sqrt(m1.dotProduct(m1) * m2.dotProduct(m2));
                bw.print("\t");
                bw.print(dec.format(cosVSM));
                lw.print("\t");
                lw.print(dec.format(cosLSM));
                blw.print("\t");
                blw.print(dec.format(cosBOWLSM));

                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow\t" + cosVSM);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") ls:\t" + cosLSM);
                log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow+ls:\t" + cosBOWLSM);
            }
            bw.print("\n");
            lw.print("\n");
            blw.print("\n");
        } // end for i
    } finally {
        // close (and flush) every writer that was opened, even when processing failed
        closeWriters(ld, bd, bld, bw, lw, blw, log);
    }

    logger.info("wrote " + bwf);
    logger.info("wrote " + bdf);
    logger.info("wrote " + lwf);
    logger.info("wrote " + ldf);
    logger.info("wrote " + blwf);
    logger.info("wrote " + bldf);
    logger.info("wrote " + logf);

}

/** Opens a buffered, UTF-8 encoded {@code PrintWriter} on the given file, truncating it. */
private static PrintWriter openUtf8Writer(File file) throws java.io.IOException {
    return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")));
}

/** Closes each non-null writer in the given order. */
private static void closeWriters(PrintWriter... writers) {
    for (PrintWriter writer : writers) {
        if (writer != null) {
            writer.close();
        }
    }
}

From source file:com.joliciel.frenchTreebank.FrenchTreebank.java

/**
 * @param args/*w  w w.ja  v a2  s . co  m*/
 */
public static void main(String[] args) throws Exception {
    String command = args[0];

    String outFilePath = "";
    String outDirPath = "";
    String treebankPath = "";
    String ftbFileName = "";
    String rawTextDir = "";
    String queryPath = "";
    String sentenceNumber = null;
    boolean firstArg = true;
    for (String arg : args) {
        if (firstArg) {
            firstArg = false;
            continue;
        }
        int equalsPos = arg.indexOf('=');
        String argName = arg.substring(0, equalsPos);
        String argValue = arg.substring(equalsPos + 1);
        if (argName.equals("outfile"))
            outFilePath = argValue;
        else if (argName.equals("outdir"))
            outDirPath = argValue;
        else if (argName.equals("ftbFileName"))
            ftbFileName = argValue;
        else if (argName.equals("treebank"))
            treebankPath = argValue;
        else if (argName.equals("sentence"))
            sentenceNumber = argValue;
        else if (argName.equals("query"))
            queryPath = argValue;
        else if (argName.equals("rawTextDir"))
            rawTextDir = argValue;
        else
            throw new RuntimeException("Unknown argument: " + argName);
    }

    TalismaneServiceLocator talismaneServiceLocator = TalismaneServiceLocator.getInstance();

    TreebankServiceLocator locator = TreebankServiceLocator.getInstance(talismaneServiceLocator);

    if (treebankPath.length() == 0)
        locator.setDataSourcePropertiesFile("jdbc-live.properties");

    if (command.equals("search")) {
        final SearchService searchService = locator.getSearchService();
        final XmlPatternSearch search = searchService.newXmlPatternSearch();
        search.setXmlPatternFile(queryPath);
        List<SearchResult> searchResults = search.perform();

        FileWriter fileWriter = new FileWriter(outFilePath);
        for (SearchResult searchResult : searchResults) {
            String lineToWrite = "";
            Sentence sentence = searchResult.getSentence();
            Phrase phrase = searchResult.getPhrase();
            lineToWrite += sentence.getFile().getFileName() + "|";
            lineToWrite += sentence.getSentenceNumber() + "|";
            List<PhraseUnit> phraseUnits = searchResult.getPhraseUnits();
            LOG.debug("Phrase: " + phrase.getId());
            for (PhraseUnit phraseUnit : phraseUnits)
                lineToWrite += phraseUnit.getLemma().getText() + "|";
            lineToWrite += phrase.getText();
            fileWriter.write(lineToWrite + "\n");
        }
        fileWriter.flush();
        fileWriter.close();
    } else if (command.equals("load")) {
        final TreebankService treebankService = locator.getTreebankService();
        final TreebankSAXParser parser = new TreebankSAXParser();
        parser.setTreebankService(treebankService);
        parser.parseDocument(treebankPath);
    } else if (command.equals("loadAll")) {
        final TreebankService treebankService = locator.getTreebankService();

        File dir = new File(treebankPath);

        String firstFile = null;
        if (args.length > 2)
            firstFile = args[2];
        String[] files = dir.list();
        if (files == null) {
            throw new RuntimeException("Not a directory or no children: " + treebankPath);
        } else {
            boolean startProcessing = true;
            if (firstFile != null)
                startProcessing = false;
            for (int i = 0; i < files.length; i++) {
                if (!startProcessing && files[i].equals(firstFile))
                    startProcessing = true;
                if (startProcessing) {
                    String filePath = args[1] + "/" + files[i];
                    LOG.debug(filePath);
                    final TreebankSAXParser parser = new TreebankSAXParser();
                    parser.setTreebankService(treebankService);
                    parser.parseDocument(filePath);
                }
            }
        }
    } else if (command.equals("loadRawText")) {
        final TreebankService treebankService = locator.getTreebankService();
        final TreebankRawTextAssigner assigner = new TreebankRawTextAssigner();
        assigner.setTreebankService(treebankService);
        assigner.setRawTextDirectory(rawTextDir);
        assigner.loadRawText();
    } else if (command.equals("tokenize")) {
        Writer csvFileWriter = null;
        if (outFilePath != null && outFilePath.length() > 0) {
            if (outFilePath.lastIndexOf("/") > 0) {
                String outputDirPath = outFilePath.substring(0, outFilePath.lastIndexOf("/"));
                File outputDir = new File(outputDirPath);
                outputDir.mkdirs();
            }

            File csvFile = new File(outFilePath);
            csvFile.delete();
            csvFile.createNewFile();
            csvFileWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(csvFile, false), "UTF8"));
        }
        try {

            final TreebankService treebankService = locator.getTreebankService();
            TreebankExportService treebankExportService = locator.getTreebankExportServiceLocator()
                    .getTreebankExportService();
            TreebankUploadService treebankUploadService = locator.getTreebankUploadServiceLocator()
                    .getTreebankUploadService();
            TreebankReader treebankReader = null;

            if (treebankPath.length() > 0) {
                File treebankFile = new File(treebankPath);
                if (sentenceNumber != null)
                    treebankReader = treebankUploadService.getXmlReader(treebankFile, sentenceNumber);
                else
                    treebankReader = treebankUploadService.getXmlReader(treebankFile);

            } else {
                treebankReader = treebankService.getDatabaseReader(TreebankSubSet.ALL, 0);
            }

            TokeniserAnnotatedCorpusReader reader = treebankExportService
                    .getTokeniserAnnotatedCorpusReader(treebankReader, csvFileWriter);

            while (reader.hasNextTokenSequence()) {
                TokenSequence tokenSequence = reader.nextTokenSequence();
                List<Integer> tokenSplits = tokenSequence.getTokenSplits();
                String sentence = tokenSequence.getText();
                LOG.debug(sentence);
                int currentPos = 0;
                StringBuilder sb = new StringBuilder();
                for (int split : tokenSplits) {
                    if (split == 0)
                        continue;
                    String token = sentence.substring(currentPos, split);
                    sb.append('|');
                    sb.append(token);
                    currentPos = split;
                }
                LOG.debug(sb.toString());
            }
        } finally {
            csvFileWriter.flush();
            csvFileWriter.close();
        }
    } else if (command.equals("export")) {
        if (outDirPath.length() == 0)
            throw new RuntimeException("Parameter required: outdir");
        File outDir = new File(outDirPath);
        outDir.mkdirs();

        final TreebankService treebankService = locator.getTreebankService();
        FrenchTreebankXmlWriter xmlWriter = new FrenchTreebankXmlWriter();
        xmlWriter.setTreebankService(treebankService);

        if (ftbFileName.length() == 0) {
            xmlWriter.write(outDir);
        } else {
            TreebankFile ftbFile = treebankService.loadTreebankFile(ftbFileName);
            String fileName = ftbFileName.substring(ftbFileName.lastIndexOf('/') + 1);
            File xmlFile = new File(outDir, fileName);
            xmlFile.delete();
            xmlFile.createNewFile();

            Writer xmlFileWriter = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(xmlFile, false), "UTF8"));
            xmlWriter.write(xmlFileWriter, ftbFile);
            xmlFileWriter.flush();
            xmlFileWriter.close();
        }
    } else {
        throw new RuntimeException("Unknown command: " + command);
    }
    LOG.debug("========== END ============");
}

From source file:net.java.sen.tools.MkSenDic.java

/**
 * Builds the Sen dictionary files.
 *
 * All file names, charsets and numeric settings are read from the
 * "dictionary" resource bundle. The build runs in seven steps: read the
 * connection file, build the three type dictionaries, write the connection
 * matrix, read the morpheme dictionaries (writing the feature file as it
 * goes), sort the entries, write the token file, and finally build and save
 * the double-array trie.
 *
 * The process exits with status 1 as soon as any step fails.
 *
 * @param args
 *            custom dictionary files. see dic/build.xml. When no file is
 *            given, the bundle's "text_dic_file" is read; otherwise only the
 *            given files are read, in order.
 */
public static void main(String args[]) {
    ResourceBundle rb = ResourceBundle.getBundle("dictionary");
    DictionaryMaker dm1 = new DictionaryMaker();
    DictionaryMaker dm2 = new DictionaryMaker();
    DictionaryMaker dm3 = new DictionaryMaker();

    // 1st field information of connect file.
    Vector rule1 = new Vector();

    // 2nd field information of connect file.
    Vector rule2 = new Vector();

    // 3rd field information of connect file.
    Vector rule3 = new Vector();

    // 4th field information of connect file.
    // this field shows cost of morpheme connection
    // [size3*(x3*size2+x2)+x1]
    // [size3*(Attr1*size2+Attr2)+Attl]
    short score[] = new short[20131];

    long start = System.currentTimeMillis();

    // /////////////////////////////////////////
    //
    // Step1. Loading connection file.
    //
    log.info("(1/7): reading connection matrix ... ");
    try {
        log.info("connection file = " + rb.getString("text_connection_file"));
        log.info("charset = " + rb.getString("dic.charset"));
        // Keep a handle on the raw stream so it can be closed once parsing is done.
        FileInputStream connectionIn = new FileInputStream(rb.getString("text_connection_file"));
        try {
            CSVParser csvparser = new CSVParser(connectionIn, rb.getString("dic.charset"));
            String t[];
            int line = 0;
            while ((t = csvparser.nextTokens()) != null) {
                if (t.length < 4) {
                    // A short row stops the read; rows collected so far are kept.
                    log.warn("invalid line in " + rb.getString("text_connection_file") + ":" + line);
                    log.warn(rb.getString("text_connection_file") + " may be broken.");
                    break;
                }
                dm1.add(t[0]);
                rule1.add(t[0]);

                dm2.add(t[1]);
                rule2.add(t[1]);

                dm3.add(t[2]);
                rule3.add(t[2]);

                // Grow the score table when the initial capacity is exhausted.
                if (line == score.length) {
                    score = resize(score);
                }

                score[line++] = (short) Integer.parseInt(t[3]);
            }
        } finally {
            connectionIn.close();
        }

        // /////////////////////////////////////////
        //
        // Step2. Building internal dictionary
        //
        log.info("(2/7): building type dictionary ... ");
        dm1.build();
        dm2.build();
        dm3.build();

    } catch (IOException e) {
        e.printStackTrace();
        // A failed read must not be reported to the caller as success.
        System.exit(1);
    }

    // -------------------------------------------------

    int size1 = dm1.size();
    int size2 = dm2.size();
    int size3 = dm3.size();
    int ruleSize = rule1.size();
    short matrix[] = new short[size1 * size2 * size3];
    short default_cost = (short) Integer.parseInt(rb.getString("default_connection_cost"));

    // /////////////////////////////////////////
    //
    // Step3. Writing Connection Matrix
    //
    log.info("(3/7): writing conection matrix (" + size1 + " x " + size2 + " x " + size3 + " = "
            + size1 * size2 * size3 + ") ...");

    // Every cell starts at the default cost; the rules below overwrite it.
    for (int i = 0; i < matrix.length; i++)
        matrix[i] = default_cost;

    for (int i = 0; i < ruleSize; i++) {
        Vector r1 = dm1.getRuleIdList((String) rule1.get(i));
        Vector r2 = dm2.getRuleIdList((String) rule2.get(i));
        Vector r3 = dm3.getRuleIdList((String) rule3.get(i));

        // Rule i applies to every combination of the ids returned for its three fields.
        for (Iterator i1 = r1.iterator(); i1.hasNext();) {
            int ii1 = ((Integer) i1.next()).intValue();
            for (Iterator i2 = r2.iterator(); i2.hasNext();) {
                int ii2 = ((Integer) i2.next()).intValue();
                for (Iterator i3 = r3.iterator(); i3.hasNext();) {
                    int ii3 = ((Integer) i3.next()).intValue();
                    int pos = size3 * (size2 * ii1 + ii2) + ii3;
                    matrix[pos] = score[i];
                }
            }
        }
    }

    try {
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("matrix_file"))));
        out.writeShort(size1);
        out.writeShort(size2);
        out.writeShort(size3);
        // Index size3 * (size2 * i1 + i2) + i3 walks the array front to back,
        // so a linear pass writes the cells in the same order.
        for (int i = 0; i < matrix.length; i++) {
            out.writeShort(matrix[i]);
        }
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

    // Release the large tables before the next memory-heavy step.
    matrix = null;
    score = null;

    // -------------------------------------------------

    int pos_start = Integer.parseInt(rb.getString("pos_start"));
    int pos_size = Integer.parseInt(rb.getString("pos_size"));

    int di = 0;
    int offset = 0;
    ArrayList dicList = new ArrayList();

    // /////////////////////////////////////////
    //
    // Step4. Reading Morpheme Information
    //
    log.info("(4/7): reading morpheme information ... ");
    String t = null;
    String[] csv = null;
    try {
        // writer for feature file.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(rb.getString("pos_file")), rb.getString("sen.charset")));

        BufferedReader dicStream = null;
        int custom_dic = -1;
        if (args.length == 0) {
            log.info("load dic: " + rb.getString("text_dic_file"));
            dicStream = new BufferedReader(new InputStreamReader(
                    new FileInputStream(rb.getString("text_dic_file")), rb.getString("dic.charset")));
        } else {
            custom_dic = 0;
            log.info("load dic: " + args[custom_dic]);
            dicStream = new BufferedReader(
                    new InputStreamReader(new FileInputStream(args[custom_dic]), rb.getString("dic.charset")));
        }

        CSVData key_b = new CSVData();
        CSVData pos_b = new CSVData();

        while (true) {
            t = dicStream.readLine();
            if (t == null) {
                // End of the current file: move on to the next custom
                // dictionary, or stop when there is none left.
                dicStream.close();
                custom_dic++;
                if (args.length == custom_dic) {
                    break;
                } else {
                    // read custom dictionary
                    log.info("load dic: " + args[custom_dic]);
                    dicStream = new BufferedReader(new InputStreamReader(new FileInputStream(args[custom_dic]),
                            rb.getString("dic.charset")));
                }
                continue;
            }

            CSVParser parser = new CSVParser(t);
            csv = parser.nextTokens();
            if (csv.length < (pos_size + pos_start)) {
                throw new RuntimeException("format error:" + t);
            }

            // key_b holds the pos_size part-of-speech columns; pos_b holds
            // those columns plus every remaining column of the row.
            key_b.clear();
            pos_b.clear();
            for (int i = pos_start; i < (pos_start + pos_size - 1); i++) {
                key_b.append(csv[i]);
                pos_b.append(csv[i]);
            }

            key_b.append(csv[pos_start + pos_size - 1]);
            pos_b.append(csv[pos_start + pos_size - 1]);

            for (int i = pos_start + pos_size; i < (csv.length - 1); i++) {
                pos_b.append(csv[i]);
            }
            pos_b.append(csv[csv.length - 1]);

            CToken token = new CToken();

            token.rcAttr2 = (short) dm1.getDicId(key_b.toString());
            token.rcAttr1 = (short) dm2.getDicId(key_b.toString());
            token.lcAttr = (short) dm3.getDicId(key_b.toString());
            token.posid = 0;
            token.posID = offset;
            token.length = (short) csv[0].length();
            token.cost = (short) Integer.parseInt(csv[1]);

            dicList.add(new PairObject(csv[0], token));

            // The offset advances by the encoded length plus one for the
            // terminating NUL written below.
            // NOTE(review): this assumes the NUL char encodes to a single
            // byte in "sen.charset" -- confirm for the configured charset.
            byte b[] = pos_b.toString().getBytes(rb.getString("sen.charset"));
            offset += (b.length + 1);
            String pos_b_str = pos_b.toString();
            bw.write(pos_b_str, 0, pos_b_str.length());
            bw.write(0);
            if (++di % 50000 == 0)
                log.info("" + di + "... ");
        }
        bw.close();
        // ----end of writing feature.cha ----
    } catch (Exception e) {
        log.error("Error: " + t);
        e.printStackTrace();
        System.exit(1);
    }

    rule1 = null;
    rule2 = null;
    rule3 = null;

    // /////////////////////////////////////////
    //
    // Step5. Sort lexs and write to file
    //
    log.info("(5/7): sorting lex... ");

    int value[] = new int[dicList.size()];
    char key[][] = new char[dicList.size()][];
    int spos = 0;
    int dsize = 0;
    int bsize = 0;
    String prev = "";
    Collections.sort(dicList);

    // /////////////////////////////////////////
    //
    // Step6. Writing Token Information
    //
    log.info("(6/7): writing token... ");
    try {
        // writer for token file.
        DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(rb.getString("token_file"))));

        // writing 'bos' and 'eos' and 'unknown' token.
        CToken token = new CToken();
        token.rcAttr2 = (short) dm1.getDicId(rb.getString("bos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("bos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("bos_pos"));
        token.write(out);

        token.rcAttr2 = (short) dm1.getDicId(rb.getString("eos_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("eos_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("eos_pos"));
        token.write(out);

        token.rcAttr2 = (short) dm1.getDicId(rb.getString("unknown_pos"));
        token.rcAttr1 = (short) dm2.getDicId(rb.getString("unknown_pos"));
        token.lcAttr = (short) dm3.getDicId(rb.getString("unknown_pos"));
        token.posID = -1;
        token.write(out);
        log.info("key size = " + key.length);
        for (int i = 0; i < key.length; i++) {
            String k = (String) ((PairObject) dicList.get(i)).key;
            if (!prev.equals(k) && i != 0) {
                // The surface form changed: record the finished run of equal
                // keys as (start index << 8) + run length, then start a new run.
                // NOTE(review): a run longer than 255 entries would spill
                // into the start-index bits -- confirm this cannot happen.
                key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();
                value[dsize] = bsize + (spos << 8);
                dsize++;
                bsize = 1;
                spos = i;
            } else {
                bsize++;
            }
            prev = (String) ((PairObject) dicList.get(i)).key;
            ((CToken) (((PairObject) dicList.get(i)).value)).write(out);
        }
        out.flush();
        out.close();
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    // Record the last run, which the loop above never closes.
    key[dsize] = ((String) ((PairObject) dicList.get(spos)).key).toCharArray();

    value[dsize] = bsize + (spos << 8);
    dsize++;

    dm1 = null;
    dm2 = null;
    dm3 = null;
    dicList = null;

    // /////////////////////////////////////////
    //
    // Step7. Build Double Array
    //
    log.info("(7/7): building Double-Array (size = " + dsize + ") ...");

    DoubleArrayTrie da = new DoubleArrayTrie();

    da.build(key, null, value, dsize);
    try {
        da.save(rb.getString("double_array_file"));
    } catch (Exception e) {
        e.printStackTrace();
        // Same policy as the earlier steps: a failed write is a failed build.
        System.exit(1);
    }

    // The elapsed milliseconds are divided by 1000, so the figure is in seconds.
    log.info("total time = " + (System.currentTimeMillis() - start) / 1000 + "[s]");
}

From source file:at.riemers.velocity2js.velocity.Velocity2Js.java

/**
 * Command-line entry point. Two modes are recognised; anything else prints
 * the usage text.
 *
 * "-d" with at least two further arguments: passes args[1] and args[2] to
 * Velocity2Js.generateDir together with the bundles loaded from the optional
 * args[3].
 *
 * "-f" with at least three further arguments: uses args[1] as the Velocity
 * file resource loader path, args[2] as the template, and args[3] as the
 * output file name pattern. One file is written per bundle, with the bundle's
 * locale inserted before the extension of args[3]. The optional args[4]
 * selects the bundles.
 *
 * Template lookup and parse errors are logged; any other exception is logged
 * and terminates the process with status 1.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        if (args.length == 0) {
            printUsage();
            return;
        }

        if (args[0].equals("-d") && args.length >= 3) {
            String resource = null;
            if (args.length >= 4) {
                resource = args[3];
            }
            List<I18NBundle> bundles = getBundles(resource);

            Velocity2Js.generateDir(args[1], args[2], bundles);
            return;
        }

        if (args[0].equals("-f") && args.length >= 4) {

            Properties p = new Properties();
            p.setProperty("resource.loader", "file");

            p.setProperty("file.resource.loader.description", "Velocity File Resource Loader");
            p.setProperty("file.resource.loader.class",
                    "org.apache.velocity.runtime.resource.loader.FileResourceLoader");

            p.setProperty("file.resource.loader.path", args[1]);

            Velocity2Js.init(p);

            String function = createFunctionName(args[2]);
            String resource = null;
            if (args.length >= 5) {
                resource = args[4];
            }
            List<I18NBundle> bundles = getBundles(resource);

            for (I18NBundle bundle : bundles) {
                // Insert the locale before the extension; a name with no dot,
                // or only a leading dot, gets the locale appended instead.
                int e = args[3].lastIndexOf('.');
                if (e <= 0)
                    e = args[3].length();
                String fname = args[3].substring(0, e) + bundle.getLocale() + args[3].substring(e);

                BufferedWriter writer = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(fname), "UTF8"));
                try {
                    Velocity2Js.generate(args[2], function, writer, bundle.getBundle());
                    writer.flush();
                } finally {
                    // Release the file handle even when generation fails.
                    writer.close();
                }
            }

            return;
        }

        printUsage();
        return;

    } catch (ResourceNotFoundException rnfe) {
        log.error("[velocity2js] : cannot find template : " + rnfe.getMessage());
    } catch (ParseErrorException pee) {
        log.error("[velocity2js] :  Syntax error in template :" + pee);
    } catch (Exception ex) {
        System.out.flush();
        log.error("[velocity2js] :  unknown error " + ex.getMessage());
        ex.printStackTrace(System.out);
        System.out.println(ex.getLocalizedMessage());
        System.exit(1);
    }
}

From source file:com.joliciel.talismane.terminology.TalismaneTermExtractorMain.java

/**
 * Command-line entry point for the term extractor.
 *
 * Arguments are parsed by StringUtils.convertArgs into name/value pairs. The
 * names handled here are logConfigFile, command, termFile, outFile, depth,
 * databaseProperties, terminologyProperties and projectCode; every other pair
 * is forwarded unchanged to the Talismane configuration.
 *
 * For the analyse and extract commands the parser output is fed to a
 * TermExtractor; for the list command the stored terms are written to the
 * debug log. The total running time is logged on the way out, whether or not
 * the run succeeded.
 *
 * @param args the command line arguments
 * @throws Exception if a required argument is missing or processing fails
 */
public static void main(String[] args) throws Exception {
    String termFilePath = null;
    String outFilePath = null;
    Command command = Command.extract;
    int depth = -1;
    String databasePropertiesPath = null;
    String projectCode = null;
    String terminologyPropertiesPath = null;

    Map<String, String> argMap = StringUtils.convertArgs(args);

    String logConfigPath = argMap.get("logConfigFile");
    if (logConfigPath != null) {
        argMap.remove("logConfigFile");
        Properties props = new Properties();
        FileInputStream logConfigStream = new FileInputStream(logConfigPath);
        try {
            props.load(logConfigStream);
        } finally {
            logConfigStream.close();
        }
        PropertyConfigurator.configure(props);
    }

    Map<String, String> innerArgs = new HashMap<String, String>();
    for (Entry<String, String> argEntry : argMap.entrySet()) {
        String argName = argEntry.getKey();
        String argValue = argEntry.getValue();

        if (argName.equals("command"))
            command = Command.valueOf(argValue);
        else if (argName.equals("termFile"))
            termFilePath = argValue;
        else if (argName.equals("outFile"))
            outFilePath = argValue;
        else if (argName.equals("depth"))
            depth = Integer.parseInt(argValue);
        else if (argName.equals("databaseProperties"))
            databasePropertiesPath = argValue;
        else if (argName.equals("terminologyProperties"))
            terminologyPropertiesPath = argValue;
        else if (argName.equals("projectCode"))
            projectCode = argValue;
        else
            innerArgs.put(argName, argValue);
    }
    if (termFilePath == null && databasePropertiesPath == null)
        throw new TalismaneException("Required argument: termFile or databasePropertiesPath");

    // Make sure the directory that will hold the term file exists.
    if (termFilePath != null) {
        int lastSlash = termFilePath.lastIndexOf("/");
        if (lastSlash >= 0) {
            new File(termFilePath.substring(0, lastSlash)).mkdirs();
        }
    }

    long startTime = System.currentTimeMillis();
    try {
        if (command.equals(Command.analyse)) {
            innerArgs.put("command", "analyse");
        } else {
            innerArgs.put("command", "process");
        }

        String sessionId = "";
        TalismaneServiceLocator locator = TalismaneServiceLocator.getInstance(sessionId);
        TalismaneService talismaneService = locator.getTalismaneService();

        TalismaneConfig config = talismaneService.getTalismaneConfig(innerArgs, sessionId);

        TerminologyServiceLocator terminologyServiceLocator = TerminologyServiceLocator.getInstance(locator);
        TerminologyService terminologyService = terminologyServiceLocator.getTerminologyService();
        TerminologyBase terminologyBase = null;

        if (projectCode == null)
            throw new TalismaneException("Required argument: projectCode");

        // The terminology base below is always database-backed, so the
        // properties file is needed even when termFile was supplied. Fail
        // with a readable message rather than a NullPointerException.
        if (databasePropertiesPath == null)
            throw new TalismaneException("Required argument: databaseProperties");

        Properties dataSourceProperties = new Properties();
        FileInputStream fis = new FileInputStream(new File(databasePropertiesPath));
        try {
            dataSourceProperties.load(fis);
        } finally {
            fis.close();
        }
        terminologyBase = terminologyService.getPostGresTerminologyBase(projectCode, dataSourceProperties);

        TalismaneSession talismaneSession = talismaneService.getTalismaneSession();

        if (command.equals(Command.analyse) || command.equals(Command.extract)) {
            Locale locale = talismaneSession.getLocale();
            Map<TerminologyProperty, String> terminologyProperties = new HashMap<TerminologyProperty, String>();
            if (terminologyPropertiesPath != null) {
                Map<String, String> terminologyPropertiesStr = StringUtils.getArgMap(terminologyPropertiesPath);
                for (String key : terminologyPropertiesStr.keySet()) {
                    try {
                        TerminologyProperty property = TerminologyProperty.valueOf(key);
                        terminologyProperties.put(property, terminologyPropertiesStr.get(key));
                    } catch (IllegalArgumentException e) {
                        throw new TalismaneException("Unknown terminology property: " + key);
                    }
                }
            } else {
                terminologyProperties = getDefaultTerminologyProperties(locale);
            }
            // A depth must come from the command line or from the properties.
            if (depth <= 0 && !terminologyProperties.containsKey(TerminologyProperty.maxDepth))
                throw new TalismaneException("Required argument: depth");

            // The input regex is the first line of a bundled resource.
            InputStream regexInputStream = getInputStreamFromResource(
                    "parser_conll_with_location_input_regex.txt");
            Scanner regexScanner = new Scanner(regexInputStream, "UTF-8");
            String inputRegex;
            try {
                inputRegex = regexScanner.nextLine();
            } finally {
                regexScanner.close();
            }
            config.setInputRegex(inputRegex);

            Charset outputCharset = config.getOutputCharset();

            TermExtractor termExtractor = terminologyService.getTermExtractor(terminologyBase,
                    terminologyProperties);
            if (depth > 0)
                termExtractor.setMaxDepth(depth);
            termExtractor.setOutFilePath(termFilePath);

            Writer writer = null;
            try {
                if (outFilePath != null) {
                    int lastSlash = outFilePath.lastIndexOf("/");
                    if (lastSlash >= 0) {
                        new File(outFilePath.substring(0, lastSlash)).mkdirs();
                    }
                    File outFile = new File(outFilePath);
                    outFile.delete();
                    outFile.createNewFile();

                    writer = new BufferedWriter(
                            new OutputStreamWriter(new FileOutputStream(outFilePath), outputCharset));
                    TermAnalysisWriter termAnalysisWriter = new TermAnalysisWriter(writer);
                    termExtractor.addTermObserver(termAnalysisWriter);
                }

                Talismane talismane = config.getTalismane();
                talismane.setParseConfigurationProcessor(termExtractor);
                talismane.process();
            } finally {
                // Flush and release the analysis file. Closing a BufferedWriter
                // that is already closed has no effect, so this is harmless if
                // the observer closed it first.
                // NOTE(review): assumes process() has finished notifying the
                // observers by the time it returns -- confirm.
                if (writer != null)
                    writer.close();
            }
        } else if (command.equals(Command.list)) {

            List<Term> terms = terminologyBase.findTerms(2, null, 0, null, null);
            for (Term term : terms) {
                LOG.debug("Term: " + term.getText());
                LOG.debug("Frequency: " + term.getFrequency());
                LOG.debug("Heads: " + term.getHeads());
                LOG.debug("Expansions: " + term.getExpansions());
                LOG.debug("Contexts: " + term.getContexts());
            }
        }
    } finally {
        long endTime = System.currentTimeMillis();
        long totalTime = endTime - startTime;
        LOG.info("Total time: " + totalTime);
    }
}

From source file:client.QueryLastFm.java

License:asdf

/**
 * Fetches the tracks similar to one fixed seed track from the last.fm
 * track.getsimilar web service and writes them to two CSV files in the
 * working directory: tracks.csv (ID, TrackName, Artist) and
 * track_id_sim_track_id.csv (source, target), the latter filled in by
 * findSimilarTracks for every track written.
 *
 * Both files are flushed and closed on every exit path, so the header rows
 * survive even when the request fails or returns no tracks.
 *
 * @param args unused
 * @throws Exception if a file cannot be opened or the request or XML parsing fails
 */
public static void main(String[] args) throws Exception {
    PrintWriter out = new PrintWriter(
            new OutputStreamWriter(new FileOutputStream(".\\tracks.csv"), "UTF-8"));
    try {
        PrintWriter track_id_out = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(".\\track_id_sim_track_id.csv"), "UTF-8"));
        try {
            // Header row of tracks.csv.
            out.print("ID");
            out.print(",");
            out.print("TrackName");
            out.print(",");
            out.println("Artist");

            // Header row of track_id_sim_track_id.csv.
            track_id_out.print("source");
            track_id_out.print(",");
            track_id_out.println("target");

            CloseableHttpClient httpclient = HttpClients.createDefault();
            try {
                fetchAndWriteSimilarTracks(httpclient, out, track_id_out);
            } finally {
                System.out.println("Exited succesfully.");
                httpclient.close();
            }
        } finally {
            track_id_out.close();
        }
    } finally {
        out.close();
    }
}

/**
 * Runs the track.getsimilar request and writes one row per usable track.
 *
 * @param httpclient client used to execute the request
 * @param out writer for tracks.csv
 * @param track_id_out writer handed to findSimilarTracks for each track
 * @throws Exception if the request or the XML parsing fails
 */
private static void fetchAndWriteSimilarTracks(CloseableHttpClient httpclient, PrintWriter out,
        PrintWriter track_id_out) throws Exception {
    // NOTE(review): the API key is hard-coded in the source -- consider
    // loading it from configuration instead.
    URI uri = new URIBuilder().setScheme("http").setHost("ws.audioscrobbler.com").setPath("/2.0/")
            .setParameter("method", "track.getsimilar").setParameter("artist", "cher")
            .setParameter("track", "believe").setParameter("limit", "100")
            .setParameter("api_key", "88858618961414f8bec919bddd057044").build();

    HttpGet request = new HttpGet(uri);
    // This is useful for last.fm logging and preventing them from blocking this client
    request.setHeader(HttpHeaders.USER_AGENT,
            "nileshmore@gatech.edu - ClassAssignment at GeorgiaTech Non-commercial use");

    // The document comes from the network, so enable the parser's secure-processing limits.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
    DocumentBuilder builder = factory.newDocumentBuilder();

    // The response holds the underlying connection until it is closed, so
    // it is always closed in the finally clause below.
    CloseableHttpResponse response = httpclient.execute(request);
    try {
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode != 200) {
            System.out.println("failed with status" + response.getStatusLine());
            return;
        }

        Document document = builder.parse(response.getEntity().getContent());
        Element root = document.getDocumentElement();
        root.normalize();

        NodeList nodes = root.getElementsByTagName("track");
        for (int k = 0; k < nodes.getLength(); k++) {
            NodeList trackAttributes = nodes.item(k).getChildNodes();

            // Only tracks with a non-empty mbid are written.
            // NOTE(review): the mbid element is expected at child index 5,
            // which depends on the exact layout of the response -- confirm.
            Node mbidCandidate = trackAttributes.item(5);
            if (mbidCandidate == null || mbidCandidate.getNodeName().compareToIgnoreCase("mbid") != 0
                    || !mbidCandidate.hasChildNodes()) {
                continue;
            }

            // Reset per track so a missing field never inherits the
            // previous track's value.
            String trackName = "";
            String artistName = "";
            String sourceMbid = "";

            for (int n = 0; n < trackAttributes.getLength(); n++) {
                Node attribute = trackAttributes.item(n);
                String attributeName = attribute.getNodeName();

                if (attributeName.compareToIgnoreCase("name") == 0) {
                    trackName = firstChildValue(attribute);
                }

                if (attributeName.compareToIgnoreCase("mbid") == 0) {
                    sourceMbid = firstChildValue(attribute);
                }

                if (attributeName.compareToIgnoreCase("artist") == 0) {
                    NodeList artistChildren = attribute.getChildNodes();
                    for (int j = 0; j < artistChildren.getLength(); j++) {
                        Node artistChild = artistChildren.item(j);
                        if (artistChild.getNodeName().compareToIgnoreCase("name") == 0) {
                            artistName = firstChildValue(artistChild);
                        }
                    }
                }
            }

            // NOTE(review): values are written unescaped, so a comma in a
            // track or artist name adds a column -- confirm this is acceptable.
            out.print(sourceMbid);
            out.print(",");
            out.print(trackName);
            out.print(",");
            out.println(artistName);
            findSimilarTracks(track_id_out, sourceMbid, trackName, artistName);
        }

        track_id_out.flush();
        out.flush();
    } finally {
        response.close();
    }
}

/**
 * Returns the node value of the first child of the given node, or the empty
 * string when the node has no children.
 */
private static String firstChildValue(Node node) {
    Node child = node.getFirstChild();
    if (child == null) {
        return "";
    }
    return child.getNodeValue();
}

From source file:gobblin.runtime.util.JobStateToJsonConverter.java

/**
 * Command-line entry point: converts stored Gobblin job state to JSON and emits it
 * either on standard output or into a file.
 *
 * <p>Options: {@code -sc/--sysconfig}, {@code -u/--storeurl} (required),
 * {@code -n/--name} (required), {@code -i/--id}, {@code -a/--all},
 * {@code -kc/--keepConfig} and {@code -t/--toFile}. If the arguments cannot be
 * parsed, the usage text is printed and the JVM exits with status 1.
 *
 * @param args raw command-line arguments
 * @throws Exception any failure raised by the underlying calls is propagated
 */
@SuppressWarnings("all")
public static void main(String[] args) throws Exception {
    // --- option definitions ------------------------------------------------
    Option sysConfigOpt = Option.builder("sc")
            .argName("system configuration file")
            .desc("Gobblin system configuration file")
            .longOpt("sysconfig")
            .hasArgs()
            .build();
    Option storeUrlOpt = Option.builder("u")
            .argName("gobblin state store URL")
            .desc("Gobblin state store root path URL")
            .longOpt("storeurl")
            .hasArgs()
            .required()
            .build();
    Option jobNameOpt = Option.builder("n")
            .argName("gobblin job name")
            .desc("Gobblin job name")
            .longOpt("name")
            .hasArgs()
            .required()
            .build();
    Option jobIdOpt = Option.builder("i")
            .argName("gobblin job id")
            .desc("Gobblin job id")
            .longOpt("id")
            .hasArgs()
            .build();
    Option convertAllOpt = Option.builder("a")
            .desc("Whether to convert all past job states of the given job")
            .longOpt("all")
            .build();
    Option keepConfigOpt = Option.builder("kc")
            .desc("Whether to keep all configuration properties")
            .longOpt("keepConfig")
            .build();
    Option toFileOpt = Option.builder("t")
            .argName("output file name")
            .desc("Output file name")
            .longOpt("toFile")
            .hasArgs()
            .build();

    Options options = new Options();
    options.addOption(sysConfigOpt);
    options.addOption(storeUrlOpt);
    options.addOption(jobNameOpt);
    options.addOption(jobIdOpt);
    options.addOption(convertAllOpt);
    options.addOption(keepConfigOpt);
    options.addOption(toFileOpt);

    // --- parsing -------------------------------------------------------------
    CommandLine cmd = null;
    try {
        cmd = new DefaultParser().parse(options, args);
    } catch (ParseException pe) {
        new HelpFormatter().printHelp("JobStateToJsonConverter", options);
        System.exit(1);
    }

    // --- system configuration: empty unless a config file was supplied ------
    String sysConfigKey = sysConfigOpt.getLongOpt();
    Properties sysConfig = cmd.hasOption(sysConfigKey)
            ? JobConfigurationUtils.fileToProperties(cmd.getOptionValue(sysConfigKey))
            : new Properties();

    JobStateToJsonConverter converter =
            new JobStateToJsonConverter(sysConfig, cmd.getOptionValue('u'), cmd.hasOption("kc"));

    // --- conversion: an explicit job id takes precedence over --all ---------
    StringWriter json = new StringWriter();
    if (cmd.hasOption('i')) {
        converter.convert(cmd.getOptionValue('n'), cmd.getOptionValue('i'), json);
    } else if (cmd.hasOption('a')) {
        converter.convertAll(cmd.getOptionValue('n'), json);
    } else {
        converter.convert(cmd.getOptionValue('n'), json);
    }

    // --- output ------------------------------------------------------------
    if (!cmd.hasOption('t')) {
        System.out.println(json.toString());
        return;
    }

    // Every stream layer is registered with the Closer so that all of them are
    // closed in the finally block, whether or not the write succeeds.
    Closer closer = Closer.create();
    try {
        FileOutputStream fileOut = closer.register(new FileOutputStream(cmd.getOptionValue('t')));
        OutputStreamWriter encodedOut = closer.register(
                new OutputStreamWriter(fileOut, ConfigurationKeys.DEFAULT_CHARSET_ENCODING));
        BufferedWriter out = closer.register(new BufferedWriter(encodedOut));
        out.write(json.toString());
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}

From source file:de.morbz.osmpoispbf.Scanner.java

/**
 * Command-line entry point of OsmPoisPbf: scans an OSM input file and writes every
 * entity that matches the category filters as one CSV line.
 *
 * <p>The last command-line argument is taken as the input file. Unless
 * {@code --outputFile} is given, the output file is the input file name cut at the
 * first dot of its file-name part, with {@code .csv} appended. The remaining
 * options configure the static state of this class ({@code requiredTags},
 * {@code undesiredTags}, {@code outputTags}, {@code onlyClosedWays},
 * {@code printPois}, {@code filters}, {@code writer}) as well as the separator and
 * decimal precision of {@code Poi}.
 *
 * <p>The method never returns normally: it terminates the JVM with status
 * {@code 0} after a successful run or after printing the help, and with status
 * {@code -1} on any usage, parse or I/O error.
 *
 * @param args command-line options followed by the input file
 */
public static void main(String[] args) {
    System.out.println("OsmPoisPbf " + VERSION + " started");

    // Get input file
    if (args.length < 1) {
        System.out.println("Error: Please provide an input file");
        System.exit(-1);
    }
    String inputFile = args[args.length - 1];

    // Get output file: cut the input name at the first dot of the file NAME.
    // Dots inside the directory part (e.g. "./data/x.osm.pbf") must not be taken
    // for the start of the extension, otherwise the default would degrade to
    // something like ".csv".
    int nameStart = Math.max(inputFile.lastIndexOf('/'), inputFile.lastIndexOf(File.separatorChar)) + 1;
    String outputFile;
    int index = inputFile.indexOf('.', nameStart);
    if (index != -1) {
        outputFile = inputFile.substring(0, index);
    } else {
        outputFile = inputFile;
    }
    outputFile += ".csv";

    // Setup CLI parameters
    options = new Options();
    options.addOption("ff", "filterFile", true, "The file that is used to filter categories");
    options.addOption("of", "outputFile", true, "The output CSV file to be written");
    options.addOption("rt", "requiredTags", true, "Comma separated list of tags that are required [name]");
    options.addOption("ut", "undesiredTags", true,
            "Comma separated list of tags=value combinations that should be filtered [key=value]");
    options.addOption("ot", "outputTags", true, "Comma separated list of tags that are exported [name]");
    options.addOption("ph", "printHeader", false,
            "If flag is set, the `outputTags` are printed as first line in the output file.");
    options.addOption("r", "relations", false, "Parse relations");
    options.addOption("nw", "noWays", false, "Don't parse ways");
    options.addOption("nn", "noNodes", false, "Don't parse nodes");
    options.addOption("u", "allowUnclosedWays", false, "Allow ways that aren't closed");
    options.addOption("d", "decimals", true, "Number of decimal places of coordinates [7]");
    options.addOption("s", "separator", true, "Separator character for CSV [|]");
    options.addOption("v", "verbose", false, "Print all found POIs");
    options.addOption("h", "help", false, "Print this help");

    // Parse parameters
    CommandLine line = null;
    try {
        line = (new DefaultParser()).parse(options, args);
    } catch (ParseException exp) {
        System.err.println(exp.getMessage());
        printHelp();
        System.exit(-1);
    }

    // Help
    if (line.hasOption("help")) {
        printHelp();
        System.exit(0);
    }

    // Get filter file (stays null when the option is absent)
    String filterFile = null;
    if (line.hasOption("filterFile")) {
        filterFile = line.getOptionValue("filterFile");
    }

    // An explicit output file overrides the derived default
    if (line.hasOption("outputFile")) {
        outputFile = line.getOptionValue("outputFile");
    }

    // Check files
    if (inputFile.equals(outputFile)) {
        System.out.println("Error: Input and output files are the same");
        System.exit(-1);
    }
    File file = new File(inputFile);
    if (!file.exists()) {
        System.out.println("Error: Input file doesn't exist");
        System.exit(-1);
    }

    // Check OSM entity types: nodes and ways are on by default, relations are opt-in
    boolean parseNodes = true;
    boolean parseWays = true;
    boolean parseRelations = false;
    if (line.hasOption("noNodes")) {
        parseNodes = false;
    }
    if (line.hasOption("noWays")) {
        parseWays = false;
    }
    if (line.hasOption("relations")) {
        parseRelations = true;
    }

    // Unclosed ways allowed?
    if (line.hasOption("allowUnclosedWays")) {
        onlyClosedWays = false;
    }

    // Get CSV separator
    char separator = '|';
    if (line.hasOption("separator")) {
        String arg = line.getOptionValue("separator");
        if (arg.length() != 1) {
            System.out.println("Error: The CSV separator has to be exactly 1 character");
            System.exit(-1);
        }
        separator = arg.charAt(0);
    }
    Poi.setSeparator(separator);

    // Set decimals
    int decimals = 7; // OSM default
    if (line.hasOption("decimals")) {
        String arg = line.getOptionValue("decimals");
        try {
            int dec = Integer.parseInt(arg);
            if (dec < 0) {
                System.out.println("Error: Decimals must not be less than 0");
                System.exit(-1);
            } else {
                decimals = dec;
            }
        } catch (NumberFormatException ex) {
            System.out.println("Error: Decimals have to be a number");
            System.exit(-1);
        }
    }
    Poi.setDecimals(decimals);

    // Verbose mode?
    if (line.hasOption("verbose")) {
        printPois = true;
    }

    // Required tags
    if (line.hasOption("requiredTags")) {
        String arg = line.getOptionValue("requiredTags");
        requiredTags = arg.split(",");
    }

    // Undesired tags: "key=value" pairs grouped into key -> set of values
    if (line.hasOption("undesiredTags")) {
        String arg = line.getOptionValue("undesiredTags");
        undesiredTags = new HashMap<>();
        for (String undesired : arg.split(",")) {
            String[] keyVal = undesired.split("=");
            if (keyVal.length != 2) {
                System.out.println("Error: Undesired Tags have to formated like tag=value");
                System.exit(-1);
            }
            if (!undesiredTags.containsKey(keyVal[0])) {
                undesiredTags.put(keyVal[0], new HashSet<>(1));
            }
            undesiredTags.get(keyVal[0]).add(keyVal[1]);
        }
    }

    // Output tags
    if (line.hasOption("outputTags")) {
        String arg = line.getOptionValue("outputTags");
        outputTags = arg.split(",");
    }

    // Get filter rules
    FilterFileParser parser = new FilterFileParser(filterFile);
    filters = parser.parse();
    if (filters == null) {
        // NOTE(review): no message is printed here; presumably the parser reports
        // its own error - confirm.
        System.exit(-1);
    }

    // Setup CSV output
    try {
        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF8"));
    } catch (IOException e) {
        System.out.println("Error: Output file error");
        System.exit(-1);
    }

    // Print Header
    if (line.hasOption("printHeader")) {
        String header = "category" + separator + "osm_id" + separator + "lat" + separator + "lon";
        for (int i = 0; i < outputTags.length; i++) {
            header += separator + outputTags[i];
        }
        try {
            writer.write(header + "\n");
        } catch (IOException e) {
            System.out.println("Error: Output file write error");
            System.exit(-1);
        }
    }

    // Setup OSMonaut
    EntityFilter filter = new EntityFilter(parseNodes, parseWays, parseRelations);
    Osmonaut naut = new Osmonaut(inputFile, filter, false);

    // Start watch
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();

    // Start OSMonaut
    String finalSeparator = String.valueOf(separator);
    naut.scan(new IOsmonautReceiver() {

        boolean entityNeeded;

        @Override
        public boolean needsEntity(EntityType type, Tags tags) {
            // Are there any tags?
            if (tags.size() == 0) {
                return false;
            }

            // Check required tags
            for (String tag : requiredTags) {
                if (!tags.hasKey(tag)) {
                    return false;
                }
            }

            entityNeeded = getCategory(tags, filters) != null;

            // Reject entities carrying any undesired key=value combination
            if (undesiredTags != null && entityNeeded) {
                for (String key : undesiredTags.keySet()) {
                    if (tags.hasKey(key)) {
                        for (String val : undesiredTags.get(key)) {
                            if (tags.hasKeyValue(key, val)) {
                                return false;
                            }
                        }
                    }
                }
            }

            return entityNeeded;
        }

        @Override
        public void foundEntity(Entity entity) {
            // Check if way is closed
            if (onlyClosedWays && entity.getEntityType() == EntityType.WAY) {
                if (!((Way) entity).isClosed()) {
                    return;
                }
            }

            // Get category
            Tags tags = entity.getTags();
            String cat = getCategory(tags, filters);
            if (cat == null) {
                return;
            }

            // Get center
            LatLon center = entity.getCenter();
            if (center == null) {
                return;
            }

            // Make OSM-ID
            String type = "";
            switch (entity.getEntityType()) {
            case NODE:
                type = "node";
                break;
            case WAY:
                type = "way";
                break;
            case RELATION:
                type = "relation";
                break;
            }
            String id = String.valueOf(entity.getId());

            // Make output tags. The separator and double quotes are stripped with the
            // LITERAL String.replace: the separator is user input and must not be
            // interpreted as a regular expression (the default "|" is an empty regex
            // alternation that would strip nothing, "." would erase the whole value).
            String[] values = new String[outputTags.length];
            for (int i = 0; i < outputTags.length; i++) {
                String key = outputTags[i];
                if (tags.hasKey(key)) {
                    values[i] = tags.get(key).replace(finalSeparator, "").replace("\"", "");
                }
            }

            // Make POI
            poisFound++;
            Poi poi = new Poi(values, cat, center, type, id);

            // Output: refresh the progress display at most every 40 ms
            if (printPois && System.currentTimeMillis() > lastMillis + 40) {
                printPoisFound();
                lastMillis = System.currentTimeMillis();
            }

            // Write to file
            try {
                writer.write(poi.toCsv() + "\n");
            } catch (IOException e) {
                System.out.println("Error: Output file write error");
                System.exit(-1);
            }
        }
    });

    // Close writer
    try {
        writer.close();
    } catch (IOException e) {
        System.out.println("Error: Output file close error");
        System.exit(-1);
    }

    // Output results
    stopWatch.stop();

    printPoisFound();
    System.out.println();
    System.out.println("Elapsed time in milliseconds: " + stopWatch.getElapsedTime());

    // Quit
    System.exit(0);
}