public static void close(@Nullable Closeable closeable, boolean swallowIOException) throws IOException 

Closes a Closeable, with control over whether an IOException may be thrown.


From source file:org.apache.mahout.classifier.df.tools.UDistrib.java

/**
 * Reads the data file line by line and prepares one temporary part file per partition, then
 * merges the part files into a single output.
 *
 * NOTE(review): this excerpt is truncated. Several closing braces, the statement that writes
 * each tuple to its part file, and the increment of {@code currents[label]} are not visible
 * here, so the comments below describe only what the remaining lines show.
 *
 * @param dataStr path of the input data; opened and scanned as UTF-8 text
 * @param datasetStr path from which the {@code Dataset} description is loaded
 * @param output path of the merged output; rejected if it already exists
 * @param numPartitions number of part files to create; rejected unless greater than zero
 * @throws IOException declared for the file-system and stream operations used below
 */
private static void runTool(String dataStr, String datasetStr, String output, int numPartitions)
        throws IOException {

    Preconditions.checkArgument(numPartitions > 0, "numPartitions <= 0");

    // make sure the output file does not exist
    Path outputPath = new Path(output);
    Configuration conf = new Configuration();
    FileSystem fs = outputPath.getFileSystem(conf);

    Preconditions.checkArgument(!fs.exists(outputPath), "Output path already exists");

    // create a new file corresponding to each partition
    // Path workingDir = fs.getWorkingDirectory();
    // FileSystem wfs = workingDir.getFileSystem(conf);
    // File parentFile = new File(workingDir.toString());
    // File tempFile = FileUtil.createLocalTempFile(parentFile, "Parts", true);
    // File tempFile = File.createTempFile("df.tools.UDistrib","");
    // tempFile.deleteOnExit();
    // The temp file's path is reused below as the parent path of every part file.
    File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true);
    Path partsPath = new Path(tempFile.toString());
    FileSystem pfs = partsPath.getFileSystem(conf);

    // One path and one open output stream per partition, named part.000, part.001, ...
    Path[] partPaths = new Path[numPartitions];
    FSDataOutputStream[] files = new FSDataOutputStream[numPartitions];
    for (int p = 0; p < numPartitions; p++) {
        partPaths[p] = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p));
        files[p] = pfs.create(partPaths[p]);
    Path datasetPath = new Path(datasetStr);
    Dataset dataset = Dataset.load(conf, datasetPath);

    // currents[label] = next partition file where to place the tuple
    int[] currents = new int[dataset.nblabels()];

    // currents is initialized randomly in the range [0, numpartitions[
    Random random = RandomUtils.getRandom();
    for (int c = 0; c < currents.length; c++) {
        currents[c] = random.nextInt(numPartitions);

    // foreach tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input, "UTF-8");
    DataConverter converter = new DataConverter(dataset);

    // NOTE(review): id is never incremented in the visible lines -- the increment was
    // presumably among the lines lost from this excerpt.
    int id = 0;
    while (scanner.hasNextLine()) {
        // Progress is logged whenever id is a multiple of 1000.
        if (id % 1000 == 0) {
            log.info("progress : {}", id);

        String line = scanner.nextLine();
        if (line.isEmpty()) {
            continue; // skip empty lines

        // write the tuple in files[tuple.label]
        Instance instance = converter.convert(line);
        int label = (int) dataset.getLabel(instance);

        // update currents
        // Wrap back to partition 0 once the index for this label has run past the last partition.
        if (currents[label] == numPartitions) {
            currents[label] = 0;

    // close all the files.
    // swallowIOException is false, so an IOException raised while closing a part file propagates.
    // NOTE(review): neither scanner nor input is closed in the visible lines.
    for (FSDataOutputStream file : files) {
        Closeables.close(file, false);

    // merge all output files
    // NOTE(review): the boolean argument presumably asks copyMerge to delete the part files
    // after merging -- confirm against the Hadoop FileUtil.copyMerge documentation.
    FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);
    // NOTE(review): the lines below look like the body of a commented-out alternative merge
    // whose opening and closing comment delimiters were lost in this excerpt.
     * FSDataOutputStream joined = fs.create(new Path(outputPath, "uniform.data")); for (int p = 0; p <
     * numPartitions; p++) {log.info("Joining part : {}", p); FSDataInputStream partStream =
     * fs.open(partPaths[p]);
     * IOUtils.copyBytes(partStream, joined, conf, false);
     * partStream.close(); }
     * joined.close();
     * fs.delete(partsPath, true);

From source file:org.apache.mahout.vectorizer.collocations.llr.CollocMapper.java

 * Collocation finder: pass 1 map phase.
 * Receives a token stream which gets passed through a Lucene ShingleFilter. The ShingleFilter delivers ngrams of
 * the appropriate size which are then decomposed into head and tail subgrams which are collected in the
 * following manner
 * <p/>
 * <pre>
 * k:head_key,           v:head_subgram
 * k:head_key,ngram_key, v:ngram
 * k:tail_key,           v:tail_subgram
 * k:tail_key,ngram_key, v:ngram
 * </pre>
 * <p/>
 * The 'head' or 'tail' prefix is used to specify whether the subgram in question is the head or tail of the
 * ngram. In this implementation the head of the ngram is a (n-1)gram, and the tail is a (1)gram.
 * <p/>
 * For example, given 'click and clack' and an ngram length of 3:
 * <pre>
 * k: head_'click and'                         v:head_'click and'
 * k: head_'click and',ngram_'click and clack' v:ngram_'click and clack'
 * k: tail_'clack',                            v:tail_'clack'
 * k: tail_'clack',ngram_'click and clack'     v:ngram_'click and clack'
 * </pre>
 * <p>
 * Also counts the total number of ngrams encountered and adds it to the counter
 * CollocDriver.Count.NGRAM_TOTAL.
 * </p>
 * @throws IOException if there's a problem with the ShingleFilter reading data or the collector collecting output.
// Tallies shingles and (optionally) unigrams from the token stream built over value's entries,
// then writes the head/tail/ngram records and the unigram records to the context.
// NOTE(review): this excerpt is truncated -- the ShingleFilter constructor call below is cut off
// after its first argument and most closing braces are missing.
protected void map(Text key, StringTuple value, final Context context)
        throws IOException, InterruptedException {

    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
    try {
        // NOTE(review): count is not updated anywhere in the visible lines.
        int count = 0; // ngram count

        // Per-term frequency tables, sized from the number of entries in the tuple.
        OpenObjectIntHashMap<String> ngrams = new OpenObjectIntHashMap<String>(
                value.getEntries().size() * (maxShingleSize - 1));
        OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());

        // NOTE(review): as written, the first pass reads the term/type attributes before
        // incrementToken() has been called in the visible lines -- confirm the stream is already
        // positioned on a token (and reset) before this loop.
        do {
            String term = sf.getAttribute(CharTermAttribute.class).toString();
            String type = sf.getAttribute(TypeAttribute.class).type();
            // Tokens typed "shingle" are tallied as ngrams; other non-empty tokens are tallied as
            // unigrams only when emitUnigrams is set.
            // presumably adjustOrPutValue(term, 1, 1) adds 1 to an existing count or inserts 1 -- verify
            if ("shingle".equals(type)) {
                ngrams.adjustOrPutValue(term, 1, 1);
            } else if (emitUnigrams && !term.isEmpty()) { // unigram
                unigrams.adjustOrPutValue(term, 1, 1);
        } while (sf.incrementToken());

        // A single key instance is reused (via set) for every record written below.
        final GramKey gramKey = new GramKey();

        ngrams.forEachPair(new ObjectIntProcedure<String>() {
            public boolean apply(String term, int frequency) {
                // obtain components, the leading (n-1)gram and the trailing unigram.
                int i = term.lastIndexOf(' '); // TODO: fix for non-whitespace delimited languages.
                if (i != -1) { // bigram, trigram etc

                    try {
                        // Split at the last space: everything before it is the head, the rest the tail.
                        Gram ngram = new Gram(term, frequency, Gram.Type.NGRAM);
                        Gram head = new Gram(term.substring(0, i), frequency, Gram.Type.HEAD);
                        Gram tail = new Gram(term.substring(i + 1), frequency, Gram.Type.TAIL);

                        // Four records per ngram: head alone, head+ngram, tail alone, tail+ngram.
                        gramKey.set(head, EMPTY);
                        context.write(gramKey, head);

                        gramKey.set(head, ngram.getBytes());
                        context.write(gramKey, ngram);

                        gramKey.set(tail, EMPTY);
                        context.write(gramKey, tail);

                        gramKey.set(tail, ngram.getBytes());
                        context.write(gramKey, ngram);

                    // apply() declares no checked exceptions, so failures from context.write are
                    // rethrown unchecked with the original exception as the cause.
                    } catch (IOException e) {
                        throw new IllegalStateException(e);
                    } catch (InterruptedException e) {
                        throw new IllegalStateException(e);
                return true;

        unigrams.forEachPair(new ObjectIntProcedure<String>() {
            public boolean apply(String term, int frequency) {
                try {
                    // One record per unigram, keyed by the unigram with the EMPTY suffix.
                    Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM);
                    gramKey.set(unigram, EMPTY);
                    context.write(gramKey, unigram);
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                } catch (InterruptedException e) {
                    throw new IllegalStateException(e);
                return true;

    } finally {
        // swallowIOException is true: a failure while closing the filter does not propagate.
        Closeables.close(sf, true);

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java

/**
 * Runs the sequential FPGrowth path: reads its settings from {@code params}, feeds the input
 * file to one of two FPGrowth implementations with a SequenceFile writer on the output path as
 * the sink, then reads the frequent patterns back from the output and logs them.
 *
 * Settings read from {@code params}: "maxHeapSize" (default "50"), "minSupport" (default "3"),
 * "output" (default "output.txt"), "input", "encoding", "splitPattern" (default
 * {@code PFPGrowth.SPLITTER}) and the {@code PFPGrowth.USE_FPG2} switch.
 *
 * NOTE(review): this excerpt is truncated. The writer constructor call is cut off, a
 * {@code try} opener is missing in the first branch, the line that starts the pattern-mining
 * call is missing in both branches, and several closing braces are absent.
 *
 * @param params job settings, looked up by the keys listed above
 * @throws IOException declared for the file-system, stream and writer operations used below
 */
private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    int maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50"));
    int minSupport = Integer.valueOf(params.get("minSupport", "3"));

    Path output = new Path(params.get("output", "output.txt"));
    Path input = new Path(params.get("input"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);

    // NOTE(review): no default is supplied for "encoding" here -- confirm it is always set,
    // since Charset.forName does not accept a null name.
    Charset encoding = Charset.forName(params.get("encoding"));

    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, Text.class,

    // The input is opened twice: one stream feeds the record iterator passed as the first
    // argument below, the other feeds generateFList.
    FSDataInputStream inputStream = null;
    FSDataInputStream inputStreamAgain = null;

    Collection<String> features = Sets.newHashSet();

    // Both branches pass the same arguments; they differ only in the FPGrowth class used.
    if ("true".equals(params.get(PFPGrowth.USE_FPG2))) {
        com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String> fp = new com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String>();

            inputStream = fs.open(input);
            inputStreamAgain = fs.open(input);
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            // The writer is closed with swallowIOException=false, so a close failure propagates;
            // the two input streams are closed with failures swallowed.
            // NOTE(review): if closing the writer throws, the two input closes below are skipped.
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
    } else {
        FPGrowth<String> fp = new FPGrowth<String>();

        // NOTE(review): in this branch the streams are opened before the try, so a failure in
        // the second open would leave the first stream and the writer unclosed.
        inputStream = fs.open(input);
        inputStreamAgain = fs.open(input);
        try {
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            // Same close order and swallow flags as the first branch.
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);

    // Read the patterns back from the file just written and log one entry per feature.
    List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, output);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());

From source file:org.apache.mahout.clustering.classify.ClusterClassifier.java

/**
 * Writes one sequence file per entry of {@code models} under {@code path}. File i is named
 * "part-" followed by i zero-padded to five digits, and receives a single record keyed by
 * {@code IntWritable(i)}.
 *
 * NOTE(review): this excerpt is truncated. The writer constructor call is cut off, closing
 * braces are missing, and no visible line stores {@code cluster} into {@code cw}, so the
 * value actually appended cannot be confirmed from here.
 *
 * @param path parent path for the part files; also used to resolve the file system
 * @throws IOException declared for the file-system and writer operations used below
 */
public void writeToSeqFiles(Path path) throws IOException {
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(path.toUri(), config);
    // NOTE(review): writer lives outside the loop, so if the constructor throws on a later
    // iteration the finally block closes the previous iteration's writer a second time.
    SequenceFile.Writer writer = null;
    // The same ClusterWritable instance is reused as the value for every file.
    ClusterWritable cw = new ClusterWritable();
    for (int i = 0; i < models.size(); i++) {
        try {
            Cluster cluster = models.get(i);
            writer = new SequenceFile.Writer(fs, config,
                    new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)), IntWritable.class,
            Writable key = new IntWritable(i);
            writer.append(key, cw);
        } finally {
            // swallowIOException is false: a failure while closing the writer propagates.
            Closeables.close(writer, false);

From source file:org.apache.giraph.hive.jython.HiveJythonUtils.java

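 * Parses a set of Jython scripts from local files.
 * @param interpreter PythonInterpreter to use
 * @param paths local paths of the Jython script files to read
 * @return JythonJob
 * @throws IOException if a script file cannot be opened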
// Opens every path as a FileInputStream, hands all streams to parseJythonStreams, and closes
// them afterwards whether or not parsing succeeded.
// NOTE(review): closing braces are missing from this excerpt.
public static JythonJob parseJythonFiles(PythonInterpreter interpreter, List<String> paths) throws IOException {
    InputStream[] streams = new InputStream[paths.size()];
    // NOTE(review): the streams are opened before the try below, so if opening a later file
    // throws, the streams already opened are not closed by this method.
    for (int i = 0; i < paths.size(); ++i) {
        LOG.info("Reading jython file " + paths.get(i));
        streams[i] = new FileInputStream(paths.get(i));

    JythonJob jythonJob;
    try {
        jythonJob = parseJythonStreams(interpreter, streams);
    } finally {
        // swallowIOException is true: a failure while closing one stream does not propagate,
        // so the loop goes on to close the remaining streams.
        for (InputStream stream : streams) {
            Closeables.close(stream, true);
    return jythonJob;

From source file:com.turn.ttorrent.tracker.client.HTTPTrackerClient.java

/**
 * Decodes the body of a tracker HTTP response into a tracker message.
 *
 * NOTE(review): this excerpt is truncated. The outer {@code try} opener, the {@code else}
 * between the two returns inside the inner try, the body of the final {@code finally} and the
 * closing braces are not visible.
 *
 * @param response the HTTP response whose entity is decoded
 * @param maxContentLength upper bound for the declared content length; the check is skipped
 *        when this value is negative or when the entity reports a negative length
 * @return {@code null} when the response has no entity or the entity has no content stream;
 *         otherwise the result of {@code HTTPTrackerErrorMessage.fromBEValue} when the decoded
 *         map contains "failure reason", or of {@code HTTPAnnounceResponseMessage.fromBEValue}
 * @throws IOException wrapping an {@code InvalidBEncodingException} or a
 *         {@code TrackerMessage.MessageValidationException} raised while decoding
 * @throws IllegalArgumentException when the declared content length exceeds maxContentLength
 */
public static HTTPTrackerMessage toMessage(@Nonnull HttpResponse response,
        @CheckForSigned long maxContentLength) throws IOException {
    HttpEntity entity = response.getEntity();
    if (entity == null) // Usually 204-no-content, etc.
        return null;
        // Enforce the size limit only when both the limit and the declared length are known.
        if (maxContentLength >= 0) {
            long contentLength = entity.getContentLength();
            if (contentLength >= 0)
                if (contentLength > maxContentLength)
                    throw new IllegalArgumentException(
                            "ContentLength was too big: " + contentLength + ": " + response);

        InputStream in = entity.getContent();
        if (in == null)
            return null;
        try {
            // The body is decoded as a bencoded map of parameters.
            StreamBDecoder decoder = new StreamBDecoder(in);
            BEValue value = decoder.bdecodeMap();
            Map<String, BEValue> params = value.getMap();
            // TODO: "warning message"
            if (params.containsKey("failure reason"))
                return HTTPTrackerErrorMessage.fromBEValue(params);
                return HTTPAnnounceResponseMessage.fromBEValue(params);
        } finally {
            // swallowIOException is true: a failure while closing the stream does not propagate.
            Closeables.close(in, true);
    // Both decoding failures are reported as IOException with the original exception as cause.
    } catch (InvalidBEncodingException e) {
        throw new IOException("Failed to parse response " + response, e);
    } catch (TrackerMessage.MessageValidationException e) {
        throw new IOException("Failed to parse response " + response, e);
    } finally {

From source file:com.minecave.pickaxes.util.nbt.EPNbtFactory.java

 * Load the content of a file from a stream.
 * Use {@link Files#newInputStreamSupplier(java.io.File)} to provide a stream from a file.
 * @param stream - the stream supplier.
 * @param option - whether or not to decompress the input stream.
 * @return The decoded NBT compound.
 * @throws IOException If anything went wrong.
// Obtains an input stream from the supplier, optionally wraps it in a GZIPInputStream, and
// loads an NBT compound from it through a buffered DataInputStream.
// NOTE(review): closing braces are missing from this excerpt.
public static NbtCompound fromStream(InputSupplier<? extends InputStream> stream, StreamOptions option)
        throws IOException {
    InputStream input = null;
    DataInputStream data = null;
    // Stays true until loading has succeeded: if the try body throws, a failure while closing is
    // swallowed so it does not replace the original exception; after a successful load, a close
    // failure is allowed to propagate.
    boolean suppress = true;

    try {
        input = stream.getInput();
        // Decompress only when GZIP_COMPRESSION is requested; otherwise read the raw stream.
        data = new DataInputStream(new BufferedInputStream(
                option == StreamOptions.GZIP_COMPRESSION ? new GZIPInputStream(input) : input));

        NbtCompound result = fromCompound(get().LOAD_COMPOUND.loadNbt(data));
        suppress = false;
        return result;

    } finally {
        // Close the outermost stream that was created; data wraps input, so input is closed
        // directly only when building the wrappers failed.
        if (data != null)
            Closeables.close(data, suppress);
        else if (input != null)
            Closeables.close(input, suppress);

From source file:org.apache.mahout.clustering.classify.ClusterClassifier.java

/**
 * Opens the policy file named {@code POLICY_FILE_NAME} under {@code path} and returns the
 * value held by a {@code ClusteringPolicyWritable}.
 *
 * NOTE(review): this excerpt appears truncated. No visible line reads from {@code reader}
 * into {@code key} / {@code cpw} before the reader is closed, and the closing brace is
 * missing; as shown, {@code cpw} is returned exactly as constructed.
 *
 * @param path parent path that contains the policy file
 * @return {@code cpw.getValue()}
 * @throws IOException declared for the file-system and reader operations used below
 */
public static ClusteringPolicy readPolicy(Path path) throws IOException {
    Path policyPath = new Path(path, POLICY_FILE_NAME);
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(policyPath.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, policyPath, config);
    Text key = new Text();
    ClusteringPolicyWritable cpw = new ClusteringPolicyWritable();
    // swallowIOException is true: a failure while closing the reader does not propagate.
    // NOTE(review): the close is not inside a finally block, so an exception thrown between
    // opening the reader and this line would leave the reader open.
    Closeables.close(reader, true);
    return cpw.getValue();

From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java

private static void printTopWords(List<Queue<Pair<String, Double>>> topWords, File outputDir)
        throws IOException {
    for (int i = 0; i < topWords.size(); ++i) {
        Collection<Pair<String, Double>> topK = topWords.get(i);
        boolean printingToSystemOut = false;
        try {
            if (outputDir != null) {
                out = new OutputStreamWriter(new FileOutputStream(new File(outputDir, "topic_" + i)),
            } else {
                out = new OutputStreamWriter(System.out, Charsets.UTF_8);
                printingToSystemOut = true;
                out.write("Topic " + i);
            List<Pair<String, Double>> topKasList = Lists.newArrayListWithCapacity(topK.size());
            for (Pair<String, Double> wordWithScore : topK) {
            Collections.sort(topKasList, new Comparator<Pair<String, Double>>() {
                public int compare(Pair<String, Double> pair1, Pair<String, Double> pair2) {
                    return pair2.getSecond().compareTo(pair1.getSecond());
            for (Pair<String, Double> wordWithScore : topKasList) {
                out.write(wordWithScore.getFirst() + " [p(" + wordWithScore.getFirst() + "|topic_" + i + ") = "
                        + wordWithScore.getSecond());
        } finally {
            if (!printingToSystemOut) {
                Closeables.close(out, false);
            } else {