Example usage for com.google.common.io Closeables.close

List of usage examples for com.google.common.io Closeables.close

Introduction

On this page you can find example usages of com.google.common.io Closeables.close.

Prototype

public static void close(@Nullable Closeable closeable, boolean swallowIOException) throws IOException 

Document

Closes a Closeable, with control over whether an IOException may be thrown.
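
Before the examples from real projects below, here is a minimal, self-contained sketch of the typical pattern; it is not taken from any of the source files on this page, and the stream variable and file path are hypothetical. Passing true swallows and logs an IOException thrown by close(), the usual choice in a finally block so a close failure cannot mask an exception already propagating from the try block; passing false lets the IOException propagate, the usual choice when closing after a successful write.

import com.google.common.io.Closeables;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class CloseablesCloseExample {
    public static void main(String[] args) throws IOException {
        InputStream in = null; // hypothetical resource; any Closeable (or null) is accepted
        try {
            in = new FileInputStream("example.txt"); // hypothetical input file
            // ... read from the stream ...
        } finally {
            // swallowIOException = true: an IOException from close() is logged and swallowed,
            // so it cannot hide an exception already thrown inside the try block.
            // A null closeable is accepted; the call is then a no-op.
            Closeables.close(in, true);
        }
    }
}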

Usage

From source file:org.apache.mahout.classifier.df.tools.UDistrib.java

private static void runTool(String dataStr, String datasetStr, String output, int numPartitions)
        throws IOException {

    Preconditions.checkArgument(numPartitions > 0, "numPartitions <= 0");

    // make sure the output file does not exist
    Path outputPath = new Path(output);
    Configuration conf = new Configuration();
    FileSystem fs = outputPath.getFileSystem(conf);

    Preconditions.checkArgument(!fs.exists(outputPath), "Output path already exists");

    // create a new file corresponding to each partition
    // Path workingDir = fs.getWorkingDirectory();
    // FileSystem wfs = workingDir.getFileSystem(conf);
    // File parentFile = new File(workingDir.toString());
    // File tempFile = FileUtil.createLocalTempFile(parentFile, "Parts", true);
    // File tempFile = File.createTempFile("df.tools.UDistrib","");
    // tempFile.deleteOnExit();
    File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true);
    Path partsPath = new Path(tempFile.toString());
    FileSystem pfs = partsPath.getFileSystem(conf);

    Path[] partPaths = new Path[numPartitions];
    FSDataOutputStream[] files = new FSDataOutputStream[numPartitions];
    for (int p = 0; p < numPartitions; p++) {
        partPaths[p] = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p));
        files[p] = pfs.create(partPaths[p]);
    }

    Path datasetPath = new Path(datasetStr);
    Dataset dataset = Dataset.load(conf, datasetPath);

    // currents[label] = next partition file where to place the tuple
    int[] currents = new int[dataset.nblabels()];

    // currents is initialized randomly in the range [0, numpartitions[
    Random random = RandomUtils.getRandom();
    for (int c = 0; c < currents.length; c++) {
        currents[c] = random.nextInt(numPartitions);
    }

    // foreach tuple of the data
    Path dataPath = new Path(dataStr);
    FileSystem ifs = dataPath.getFileSystem(conf);
    FSDataInputStream input = ifs.open(dataPath);
    Scanner scanner = new Scanner(input, "UTF-8");
    DataConverter converter = new DataConverter(dataset);

    int id = 0;
    while (scanner.hasNextLine()) {
        if (id % 1000 == 0) {
            log.info("progress : {}", id);
        }

        String line = scanner.nextLine();
        if (line.isEmpty()) {
            continue; // skip empty lines
        }

        // write the tuple in files[tuple.label]
        Instance instance = converter.convert(line);
        int label = (int) dataset.getLabel(instance);
        files[currents[label]].writeBytes(line);
        files[currents[label]].writeChar('\n');

        // update currents
        currents[label]++;
        if (currents[label] == numPartitions) {
            currents[label] = 0;
        }
    }

    // close all the files.
    scanner.close();
    for (FSDataOutputStream file : files) {
        Closeables.close(file, false);
    }

    // merge all output files
    FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);
    /*
     * FSDataOutputStream joined = fs.create(new Path(outputPath, "uniform.data")); for (int p = 0; p <
     * numPartitions; p++) {log.info("Joining part : {}", p); FSDataInputStream partStream =
     * fs.open(partPaths[p]);
     * 
     * IOUtils.copyBytes(partStream, joined, conf, false);
     * 
     * partStream.close(); }
     * 
     * joined.close();
     * 
     * fs.delete(partsPath, true);
     */
}

From source file:org.apache.mahout.vectorizer.collocations.llr.CollocMapper.java

/**
 * Collocation finder: pass 1 map phase.
 * <p/>
 * Receives a token stream which gets passed through a Lucene ShingleFilter. The ShingleFilter delivers ngrams of
 * the appropriate size which are then decomposed into head and tail subgrams which are collected in the
 * following manner
 * <p/>
 * <pre>
 * k:head_key,           v:head_subgram
 * k:head_key,ngram_key, v:ngram
 * k:tail_key,           v:tail_subgram
 * k:tail_key,ngram_key, v:ngram
 * </pre>
 * <p/>
 * The 'head' or 'tail' prefix is used to specify whether the subgram in question is the head or tail of the
 * ngram. In this implementation the head of the ngram is a (n-1)gram, and the tail is a (1)gram.
 * <p/>
 * For example, given 'click and clack' and an ngram length of 3:
 * <pre>
 * k: head_'click and'                         v:head_'click and'
 * k: head_'click and',ngram_'click and clack' v:ngram_'click and clack'
 * k: tail_'clack',                            v:tail_'clack'
 * k: tail_'clack',ngram_'click and clack'     v:ngram_'click and clack'
 * </pre>
 * <p/>
 * Also counts the total number of ngrams encountered and adds it to the counter
 * CollocDriver.Count.NGRAM_TOTAL
 * </p>
 *
 * @throws IOException if there's a problem with the ShingleFilter reading data or the collector collecting output.
 */
@Override
protected void map(Text key, StringTuple value, final Context context)
        throws IOException, InterruptedException {

    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
            maxShingleSize);
    sf.reset();
    try {
        int count = 0; // ngram count

        OpenObjectIntHashMap<String> ngrams = new OpenObjectIntHashMap<String>(
                value.getEntries().size() * (maxShingleSize - 1));
        OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());

        do {
            String term = sf.getAttribute(CharTermAttribute.class).toString();
            String type = sf.getAttribute(TypeAttribute.class).type();
            if ("shingle".equals(type)) {
                count++;
                ngrams.adjustOrPutValue(term, 1, 1);
            } else if (emitUnigrams && !term.isEmpty()) { // unigram
                unigrams.adjustOrPutValue(term, 1, 1);
            }
        } while (sf.incrementToken());

        final GramKey gramKey = new GramKey();

        ngrams.forEachPair(new ObjectIntProcedure<String>() {
            @Override
            public boolean apply(String term, int frequency) {
                // obtain components, the leading (n-1)gram and the trailing unigram.
                int i = term.lastIndexOf(' '); // TODO: fix for non-whitespace delimited languages.
                if (i != -1) { // bigram, trigram etc

                    try {
                        Gram ngram = new Gram(term, frequency, Gram.Type.NGRAM);
                        Gram head = new Gram(term.substring(0, i), frequency, Gram.Type.HEAD);
                        Gram tail = new Gram(term.substring(i + 1), frequency, Gram.Type.TAIL);

                        gramKey.set(head, EMPTY);
                        context.write(gramKey, head);

                        gramKey.set(head, ngram.getBytes());
                        context.write(gramKey, ngram);

                        gramKey.set(tail, EMPTY);
                        context.write(gramKey, tail);

                        gramKey.set(tail, ngram.getBytes());
                        context.write(gramKey, ngram);

                    } catch (IOException e) {
                        throw new IllegalStateException(e);
                    } catch (InterruptedException e) {
                        throw new IllegalStateException(e);
                    }
                }
                return true;
            }
        });

        unigrams.forEachPair(new ObjectIntProcedure<String>() {
            @Override
            public boolean apply(String term, int frequency) {
                try {
                    Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM);
                    gramKey.set(unigram, EMPTY);
                    context.write(gramKey, unigram);
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                } catch (InterruptedException e) {
                    throw new IllegalStateException(e);
                }
                return true;
            }
        });

        context.getCounter(Count.NGRAM_TOTAL).increment(count);
        sf.end();
    } finally {
        Closeables.close(sf, true);
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java

private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    int maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50"));
    int minSupport = Integer.valueOf(params.get("minSupport", "3"));

    Path output = new Path(params.get("output", "output.txt"));
    Path input = new Path(params.get("input"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);

    Charset encoding = Charset.forName(params.get("encoding"));

    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, Text.class,
            TopKStringPatterns.class);

    FSDataInputStream inputStream = null;
    FSDataInputStream inputStreamAgain = null;

    Collection<String> features = Sets.newHashSet();

    if ("true".equals(params.get(PFPGrowth.USE_FPG2))) {
        com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String> fp = new com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String>();

        try {
            inputStream = fs.open(input);
            inputStreamAgain = fs.open(input);
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
        }
    } else {
        FPGrowth<String> fp = new FPGrowth<String>();

        inputStream = fs.open(input);
        inputStreamAgain = fs.open(input);
        try {
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
        }
    }

    List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, output);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
    }
}

From source file:org.apache.mahout.clustering.classify.ClusterClassifier.java

public void writeToSeqFiles(Path path) throws IOException {
    writePolicy(policy, path);
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(path.toUri(), config);
    SequenceFile.Writer writer = null;
    ClusterWritable cw = new ClusterWritable();
    for (int i = 0; i < models.size(); i++) {
        try {
            Cluster cluster = models.get(i);
            cw.setValue(cluster);
            writer = new SequenceFile.Writer(fs, config,
                    new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)), IntWritable.class,
                    ClusterWritable.class);
            Writable key = new IntWritable(i);
            writer.append(key, cw);
        } finally {
            Closeables.close(writer, false);
        }
    }
}

From source file:edu.rosehulman.CollocMapper.java

/**
 * Collocation finder: pass 1 map phase.
 * <p/>
 * Receives a token stream which gets passed through a Lucene ShingleFilter. The ShingleFilter delivers ngrams of
 * the appropriate size which are then decomposed into head and tail subgrams which are collected in the
 * following manner
 * <p/>
 * <pre>
 * k:head_key,           v:head_subgram
 * k:head_key,ngram_key, v:ngram
 * k:tail_key,           v:tail_subgram
 * k:tail_key,ngram_key, v:ngram
 * </pre>
 * <p/>
 * The 'head' or 'tail' prefix is used to specify whether the subgram in question is the head or tail of the
 * ngram. In this implementation the head of the ngram is a (n-1)gram, and the tail is a (1)gram.
 * <p/>
 * For example, given 'click and clack' and an ngram length of 3:
 * <pre>
 * k: head_'click and'                         v:head_'click and'
 * k: head_'click and',ngram_'click and clack' v:ngram_'click and clack'
 * k: tail_'clack',                            v:tail_'clack'
 * k: tail_'clack',ngram_'click and clack'     v:ngram_'click and clack'
 * </pre>
 * <p/>
 * Also counts the total number of ngrams encountered and adds it to the counter
 * CollocDriver.Count.NGRAM_TOTAL
 * </p>
 *
 * @throws IOException if there's a problem with the ShingleFilter reading data or the collector collecting output.
 */
@Override
protected void map(Text key, StringTuple value, final Context context)
        throws IOException, InterruptedException {

    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
            maxShingleSize);
    sf.reset();
    try {
        int count = 0; // ngram count

        OpenObjectIntHashMap<String> ngrams = new OpenObjectIntHashMap<String>(
                value.getEntries().size() * (maxShingleSize - 1));
        OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());

        do {
            String term = sf.getAttribute(CharTermAttribute.class).toString();
            String type = sf.getAttribute(TypeAttribute.class).type();
            if ("shingle".equals(type)) {
                count++;
                ngrams.adjustOrPutValue(term, 1, 1);
            } else if (emitUnigrams && !term.isEmpty()) { // unigram
                unigrams.adjustOrPutValue(term, 1, 1);
            }
        } while (sf.incrementToken());

        final GramKey gramKey = new GramKey();

        ngrams.forEachPair(new ObjectIntProcedure<String>() {
            public boolean apply(String term, int frequency) {
                // obtain components, the leading (n-1)gram and the trailing unigram.
                int i = term.lastIndexOf(' '); // TODO: fix for non-whitespace delimited languages.
                if (i != -1) { // bigram, trigram etc

                    try {
                        Gram ngram = new Gram(term, frequency, Gram.Type.NGRAM);
                        Gram head = new Gram(term.substring(0, i), frequency, Gram.Type.HEAD);
                        Gram tail = new Gram(term.substring(i + 1), frequency, Gram.Type.TAIL);

                        gramKey.set(head, EMPTY);
                        context.write(gramKey, head);

                        gramKey.set(head, ngram.getBytes());
                        context.write(gramKey, ngram);

                        gramKey.set(tail, EMPTY);
                        context.write(gramKey, tail);

                        gramKey.set(tail, ngram.getBytes());
                        context.write(gramKey, ngram);

                    } catch (IOException e) {
                        throw new IllegalStateException(e);
                    } catch (InterruptedException e) {
                        throw new IllegalStateException(e);
                    }
                }
                return true;
            }
        });

        unigrams.forEachPair(new ObjectIntProcedure<String>() {
            public boolean apply(String term, int frequency) {
                try {
                    Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM);
                    gramKey.set(unigram, EMPTY);
                    context.write(gramKey, unigram);
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                } catch (InterruptedException e) {
                    throw new IllegalStateException(e);
                }
                return true;
            }
        });

        context.getCounter(Count.NGRAM_TOTAL).increment(count);
        sf.end();
    } finally {
        Closeables.close(sf, true);
    }
}

From source file:org.apache.giraph.hive.jython.HiveJythonUtils.java

/**
 * Parse set of Jython scripts from local files
 *
 * @param interpreter PythonInterpreter to use
 * @param paths Jython files to parse
 * @return JythonJob
 * @throws IOException
 */
public static JythonJob parseJythonFiles(PythonInterpreter interpreter, List<String> paths) throws IOException {
    InputStream[] streams = new InputStream[paths.size()];
    for (int i = 0; i < paths.size(); ++i) {
        LOG.info("Reading jython file " + paths.get(i));
        streams[i] = new FileInputStream(paths.get(i));
    }

    JythonJob jythonJob;
    try {
        jythonJob = parseJythonStreams(interpreter, streams);
    } finally {
        for (InputStream stream : streams) {
            Closeables.close(stream, true);
        }
    }
    return jythonJob;
}

From source file:com.turn.ttorrent.tracker.client.HTTPTrackerClient.java

@CheckForNull
public static HTTPTrackerMessage toMessage(@Nonnull HttpResponse response,
        @CheckForSigned long maxContentLength) throws IOException {
    HttpEntity entity = response.getEntity();
    if (entity == null) // Usually 204-no-content, etc.
        return null;
    try {
        if (maxContentLength >= 0) {
            long contentLength = entity.getContentLength();
            if (contentLength >= 0)
                if (contentLength > maxContentLength)
                    throw new IllegalArgumentException(
                            "ContentLength was too big: " + contentLength + ": " + response);
        }

        InputStream in = entity.getContent();
        if (in == null)
            return null;
        try {
            StreamBDecoder decoder = new StreamBDecoder(in);
            BEValue value = decoder.bdecodeMap();
            Map<String, BEValue> params = value.getMap();
            // TODO: "warning message"
            if (params.containsKey("failure reason"))
                return HTTPTrackerErrorMessage.fromBEValue(params);
            else
                return HTTPAnnounceResponseMessage.fromBEValue(params);
        } finally {
            Closeables.close(in, true);
        }
    } catch (InvalidBEncodingException e) {
        throw new IOException("Failed to parse response " + response, e);
    } catch (TrackerMessage.MessageValidationException e) {
        throw new IOException("Failed to parse response " + response, e);
    } finally {
        EntityUtils.consumeQuietly(entity);
    }
}

From source file:com.minecave.pickaxes.util.nbt.EPNbtFactory.java

/**
 * Load the content of a file from a stream.
 * <p/>
 * Use {@link Files#newInputStreamSupplier(java.io.File)} to provide a stream from a file.
 *
 * @param stream - the stream supplier.
 * @param option - whether or not to decompress the input stream.
 * @return The decoded NBT compound.
 * @throws IOException If anything went wrong.
 */
public static NbtCompound fromStream(InputSupplier<? extends InputStream> stream, StreamOptions option)
        throws IOException {
    InputStream input = null;
    DataInputStream data = null;
    boolean suppress = true;

    try {
        input = stream.getInput();
        data = new DataInputStream(new BufferedInputStream(
                option == StreamOptions.GZIP_COMPRESSION ? new GZIPInputStream(input) : input));

        NbtCompound result = fromCompound(get().LOAD_COMPOUND.loadNbt(data));
        suppress = false;
        return result;

    } finally {
        if (data != null)
            Closeables.close(data, suppress);
        else if (input != null)
            Closeables.close(input, suppress);
    }
}

From source file:org.apache.mahout.clustering.classify.ClusterClassifier.java

public static ClusteringPolicy readPolicy(Path path) throws IOException {
    Path policyPath = new Path(path, POLICY_FILE_NAME);
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(policyPath.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, policyPath, config);
    Text key = new Text();
    ClusteringPolicyWritable cpw = new ClusteringPolicyWritable();
    reader.next(key, cpw);
    Closeables.close(reader, true);
    return cpw.getValue();
}

From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java

private static void printTopWords(List<Queue<Pair<String, Double>>> topWords, File outputDir)
        throws IOException {
    for (int i = 0; i < topWords.size(); ++i) {
        Collection<Pair<String, Double>> topK = topWords.get(i);
        Writer out = null;
        boolean printingToSystemOut = false;
        try {
            if (outputDir != null) {
                out = new OutputStreamWriter(new FileOutputStream(new File(outputDir, "topic_" + i)),
                        Charsets.UTF_8);
            } else {
                out = new OutputStreamWriter(System.out, Charsets.UTF_8);
                printingToSystemOut = true;
                out.write("Topic " + i);
                out.write('\n');
                out.write("===========");
                out.write('\n');
            }
            List<Pair<String, Double>> topKasList = Lists.newArrayListWithCapacity(topK.size());
            for (Pair<String, Double> wordWithScore : topK) {
                topKasList.add(wordWithScore);
            }
            Collections.sort(topKasList, new Comparator<Pair<String, Double>>() {
                @Override
                public int compare(Pair<String, Double> pair1, Pair<String, Double> pair2) {
                    return pair2.getSecond().compareTo(pair1.getSecond());
                }
            });
            for (Pair<String, Double> wordWithScore : topKasList) {
                out.write(wordWithScore.getFirst() + " [p(" + wordWithScore.getFirst() + "|topic_" + i + ") = "
                        + wordWithScore.getSecond());
                out.write('\n');
            }
        } finally {
            if (!printingToSystemOut) {
                Closeables.close(out, false);
            } else {
                out.flush();
            }
        }
    }
}