List of usage examples for com.google.common.io.Closeables.close
public static void close(@Nullable Closeable closeable, boolean swallowIOException) throws IOException
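The boolean argument controls what happens when close() itself fails: with swallowIOException = true the IOException is logged and suppressed, and with false it is rethrown to the caller. Most of the examples below follow the same convention: true when closing after a failure, so the close() error cannot mask the original exception, and false on the success path, so a failed close() is not silently lost. A minimal sketch of that pattern (the class name, method name, and file path are hypothetical):

import com.google.common.io.Closeables;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

public class CloseablesExample {

  // Returns the first character of the file at 'path', or -1 if the file is empty.
  public static int readFirstChar(String path) throws IOException {
    Reader reader = null;
    boolean threw = true;
    try {
      reader = new FileReader(path);
      int c = reader.read();
      threw = false; // the work succeeded; a close() failure should now surface
      return c;
    } finally {
      // Swallow an IOException from close() only if the try block already threw,
      // so the original exception is not masked. Closeables.close accepts null.
      Closeables.close(reader, threw);
    }
  }
}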
From source file: org.apache.mahout.classifier.df.tools.UDistrib.java

private static void runTool(String dataStr, String datasetStr, String output, int numPartitions)
    throws IOException {
  Preconditions.checkArgument(numPartitions > 0, "numPartitions <= 0");

  // make sure the output file does not exist
  Path outputPath = new Path(output);
  Configuration conf = new Configuration();
  FileSystem fs = outputPath.getFileSystem(conf);
  Preconditions.checkArgument(!fs.exists(outputPath), "Output path already exists");

  // create a new file corresponding to each partition
  File tempFile = FileUtil.createLocalTempFile(new File(""), "df.tools.UDistrib", true);
  Path partsPath = new Path(tempFile.toString());
  FileSystem pfs = partsPath.getFileSystem(conf);

  Path[] partPaths = new Path[numPartitions];
  FSDataOutputStream[] files = new FSDataOutputStream[numPartitions];
  for (int p = 0; p < numPartitions; p++) {
    partPaths[p] = new Path(partsPath, String.format(Locale.ENGLISH, "part.%03d", p));
    files[p] = pfs.create(partPaths[p]);
  }

  Path datasetPath = new Path(datasetStr);
  Dataset dataset = Dataset.load(conf, datasetPath);

  // currents[label] = next partition file where to place the tuple
  int[] currents = new int[dataset.nblabels()];

  // currents is initialized randomly in the range [0, numPartitions)
  Random random = RandomUtils.getRandom();
  for (int c = 0; c < currents.length; c++) {
    currents[c] = random.nextInt(numPartitions);
  }

  // for each tuple of the data
  Path dataPath = new Path(dataStr);
  FileSystem ifs = dataPath.getFileSystem(conf);
  FSDataInputStream input = ifs.open(dataPath);
  Scanner scanner = new Scanner(input, "UTF-8");
  DataConverter converter = new DataConverter(dataset);

  int id = 0;
  while (scanner.hasNextLine()) {
    if (id % 1000 == 0) {
      log.info("progress : {}", id);
    }
    id++; // count input lines so the progress log fires every 1000 lines

    String line = scanner.nextLine();
    if (line.isEmpty()) {
      continue; // skip empty lines
    }

    // write the tuple to files[tuple.label]
    Instance instance = converter.convert(line);
    int label = (int) dataset.getLabel(instance);
    files[currents[label]].writeBytes(line);
    files[currents[label]].writeChar('\n');

    // update currents
    currents[label]++;
    if (currents[label] == numPartitions) {
      currents[label] = 0;
    }
  }

  // close all the files; a failed close should not be swallowed here
  scanner.close();
  for (FSDataOutputStream file : files) {
    Closeables.close(file, false);
  }

  // merge all output files
  FileUtil.copyMerge(pfs, partsPath, fs, outputPath, true, conf, null);
}
From source file: org.apache.mahout.vectorizer.collocations.llr.CollocMapper.java

/**
 * Collocation finder: pass 1 map phase.
 * <p/>
 * Receives a token stream which gets passed through a Lucene ShingleFilter. The ShingleFilter delivers
 * ngrams of the appropriate size which are then decomposed into head and tail subgrams which are
 * collected in the following manner:
 * <p/>
 * <pre>
 * k:head_key,           v:head_subgram
 * k:head_key,ngram_key, v:ngram
 * k:tail_key,           v:tail_subgram
 * k:tail_key,ngram_key, v:ngram
 * </pre>
 * <p/>
 * The 'head' or 'tail' prefix is used to specify whether the subgram in question is the head or tail of
 * the ngram. In this implementation the head of the ngram is an (n-1)gram, and the tail is a (1)gram.
 * <p/>
 * For example, given 'click and clack' and an ngram length of 3:
 * <pre>
 * k: head_'click and',                          v: head_'click and'
 * k: head_'click and',ngram_'click and clack', v: ngram_'click and clack'
 * k: tail_'clack',                              v: tail_'clack'
 * k: tail_'clack',ngram_'click and clack',      v: ngram_'click and clack'
 * </pre>
 * <p/>
 * Also counts the total number of ngrams encountered and adds it to the counter
 * CollocDriver.Count.NGRAM_TOTAL.
 *
 * @throws IOException if there's a problem with the ShingleFilter reading data or the collector
 *                     collecting output.
 */
@Override
protected void map(Text key, StringTuple value, final Context context)
    throws IOException, InterruptedException {
  ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize);
  sf.reset();
  try {
    int count = 0; // ngram count
    OpenObjectIntHashMap<String> ngrams =
        new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1));
    OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());

    do {
      String term = sf.getAttribute(CharTermAttribute.class).toString();
      String type = sf.getAttribute(TypeAttribute.class).type();
      if ("shingle".equals(type)) {
        count++;
        ngrams.adjustOrPutValue(term, 1, 1);
      } else if (emitUnigrams && !term.isEmpty()) { // unigram
        unigrams.adjustOrPutValue(term, 1, 1);
      }
    } while (sf.incrementToken());

    final GramKey gramKey = new GramKey();

    ngrams.forEachPair(new ObjectIntProcedure<String>() {
      @Override
      public boolean apply(String term, int frequency) {
        // obtain components, the leading (n-1)gram and the trailing unigram
        int i = term.lastIndexOf(' '); // TODO: fix for non-whitespace delimited languages
        if (i != -1) { // bigram, trigram etc.
          try {
            Gram ngram = new Gram(term, frequency, Gram.Type.NGRAM);
            Gram head = new Gram(term.substring(0, i), frequency, Gram.Type.HEAD);
            Gram tail = new Gram(term.substring(i + 1), frequency, Gram.Type.TAIL);

            gramKey.set(head, EMPTY);
            context.write(gramKey, head);

            gramKey.set(head, ngram.getBytes());
            context.write(gramKey, ngram);

            gramKey.set(tail, EMPTY);
            context.write(gramKey, tail);

            gramKey.set(tail, ngram.getBytes());
            context.write(gramKey, ngram);
          } catch (IOException e) {
            throw new IllegalStateException(e);
          } catch (InterruptedException e) {
            throw new IllegalStateException(e);
          }
        }
        return true;
      }
    });

    unigrams.forEachPair(new ObjectIntProcedure<String>() {
      @Override
      public boolean apply(String term, int frequency) {
        try {
          Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM);
          gramKey.set(unigram, EMPTY);
          context.write(gramKey, unigram);
        } catch (IOException e) {
          throw new IllegalStateException(e);
        } catch (InterruptedException e) {
          throw new IllegalStateException(e);
        }
        return true;
      }
    });

    context.getCounter(Count.NGRAM_TOTAL).increment(count);
    sf.end();
  } finally {
    Closeables.close(sf, true);
  }
}
From source file: com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java

private static void runFPGrowth(Parameters params) throws IOException {
  log.info("Starting Sequential FPGrowth");
  int maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50"));
  int minSupport = Integer.valueOf(params.get("minSupport", "3"));

  Path output = new Path(params.get("output", "output.txt"));
  Path input = new Path(params.get("input"));

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(output.toUri(), conf);

  Charset encoding = Charset.forName(params.get("encoding"));
  String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, output, Text.class, TopKStringPatterns.class);

  FSDataInputStream inputStream = null;
  FSDataInputStream inputStreamAgain = null;

  Collection<String> features = Sets.newHashSet();

  if ("true".equals(params.get(PFPGrowth.USE_FPG2))) {
    com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String> fp =
        new com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String>();
    try {
      inputStream = fs.open(input);
      inputStreamAgain = fs.open(input);
      fp.generateTopKFrequentPatterns(
          new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
          fp.generateFList(
              new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern),
              minSupport),
          minSupport, maxHeapSize, features,
          new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
          new ContextStatusUpdater(null));
    } finally {
      Closeables.close(writer, false);
      Closeables.close(inputStream, true);
      Closeables.close(inputStreamAgain, true);
    }
  } else {
    FPGrowth<String> fp = new FPGrowth<String>();
    inputStream = fs.open(input);
    inputStreamAgain = fs.open(input);
    try {
      fp.generateTopKFrequentPatterns(
          new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
          fp.generateFList(
              new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false), pattern),
              minSupport),
          minSupport, maxHeapSize, features,
          new StringOutputConverter(new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
          new ContextStatusUpdater(null));
    } finally {
      Closeables.close(writer, false);
      Closeables.close(inputStream, true);
      Closeables.close(inputStreamAgain, true);
    }
  }

  List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, output);
  for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
    log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
  }
}
From source file: org.apache.mahout.clustering.classify.ClusterClassifier.java

public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  SequenceFile.Writer writer = null;
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    try {
      Cluster cluster = models.get(i);
      cw.setValue(cluster);
      writer = new SequenceFile.Writer(fs, config,
          new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)),
          IntWritable.class, ClusterWritable.class);
      Writable key = new IntWritable(i);
      writer.append(key, cw);
    } finally {
      Closeables.close(writer, false);
    }
  }
}
From source file: edu.rosehulman.CollocMapper.java

/**
 * Collocation finder: pass 1 map phase.
 * <p/>
 * Receives a token stream which gets passed through a Lucene ShingleFilter. The ShingleFilter delivers
 * ngrams of the appropriate size which are then decomposed into head and tail subgrams which are
 * collected in the following manner:
 * <p/>
 * <pre>
 * k:head_key,           v:head_subgram
 * k:head_key,ngram_key, v:ngram
 * k:tail_key,           v:tail_subgram
 * k:tail_key,ngram_key, v:ngram
 * </pre>
 * <p/>
 * The 'head' or 'tail' prefix is used to specify whether the subgram in question is the head or tail of
 * the ngram. In this implementation the head of the ngram is an (n-1)gram, and the tail is a (1)gram.
 * <p/>
 * For example, given 'click and clack' and an ngram length of 3:
 * <pre>
 * k: head_'click and',                          v: head_'click and'
 * k: head_'click and',ngram_'click and clack', v: ngram_'click and clack'
 * k: tail_'clack',                              v: tail_'clack'
 * k: tail_'clack',ngram_'click and clack',      v: ngram_'click and clack'
 * </pre>
 * <p/>
 * Also counts the total number of ngrams encountered and adds it to the counter
 * CollocDriver.Count.NGRAM_TOTAL.
 *
 * @throws IOException if there's a problem with the ShingleFilter reading data or the collector
 *                     collecting output.
 */
@Override
protected void map(Text key, StringTuple value, final Context context)
    throws IOException, InterruptedException {
  ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize);
  sf.reset();
  try {
    int count = 0; // ngram count
    OpenObjectIntHashMap<String> ngrams =
        new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1));
    OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());

    do {
      String term = sf.getAttribute(CharTermAttribute.class).toString();
      String type = sf.getAttribute(TypeAttribute.class).type();
      if ("shingle".equals(type)) {
        count++;
        ngrams.adjustOrPutValue(term, 1, 1);
      } else if (emitUnigrams && !term.isEmpty()) { // unigram
        unigrams.adjustOrPutValue(term, 1, 1);
      }
    } while (sf.incrementToken());

    final GramKey gramKey = new GramKey();

    ngrams.forEachPair(new ObjectIntProcedure<String>() {
      public boolean apply(String term, int frequency) {
        // obtain components, the leading (n-1)gram and the trailing unigram
        int i = term.lastIndexOf(' '); // TODO: fix for non-whitespace delimited languages
        if (i != -1) { // bigram, trigram etc.
          try {
            Gram ngram = new Gram(term, frequency, Gram.Type.NGRAM);
            Gram head = new Gram(term.substring(0, i), frequency, Gram.Type.HEAD);
            Gram tail = new Gram(term.substring(i + 1), frequency, Gram.Type.TAIL);

            gramKey.set(head, EMPTY);
            context.write(gramKey, head);

            gramKey.set(head, ngram.getBytes());
            context.write(gramKey, ngram);

            gramKey.set(tail, EMPTY);
            context.write(gramKey, tail);

            gramKey.set(tail, ngram.getBytes());
            context.write(gramKey, ngram);
          } catch (IOException e) {
            throw new IllegalStateException(e);
          } catch (InterruptedException e) {
            throw new IllegalStateException(e);
          }
        }
        return true;
      }
    });

    unigrams.forEachPair(new ObjectIntProcedure<String>() {
      public boolean apply(String term, int frequency) {
        try {
          Gram unigram = new Gram(term, frequency, Gram.Type.UNIGRAM);
          gramKey.set(unigram, EMPTY);
          context.write(gramKey, unigram);
        } catch (IOException e) {
          throw new IllegalStateException(e);
        } catch (InterruptedException e) {
          throw new IllegalStateException(e);
        }
        return true;
      }
    });

    context.getCounter(Count.NGRAM_TOTAL).increment(count);
    sf.end();
  } finally {
    Closeables.close(sf, true);
  }
}
From source file: org.apache.giraph.hive.jython.HiveJythonUtils.java

/**
 * Parse set of Jython scripts from local files
 *
 * @param interpreter PythonInterpreter to use
 * @param paths Jython files to parse
 * @return JythonJob
 * @throws IOException
 */
public static JythonJob parseJythonFiles(PythonInterpreter interpreter, List<String> paths)
    throws IOException {
  InputStream[] streams = new InputStream[paths.size()];
  for (int i = 0; i < paths.size(); ++i) {
    LOG.info("Reading jython file " + paths.get(i));
    streams[i] = new FileInputStream(paths.get(i));
  }
  JythonJob jythonJob;
  try {
    jythonJob = parseJythonStreams(interpreter, streams);
  } finally {
    for (InputStream stream : streams) {
      Closeables.close(stream, true);
    }
  }
  return jythonJob;
}
From source file: com.turn.ttorrent.tracker.client.HTTPTrackerClient.java

@CheckForNull
public static HTTPTrackerMessage toMessage(@Nonnull HttpResponse response,
    @CheckForSigned long maxContentLength) throws IOException {
  HttpEntity entity = response.getEntity();
  if (entity == null) // usually 204 No Content, etc.
    return null;

  try {
    if (maxContentLength >= 0) {
      long contentLength = entity.getContentLength();
      if (contentLength >= 0)
        if (contentLength > maxContentLength)
          throw new IllegalArgumentException(
              "ContentLength was too big: " + contentLength + ": " + response);
    }

    InputStream in = entity.getContent();
    if (in == null)
      return null;
    try {
      StreamBDecoder decoder = new StreamBDecoder(in);
      BEValue value = decoder.bdecodeMap();
      Map<String, BEValue> params = value.getMap();
      // TODO: "warning message"
      if (params.containsKey("failure reason"))
        return HTTPTrackerErrorMessage.fromBEValue(params);
      else
        return HTTPAnnounceResponseMessage.fromBEValue(params);
    } finally {
      Closeables.close(in, true);
    }
  } catch (InvalidBEncodingException e) {
    throw new IOException("Failed to parse response " + response, e);
  } catch (TrackerMessage.MessageValidationException e) {
    throw new IOException("Failed to parse response " + response, e);
  } finally {
    EntityUtils.consumeQuietly(entity);
  }
}
From source file: com.minecave.pickaxes.util.nbt.EPNbtFactory.java

/**
 * Load the content of a file from a stream.
 * <p/>
 * Use {@link Files#newInputStreamSupplier(java.io.File)} to provide a stream from a file.
 *
 * @param stream - the stream supplier.
 * @param option - whether or not to decompress the input stream.
 * @return The decoded NBT compound.
 * @throws IOException If anything went wrong.
 */
public static NbtCompound fromStream(InputSupplier<? extends InputStream> stream, StreamOptions option)
    throws IOException {
  InputStream input = null;
  DataInputStream data = null;
  boolean suppress = true;
  try {
    input = stream.getInput();
    data = new DataInputStream(new BufferedInputStream(
        option == StreamOptions.GZIP_COMPRESSION ? new GZIPInputStream(input) : input));
    NbtCompound result = fromCompound(get().LOAD_COMPOUND.loadNbt(data));
    suppress = false;
    return result;
  } finally {
    if (data != null)
      Closeables.close(data, suppress);
    else if (input != null)
      Closeables.close(input, suppress);
  }
}
From source file: org.apache.mahout.clustering.classify.ClusterClassifier.java

public static ClusteringPolicy readPolicy(Path path) throws IOException {
  Path policyPath = new Path(path, POLICY_FILE_NAME);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(policyPath.toUri(), config);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, policyPath, config);
  Text key = new Text();
  ClusteringPolicyWritable cpw = new ClusteringPolicyWritable();
  reader.next(key, cpw);
  Closeables.close(reader, true);
  return cpw.getValue();
}
From source file: org.apache.mahout.clustering.lda.LDAPrintTopics.java

private static void printTopWords(List<Queue<Pair<String, Double>>> topWords, File outputDir)
    throws IOException {
  for (int i = 0; i < topWords.size(); ++i) {
    Collection<Pair<String, Double>> topK = topWords.get(i);
    Writer out = null;
    boolean printingToSystemOut = false;
    try {
      if (outputDir != null) {
        out = new OutputStreamWriter(new FileOutputStream(new File(outputDir, "topic_" + i)),
            Charsets.UTF_8);
      } else {
        out = new OutputStreamWriter(System.out, Charsets.UTF_8);
        printingToSystemOut = true;
        out.write("Topic " + i);
        out.write('\n');
        out.write("===========");
        out.write('\n');
      }
      List<Pair<String, Double>> topKasList = Lists.newArrayListWithCapacity(topK.size());
      for (Pair<String, Double> wordWithScore : topK) {
        topKasList.add(wordWithScore);
      }
      // sort by score, descending
      Collections.sort(topKasList, new Comparator<Pair<String, Double>>() {
        @Override
        public int compare(Pair<String, Double> pair1, Pair<String, Double> pair2) {
          return pair2.getSecond().compareTo(pair1.getSecond());
        }
      });
      for (Pair<String, Double> wordWithScore : topKasList) {
        out.write(wordWithScore.getFirst() + " [p(" + wordWithScore.getFirst() + "|topic_" + i
            + ") = " + wordWithScore.getSecond() + ']');
        out.write('\n');
      }
    } finally {
      if (!printingToSystemOut) {
        Closeables.close(out, false);
      } else {
        out.flush();
      }
    }
  }
}