List of usage examples for org.apache.hadoop.io IOUtils closeStream
public static void closeStream(java.io.Closeable stream)
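IOUtils.closeStream(stream) is a null-safe convenience method: it closes a java.io.Closeable and swallows any IOException thrown by close(), which makes it safe to call from a finally block without masking an exception already propagating from the try block. A minimal sketch of the idiom, assuming a hypothetical HDFS path (/tmp/example.txt) chosen only for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = null;
        try {
            // hypothetical input path, for illustration only
            in = fs.open(new Path("/tmp/example.txt"));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // closeStream tolerates null and ignores IOException on close
            IOUtils.closeStream(in);
        }
    }
}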
From source file:org.archive.io.hbase.HBaseWriter.java
License:LGPL
/**
 * Write the crawled output to the configured HBase table. Write each row
 * key as the url with reverse domain and optionally process any content.
 *
 * @param curi
 *            URI of the crawled document
 * @param ip
 *            IP of the remote machine.
 * @param recordingOutputStream
 *            recording output stream that captured the GET request
 * @param recordingInputStream
 *            recording input stream that captured the response
 *
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
public void write(final HBaseWriterProcessor hBaseWriterProcessor, final CrawlURI curi, final String ip,
        final RecordingOutputStream recordingOutputStream, final RecordingInputStream recordingInputStream,
        long recordedSize) throws IOException {
    // generate the target url of the crawled document
    String url = curi.toString();
    // create the hbase friendly rowkey
    String rowKey = createRowKeyFromUrl(url);
    if (log.isLoggable(Level.FINE)) {
        log.log(Level.FINE, "Writing " + url + " as " + rowKey);
    }
    // Modify the row key if it is supposed to be stored in MD5 format
    if (getHbaseParameters().isMd5Key()) {
        rowKey = DigestUtils.md5Hex(rowKey);
    }
    // create an hbase mutation object (the Put object)
    // The cell timestamp is the same for all cells in each Put, and it is set
    // to the time the server finished responding back to heritrix.
    Put put = new Put(Bytes.toBytes(rowKey), curi.getFetchCompletedTime());
    // write the target url to the url column
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getUrlColumnName(), url);
    // write the target ip to the ip column
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getIpColumnName(), ip);
    // is the url part of the seed url (the initial url(s) used to start the crawl)
    if (curi.isSeed()) {
        addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
                getHbaseParameters().getIsSeedColumnName(), Bytes.toBytes(Boolean.TRUE.booleanValue()));
        if (curi.getPathFromSeed() != null && curi.getPathFromSeed().trim().length() > 0) {
            addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
                    getHbaseParameters().getPathFromSeedColumnName(), curi.getPathFromSeed());
        }
    }
    // write the Via string
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getViaColumnName(), curi.getVia() != null ? curi.getVia().toString() : null);
    // log the content length
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getContentLengthColumnName(), String.valueOf(curi.getContentLength()));
    // write out the content size
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getContentSizeColumnName(), String.valueOf(recordedSize));
    // write out the number of fetch attempts
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getFetchAttmptsColumnName(), String.valueOf(curi.getFetchAttempts()));
    // write out the time duration it took to fetch
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getFetchDurationColumnName(), String.valueOf(curi.getFetchDuration()));
    // write out the content type from the server
    addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
            getHbaseParameters().getContentTypeColumnName(), String.valueOf(curi.getContentType()));
    // write the added annotations
    if (curi.getAnnotations() != null && curi.getAnnotations().size() > 0) {
        addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
                getHbaseParameters().getFetchAnnotationsColumnName(),
                StringUtils.join(curi.getAnnotations(), getHbaseParameters().getFetchAnnotationsValueDelimiter()));
    }
    // server request
    ReplayInputStream requestStream = recordingOutputStream.getReplayInputStream();
    // server response
    ReplayInputStream responseStream = recordingInputStream.getReplayInputStream();
    try {
        // Write the crawl request to the Put object
        if (recordingOutputStream.getSize() > 0) {
            addSerializedDataToPut(put, getHbaseParameters().getCuriColumnFamily(),
                    getHbaseParameters().getRequestColumnName(),
                    getByteArrayFromInputStream(requestStream, (int) recordingOutputStream.getSize()));
        }
        // Write the crawl response to the Put object:
        // add the raw content to the table record.
        addSerializedDataToPut(put, getHbaseParameters().getContentColumnFamily(),
                getHbaseParameters().getContentColumnName(),
                getByteArrayFromInputStream(responseStream, (int) recordingInputStream.getSize()));
        // call the method that can be overridden from HBaseWriterProcessor
        hBaseWriterProcessor.modifyPut(getHbaseParameters(), curi, ip, put, recordingOutputStream,
                recordingInputStream);
        // write the Put object to the HBase table
        hTable.put(put);
    } finally {
        // close the replay streams once we are done using them
        IOUtils.closeStream(requestStream);
        IOUtils.closeStream(responseStream);
    }
}
From source file:org.avenir.util.EntityDistanceMapFileAccessor.java
License:Apache License
/**
 * Reads a delimited text file and writes its key/value pairs to a MapFile.
 *
 * @param inPutfilePathParam
 *            configuration parameter naming the input file
 * @param outPutfilePathParam
 *            configuration parameter naming the output MapFile
 * @param delim
 *            delimiter separating key and value on each line
 * @throws IOException
 */
public void write(String inPutfilePathParam, String outPutfilePathParam, String delim) throws IOException {
    InputStream fs = Utility.getFileStream(conf, inPutfilePathParam);
    if (null != fs) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs));
        String line = null;
        Path outputFile = new Path(conf.get(outPutfilePathParam));
        Text txtKey = new Text();
        Text txtValue = new Text();
        MapFile.Writer writer = new MapFile.Writer(conf, fileSys, outputFile.toString(), txtKey.getClass(),
                txtValue.getClass());
        try {
            while ((line = reader.readLine()) != null) {
                int pos = line.indexOf(delim);
                String key = line.substring(0, pos);
                String value = line.substring(pos + 1);
                txtKey.set(key);
                txtValue.set(value);
                writer.append(txtKey, txtValue);
            }
        } finally {
            // close both the writer and the reader, even on failure
            IOUtils.closeStream(writer);
            IOUtils.closeStream(reader);
        }
        this.delim = delim;
    }
}
From source file:org.avenir.util.EntityDistanceMapFileAccessor.java
License:Apache License
/**
 * Closes the underlying MapFile reader.
 */
public void closeReader() {
    IOUtils.closeStream(reader);
}
From source file:org.deeplearning4j.utils.ShowData2UIServer.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Load the saved training stats and serve them in the browser at http://localhost:9000
    File statsFile = null;
    if (hdfsPath) {
        statsFile = File.createTempFile("tmp", "dl4j");
        OutputStream os = new FileOutputStream(statsFile);
        FileSystem fs = CommonUtils.openHdfsConnect();
        InputStream in = fs.open(new Path("/user/hadoop/trainlog/AnimalModelByHdfsTrainingStatsSpark2.dl4j"));
        // copy the stats file out of HDFS into the local temp file
        IOUtils.copyBytes(in, os, 4096, false);
        IOUtils.closeStream(in);
        CommonUtils.closeHdfsConnect(fs);
        os.close();
    } else {
        statsFile = new File("/home/AnimalModelByHdfsTrainingStats1.dl4j");
    }
    StatsStorage statsStorage = new FileStatsStorage(statsFile);
    UIServer uiServer = UIServer.getInstance();
    uiServer.attach(statsStorage);
}
From source file:org.elasticsearch.hadoop.hdfs.blobstore.HdfsImmutableBlobContainer.java
License:Apache License
@Override
public void writeBlob(final String blobName, final InputStream is, final long sizeInBytes,
        final WriterListener listener) {
    blobStore.executor().execute(new Runnable() {
        @Override
        public void run() {
            Path file;
            FSDataOutputStream fileStream;
            try {
                file = new Path(path, blobName);
                fileStream = blobStore.fileSystem().create(file, true);
            } catch (Throwable th) {
                listener.onFailure(th);
                return;
            }
            try {
                try {
                    byte[] buffer = new byte[blobStore.bufferSizeInBytes()];
                    int bytesRead;
                    while ((bytesRead = is.read(buffer)) != -1) {
                        fileStream.write(buffer, 0, bytesRead);
                    }
                } finally {
                    IOUtils.closeStream(is);
                    IOUtils.closeStream(fileStream);
                }
                listener.onCompleted();
            } catch (Throwable th) {
                // just to be on the safe side, try to delete the file on failure
                try {
                    if (blobStore.fileSystem().exists(file)) {
                        blobStore.fileSystem().delete(file, true);
                    }
                } catch (Throwable t) {
                    // ignore
                }
                listener.onFailure(th);
            }
        }
    });
}
From source file:org.elasticsearch.repositories.hdfs.HdfsRepository.java
License:Apache License
@Override
protected void doClose() throws ElasticsearchException {
    super.doClose();
    IOUtils.closeStream(fs);
    fs = null;
    concurrentStreamPool.shutdown();
}
From source file:org.geotools.WholeFile.WholeFileRecordReader.java
License:Apache License
@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        String fileName = file.getName();
        key.set(fileName);
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file:org.geotools.WholeFile.WholeFileRecordReader_NewAPI.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file:org.hadoop.tdg.TestPseudoHadoop.java
License:Apache License
@BeforeClass
public static void setUpClass() throws IOException {
    RandomAccessFile f = null;
    try {
        /*
        f = new File(HOME_FILE);
        FileOutputStream out = new FileOutputStream(f);
        out.write("content".getBytes("UTF-8"));
        out.flush();
        */
        f = new RandomAccessFile(HOME_FILE, "rw");
        f.setLength(SIZE);
    } finally {
        IOUtils.closeStream(f);
    }
}
From source file:org.hadoop.tdg.TestPseudoHadoop.java
License:Apache License
public void copyFileWithProgress() throws IOException {
    InputStream in = null;
    FSDataOutputStream out = null;
    try {
        in = new BufferedInputStream(new FileInputStream(HOME_FILE));
        // FileSystem fs = FileSystem.get(URI.create(DST), conf);
        out = fs.create(p, new Progressable() {
            @Override
            public void progress() {
                System.out.print("~");
            }
        });
        IOUtils.copyBytes(in, out, 4096, true);
        // Assert.assertTrue(fs.getFileStatus(p).getLen() == );
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}