List of usage examples for org.apache.hadoop.fs.FileSystem.open
public FSDataInputStream open(Path f) throws IOException
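Before the examples from real projects, here is a minimal self-contained sketch of the typical pattern: resolve a FileSystem from a Path, open the stream, read it, and let try-with-resources close it. The class name OpenExample and the path /tmp/example.txt are hypothetical placeholders, not part of any project below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // hypothetical path

        // Resolve the FileSystem that owns this path (HDFS, local, etc.)
        FileSystem fs = path.getFileSystem(conf);

        // open() returns an FSDataInputStream, a seekable InputStream
        try (FSDataInputStream in = fs.open(path)) {
            byte[] buf = new byte[4096];
            int n;
            while ((n = in.read(buf)) > 0) {
                System.out.write(buf, 0, n);
            }
        }
    }
}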
From source file:com.kylinolap.common.persistence.HBaseResourceStoreTest.java
License:Apache License
@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();

    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}
From source file:com.liferay.hadoop.store.HDFSStore.java
License:Open Source License
@Override
public InputStream getFileAsStream(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        return fileSystem.open(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file:com.lightboxtechnologies.nsrl.SmallTableLoader.java
License:Apache License
protected static void load(FileSystem fs, String filename, LineHandler lh, RecordLoader loader)
        throws IOException {
    InputStream in = null;
    try {
        in = fs.open(new Path(filename));
        loader.load(in, lh);
        in.close();
    } finally {
        IOUtils.closeQuietly(in);
    }
}
From source file:com.lightboxtechnologies.spectrum.FileProxy.java
License:Apache License
public InputStream open(FileSystem fs, FSDataInputStream di, FsEntry entry) throws IOException {
    return fs.open(new Path(FilePath));
}
From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java
License:Apache License
protected static void handleFile(String relpath, FileSystem fs, Path p, ZipOutputStream zout, byte[] buf)
        throws IOException {
    final ZipEntry entry = new ZipEntry(relpath);
    zout.putNextEntry(entry);

    InputStream in = null;
    try {
        in = fs.open(p);
        IOUtils.copy(in, zout, buf);
        in.close();
    } finally {
        IOUtils.closeQuietly(in);
    }

    zout.closeEntry();
}
From source file:com.linkedin.cubert.io.rubix.RubixFile.java
License:Open Source License
@SuppressWarnings("unchecked") public List<KeyData<K>> getKeyData() throws IOException, ClassNotFoundException { final FileSystem fs = FileSystem.get(conf); keyData = new ArrayList<KeyData<K>>(); final long filesize = fs.getFileStatus(path).getLen(); FSDataInputStream in = fs.open(path); /* The last long in the file is the start position of the trailer section */ in.seek(filesize - 8);/*from ww w. ja v a 2 s . c om*/ long metaDataStartPos = in.readLong(); in.seek(metaDataStartPos); ObjectMapper mapper = new ObjectMapper(); metadataJson = mapper.readValue(in.readUTF(), JsonNode.class); int keySectionSize = in.readInt(); // load the key section byte[] keySection = new byte[keySectionSize]; in.seek(filesize - keySectionSize - 8); in.read(keySection, 0, keySectionSize); in.close(); ByteArrayInputStream bis = new ByteArrayInputStream(keySection); DataInput dataInput = new DataInputStream(bis); int numberOfBlocks = metadataJson.get("numberOfBlocks").getIntValue(); // load the key section keyClass = (Class<K>) ClassCache.forName(JsonUtils.getText(metadataJson, "keyClass")); valueClass = (Class<V>) ClassCache.forName(JsonUtils.getText(metadataJson, "valueClass")); SerializationFactory serializationFactory = new SerializationFactory(conf); Deserializer<K> deserializer = serializationFactory.getDeserializer(keyClass); deserializer.open(bis); while (bis.available() > 0 && numberOfBlocks > 0) { K key = deserializer.deserialize(null); long offset = dataInput.readLong(); long blockId = dataInput.readLong(); long numRecords = dataInput.readLong(); keyData.add(new KeyData<K>(key, offset, 0, numRecords, blockId)); numberOfBlocks--; } // Assign length to each keydata entry int numEntries = keyData.size(); for (int i = 1; i < numEntries; i++) { KeyData<K> prev = keyData.get(i - 1); KeyData<K> current = keyData.get(i); prev.setLength(current.getOffset() - prev.getOffset()); } if (numEntries > 0) { KeyData<K> last = keyData.get(numEntries - 1); last.setLength(metaDataStartPos - last.offset); } return keyData; }
From source file:com.linkedin.cubert.io.rubix.RubixFile.java
License:Open Source License
private static void extract(RubixFile<Tuple, Object> rfile, KeyData<Tuple> keyData, String output)
        throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException {
    final int BUF_SIZE = 32 * 1024;

    Configuration conf = new JobConf();
    File outFile = new File(output);
    if (outFile.exists())
        outFile.delete();
    outFile.createNewFile();

    long offset = keyData.getOffset();
    long length = keyData.getLength();
    Tuple key = keyData.getKey();

    print.f("Extracting block %d (off=%d len=%d) from %s", keyData.getBlockId(), offset, length,
            rfile.path.toString());

    BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outFile));

    // copy the data
    if (length > 0) {
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(rfile.path);
        in.seek(offset);

        byte[] data = new byte[BUF_SIZE];
        long toRead = length;
        while (toRead > 0) {
            int thisRead = toRead > BUF_SIZE ? BUF_SIZE : (int) toRead;
            in.readFully(data, 0, thisRead);
            bos.write(data, 0, thisRead);
            toRead -= thisRead;

            System.out.print(".");
        }
        System.out.println();
    }

    // copy the key section
    ByteArrayOutputStream keySectionStream = new ByteArrayOutputStream();
    DataOutput keySectionOut = new DataOutputStream(keySectionStream);
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    Serializer<Tuple> keySerializer = serializationFactory.getSerializer(rfile.getKeyClass());
    keySerializer.open(keySectionStream);

    keySerializer.serialize(key);
    keySectionOut.writeLong(0); // position
    keySectionOut.writeLong(keyData.getBlockId());
    keySectionOut.writeLong(keyData.getNumRecords());

    byte[] trailerBytes = keySectionStream.toByteArray();

    JsonNode json = JsonUtils.cloneNode(rfile.metadataJson);
    ((ObjectNode) json).put("numberOfBlocks", 1);

    DataOutput out = new DataOutputStream(bos);
    out.writeUTF(json.toString());
    out.writeInt(trailerBytes.length);
    out.write(trailerBytes);
    out.writeLong(length); // trailer start offset

    bos.close();
}
From source file:com.linkedin.cubert.io.rubix.RubixRecordReader.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    @SuppressWarnings("unchecked")
    RubixInputSplit<K, V> rsplit = (RubixInputSplit<K, V>) split;

    SerializationFactory serializationFactory = new SerializationFactory(conf);
    switch (rsplit.getBlockSerializationType()) {
    case DEFAULT:
        valueDeserializer = serializationFactory.getDeserializer(rsplit.getValueClass());
        break;
    case COMPACT:
        BlockSchema schema = rsplit.getSchema();
        valueDeserializer = new CompactDeserializer<V>(schema);
        break;
    }

    key = rsplit.getKey();

    // store the blockid and partition key in the conf
    conf.setLong("MY_BLOCK_ID", rsplit.getBlockId());
    conf.setLong("MY_NUM_RECORDS", rsplit.getNumRecords());
    ByteArrayOutputStream tmpOut = new ByteArrayOutputStream();
    ((Tuple) key).write(new DataOutputStream(tmpOut));
    String keySerialized = SerializerUtils.serializeToString(tmpOut.toByteArray());
    conf.set("MY_PARTITION_KEY", keySerialized);

    Path path = rsplit.getFilename();
    offset = rsplit.getOffset();
    length = rsplit.getLength();

    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    fsin.seek(offset);

    in = new BlockInputStream(fsin, length);

    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec != null) {
        print.f("codec is not null and it is %s", codec.getClass().toString());
        in = codec.createInputStream(in);
    } else {
        print.f("codec is null");
    }

    valueDeserializer.open(in);
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java
License:Apache License
/**
 * This method is called to return the schema of an avro schema file. This
 * method is different than {@link #getSchema}, which returns the schema
 * from a data file.
 *
 * @param path path of a file or first level directory
 * @param fs file system
 * @return avro schema
 * @throws IOException
 */
protected Schema getSchemaFromFile(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);
    if (lastFile == null) {
        return null;
    }

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    Schema ret = Schema.parse(hdfsInputStream);
    hdfsInputStream.close();

    return ret;
}
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorageUtils.java
License:Apache License
/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an avro file; or the schema of the last file in a first-level
 * directory (it does not contain sub-directories).
 *
 * @param path path of a file or first level directory
 * @param fs file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);
    if (lastFile == null) {
        return null;
    }

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
    Schema ret = avroDataStream.getSchema();
    avroDataStream.close();

    return ret;
}