Usage examples for the org.apache.hadoop.fs.AvroFSInput constructors
public AvroFSInput(final FileContext fc, final Path p) throws IOException
From source file:coldstorage.io.Reader.java
License:Apache License
public static void main(String[] args) throws IOException { List<Long> idsToFind = new ArrayList<Long>(); int maxId = 100000000; Random random = new Random(1); for (int i = 0; i < 1000; i++) { long id = (long) random.nextInt(maxId); // System.out.println(id); idsToFind.add(id);//from w ww .j a v a2s . c o m } // idsToFind.clear(); // idsToFind.add(58998000L); // Path pathData = new Path("./out/data.avro"); // Path pathIndex = new Path("./out/data.index"); Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro"); Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index"); Configuration configuration = new Configuration(); FileSystem fileSystem = pathData.getFileSystem(configuration); FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex); FileStatus dataFileStatus = fileSystem.getFileStatus(pathData); FSDataInputStream indexInputStream = fileSystem.open(pathIndex); FSDataInputStream dataInputStream = fileSystem.open(pathData); AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen()); GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(); DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr); List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen()); for (Long idToFind : idsToFind) { long t1 = System.nanoTime(); GenericRecord lookupRecord = lookupRecord(reader, list, idToFind); long t2 = System.nanoTime(); System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord); } }
From source file:com.btoddb.chronicle.apps.AvroTools.java
License:Open Source License
private void echoFile(Path inFile) throws IOException { FileContext context = FileContext.getFileContext(hdfsConfig); AvroFSInput input = new AvroFSInput(context, inFile); ReflectDatumReader<StorableAvroEvent> reader = new ReflectDatumReader<>(StorableAvroEvent.class); FileReader<StorableAvroEvent> fileReader = DataFileReader.openReader(input, reader); long count = 0; try {//from w ww . ja va 2s. c o m Schema schema = fileReader.getSchema(); for (StorableAvroEvent event : fileReader) { count++; System.out.println("event -> " + event.toString()); } } finally { fileReader.close(); } System.out.println("count = " + count); }
From source file:com.btoddb.chronicle.apps.AvroTools.java
License:Open Source License
private void testFileAndFix(Path inFile) throws IOException { FileContext context = FileContext.getFileContext(hdfsConfig); AvroFSInput input = new AvroFSInput(context, inFile); ReflectDatumReader<Object> reader = new ReflectDatumReader<>(); FileReader<Object> fileReader = DataFileReader.openReader(input, reader); Path outFile = inFile.suffix(".fixing"); FSDataOutputStream output = FileSystem.create(outFile.getFileSystem(hdfsConfig), outFile, FsPermission.getDefault());//from w ww. ja va 2 s . com DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>()); writer.setCodec(CodecFactory.snappyCodec()); boolean corrupted = false; long count = 0; try { Schema schema = fileReader.getSchema(); writer.create(schema, output); for (;;) { try { if (fileReader.hasNext()) { Object obj = fileReader.next(); count++; writer.append(obj); } else { break; } } catch (AvroRuntimeException e) { corrupted = true; System.out.println(" - file pointer = " + input.tell()); if (e.getCause() instanceof EOFException) { System.out.println(" - EOF occurred so we're done : " + e.getMessage()); break; } else if (e.getCause() instanceof IOException) { System.out.println(" - will try to 'next' past the error : " + e.getMessage()); try { fileReader.next(); System.out.println(" - 'next' worked - didn't really expect it to, but great!"); } catch (Exception e2) { System.out.println(" - 'next' did not work - will continue on and see what happens : " + e2.getMessage()); } continue; } break; } catch (Exception e) { corrupted = true; System.out.println(" - file pointer = " + input.tell()); e.printStackTrace(); break; } } } catch (Exception e) { e.printStackTrace(); } finally { System.out.println((" - processed " + count + " records")); if (null != fileReader) { try { fileReader.close(); } catch (Exception e) { e.printStackTrace(); } } if (null != writer) { try { writer.close(); } catch (Exception e) { e.printStackTrace(); } } } if (!corrupted) { 
outFile.getFileSystem(hdfsConfig).delete(outFile, false); } else { outFile.getFileSystem(hdfsConfig).rename(outFile, inFile.suffix(".fixed")); } }
From source file:com.cloudera.cdk.data.filesystem.FileSystemDatasetReader.java
License:Apache License
/**
 * Opens the Avro data file at {@code path} for reading and moves this reader from the
 * {@code NEW} state to the {@code OPEN} state.
 *
 * <p>The state precondition is checked first, so calling this on a reader that is not
 * {@code NEW} fails before any file is touched.
 *
 * @throws DatasetReaderException if the file status cannot be read or the file cannot be
 *         opened as an Avro data file
 */
@Override
public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
        "A reader may not be opened more than once - current state:%s", state);

    logger.debug("Opening reader on path:{}", path);

    try {
        // Look up the length before opening so a failed status call cannot strand an
        // already-open stream.
        final long length = fileSystem.getFileStatus(path).getLen();
        final AvroFSInput input = new AvroFSInput(fileSystem.open(path), length);

        boolean opened = false;
        try {
            reader = new DataFileReader<E>(input, new ReflectDatumReader<E>(schema));
            opened = true;
        } finally {
            if (!opened) {
                try {
                    input.close();
                } catch (IOException ignored) {
                    // The failure that prevented the reader from opening is the one worth
                    // reporting; a secondary close error must not replace it.
                }
            }
        }
    } catch (IOException e) {
        throw new DatasetReaderException("Unable to create reader path:" + path, e);
    }

    state = ReaderWriterState.OPEN;
}
From source file:com.uber.hoodie.common.table.log.avro.AvroLogAppender.java
License:Apache License
public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException { FileSystem fs = config.getFs(); this.config = config; this.autoFlush = config.isAutoFlush(); GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema()); this.writer = new DataFileWriter<>(datumWriter); Path path = config.getLogFile().getPath(); if (fs.exists(path)) { //TODO - check for log corruption and roll over if needed log.info(config.getLogFile() + " exists. Appending to existing file"); // this log path exists, we will append to it fs = FileSystem.get(fs.getConf()); try {//ww w .j a va2s. com this.output = fs.append(path, config.getBufferSize()); } catch (RemoteException e) { // this happens when either another task executor writing to this file died or data node is going down if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName()) && fs instanceof DistributedFileSystem) { log.warn("Trying to recover log on path " + path); if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) { log.warn("Recovered lease on path " + path); // try again this.output = fs.append(path, config.getBufferSize()); } else { log.warn("Failed to recover lease on path " + path); throw new HoodieException(e); } } } this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output); // we always want to flush to disk everytime a avro block is written this.writer.setFlushOnEveryBlock(true); } else { log.info(config.getLogFile() + " does not exist. Create a new file"); this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(), config.getBlockSize(), null); this.writer.create(config.getSchema(), output); this.writer.setFlushOnEveryBlock(true); // We need to close the writer to be able to tell the name node that we created this file // this.writer.close(); } }
From source file:com.uber.hoodie.common.table.log.avro.AvroLogReader.java
License:Apache License
/**
 * Opens the given log file as an Avro data file, decoding records against
 * {@code readerSchema}.
 *
 * @param file log file to read
 * @param fs file system whose configuration is used to resolve the file
 * @param readerSchema schema the records are expected to be read with
 * @throws IOException if the file cannot be opened or is not a readable Avro data file
 */
public AvroLogReader(HoodieLogFile file, FileSystem fs, Schema readerSchema) throws IOException {
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    datumReader.setExpected(readerSchema);

    final AvroFSInput input = new AvroFSInput(FileContext.getFileContext(fs.getConf()), file.getPath());
    DataFileReader<GenericRecord> opened;
    try {
        opened = (DataFileReader<GenericRecord>) DataFileReader.openReader(input, datumReader);
    } catch (IOException | RuntimeException e) {
        // No reader owns the input yet (open failed or the cast was rejected), so it has
        // to be released here or the underlying stream stays open.
        try {
            input.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }

    this.reader = opened;
    this.file = file;
}
From source file:org.apache.samza.system.hdfs.reader.AvroFileHdfsReader.java
License:Apache License
/**
 * Opens the Avro file at {@code pathStr} and positions the reader at
 * {@code singleFileOffset}.
 *
 * @param pathStr location of the Avro file to read
 * @param singleFileOffset offset within the file, passed unchanged to {@code seek}
 * @throws SamzaException if an {@link IOException} occurs while opening or seeking
 */
@Override
public void open(String pathStr, String singleFileOffset) {
    LOG.info(String.format("%s: Open file [%s] with file offset [%s] for read", systemStreamPartition, pathStr,
        singleFileOffset));

    Path path = new Path(pathStr);
    try {
        AvroFSInput input = new AvroFSInput(FileContext.getFileContext(path.toUri()), path);
        try {
            fileReader = new DataFileReader<>(input, new GenericDatumReader<>());
        } catch (IOException | RuntimeException e) {
            // The reader never took ownership of the input, so release it here instead of
            // leaving the underlying stream open.
            try {
                input.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        seek(singleFileOffset);
    } catch (IOException e) {
        throw new SamzaException(e);
    }
}