Example usage for org.apache.hadoop.fs AvroFSInput AvroFSInput

List of usage examples for org.apache.hadoop.fs AvroFSInput AvroFSInput

Introduction

On this page you can find example usage for org.apache.hadoop.fs AvroFSInput AvroFSInput.

Prototype

public AvroFSInput(final FileContext fc, final Path p) throws IOException 

Document

Construct given a FileContext and a Path.
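
Below is a minimal sketch of this constructor in use, assuming a hypothetical HDFS path and reading with a GenericDatumReader that takes its schema from the file itself; it mirrors the pattern used in the examples that follow.

import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.AvroFSInput;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class AvroFSInputExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical location; substitute your own Avro container file.
        Path path = new Path("hdfs://localhost:9000/example/data.avro");

        // AvroFSInput adapts Hadoop's seekable input streams to Avro's
        // SeekableInput, letting Avro's file reader work against HDFS.
        FileContext fc = FileContext.getFileContext(new Configuration());
        AvroFSInput input = new AvroFSInput(fc, path);

        // The datum reader picks up the writer schema embedded in the file.
        try (DataFileReader<GenericRecord> reader =
                new DataFileReader<>(input, new GenericDatumReader<GenericRecord>())) {
            for (GenericRecord record : reader) {
                System.out.println(record);
            }
        }
    }
}

Note that AvroFSInput also offers an AvroFSInput(FSDataInputStream in, long length) overload for callers that already hold an open stream; several of the examples below use that form.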

Usage

From source file: coldstorage.io.Reader.java

License: Apache License

public static void main(String[] args) throws IOException {

    List<Long> idsToFind = new ArrayList<Long>();
    int maxId = 100000000;
    Random random = new Random(1);
    for (int i = 0; i < 1000; i++) {
        long id = (long) random.nextInt(maxId);
        //      System.out.println(id);
        idsToFind.add(id);
    }

    // idsToFind.clear();
    // idsToFind.add(58998000L);

    //    Path pathData = new Path("./out/data.avro");
    //    Path pathIndex = new Path("./out/data.index");

    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");

    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);
    FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex);
    FileStatus dataFileStatus = fileSystem.getFileStatus(pathData);
    FSDataInputStream indexInputStream = fileSystem.open(pathIndex);
    FSDataInputStream dataInputStream = fileSystem.open(pathData);

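    // Note: this example uses the AvroFSInput(FSDataInputStream, long) overload
    // rather than the AvroFSInput(FileContext, Path) constructor documented above.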
    AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr);

    List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen());

    for (Long idToFind : idsToFind) {
        long t1 = System.nanoTime();
        GenericRecord lookupRecord = lookupRecord(reader, list, idToFind);
        long t2 = System.nanoTime();
        System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord);
    }
}

From source file: com.btoddb.chronicle.apps.AvroTools.java

License: Open Source License

private void echoFile(Path inFile) throws IOException {
    FileContext context = FileContext.getFileContext(hdfsConfig);
    AvroFSInput input = new AvroFSInput(context, inFile);

    ReflectDatumReader<StorableAvroEvent> reader = new ReflectDatumReader<>(StorableAvroEvent.class);
    FileReader<StorableAvroEvent> fileReader = DataFileReader.openReader(input, reader);
    long count = 0;
    try {
        Schema schema = fileReader.getSchema();
        for (StorableAvroEvent event : fileReader) {
            count++;
            System.out.println("event -> " + event.toString());
        }
    } finally {
        fileReader.close();
    }

    System.out.println("count = " + count);
}

From source file: com.btoddb.chronicle.apps.AvroTools.java

License: Open Source License

private void testFileAndFix(Path inFile) throws IOException {
    FileContext context = FileContext.getFileContext(hdfsConfig);
    AvroFSInput input = new AvroFSInput(context, inFile);

    ReflectDatumReader<Object> reader = new ReflectDatumReader<>();
    FileReader<Object> fileReader = DataFileReader.openReader(input, reader);

    Path outFile = inFile.suffix(".fixing");
    FSDataOutputStream output = FileSystem.create(outFile.getFileSystem(hdfsConfig), outFile,
            FsPermission.getDefault());
    DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.setCodec(CodecFactory.snappyCodec());

    boolean corrupted = false;
    long count = 0;

    try {
        Schema schema = fileReader.getSchema();
        writer.create(schema, output);

        for (;;) {
            try {
                if (fileReader.hasNext()) {
                    Object obj = fileReader.next();
                    count++;
                    writer.append(obj);
                } else {
                    break;
                }
            } catch (AvroRuntimeException e) {
                corrupted = true;
                System.out.println("  - file pointer = " + input.tell());
                if (e.getCause() instanceof EOFException) {
                    System.out.println("  - EOF occurred so we're done : " + e.getMessage());
                    break;
                } else if (e.getCause() instanceof IOException) {
                    System.out.println("  - will try to 'next' past the error : " + e.getMessage());
                    try {
                        fileReader.next();
                        System.out.println("  - 'next' worked - didn't really expect it to, but great!");
                    } catch (Exception e2) {
                        System.out.println("  - 'next' did not work - will continue on and see what happens : "
                                + e2.getMessage());
                    }
                    continue;
                }
                break;
            } catch (Exception e) {
                corrupted = true;
                System.out.println("  - file pointer = " + input.tell());
                e.printStackTrace();
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        System.out.println(("  - processed " + count + " records"));
        if (null != fileReader) {
            try {
                fileReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (null != writer) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    if (!corrupted) {
        outFile.getFileSystem(hdfsConfig).delete(outFile, false);
    } else {
        outFile.getFileSystem(hdfsConfig).rename(outFile, inFile.suffix(".fixed"));
    }
}

From source file: com.cloudera.cdk.data.filesystem.FileSystemDatasetReader.java

License: Apache License

@Override
public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
            "A reader may not be opened more than once - current state:%s", state);

    logger.debug("Opening reader on path:{}", path);

    try {
        reader = new DataFileReader<E>(
                new AvroFSInput(fileSystem.open(path), fileSystem.getFileStatus(path).getLen()),
                new ReflectDatumReader<E>(schema));
    } catch (IOException e) {
        throw new DatasetReaderException("Unable to create reader path:" + path, e);
    }

    state = ReaderWriterState.OPEN;
}

From source file: com.uber.hoodie.common.table.log.avro.AvroLogAppender.java

License: Apache License

public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException {
    FileSystem fs = config.getFs();
    this.config = config;
    this.autoFlush = config.isAutoFlush();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema());
    this.writer = new DataFileWriter<>(datumWriter);
    Path path = config.getLogFile().getPath();

    if (fs.exists(path)) {
        //TODO - check for log corruption and roll over if needed
        log.info(config.getLogFile() + " exists. Appending to existing file");
        // this log path exists, we will append to it
        fs = FileSystem.get(fs.getConf());
        try {
            this.output = fs.append(path, config.getBufferSize());
        } catch (RemoteException e) {
            // this happens when another task executor writing to this file died or a data node is going down
            if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName())
                    && fs instanceof DistributedFileSystem) {
                log.warn("Trying to recover log on path " + path);
                if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
                    log.warn("Recovered lease on path " + path);
                    // try again
                    this.output = fs.append(path, config.getBufferSize());
                } else {
                    log.warn("Failed to recover lease on path " + path);
                    throw new HoodieException(e);
                }
            }
        }
        this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output);
        // we always want to flush to disk every time an Avro block is written
        this.writer.setFlushOnEveryBlock(true);
    } else {
        log.info(config.getLogFile() + " does not exist. Create a new file");
        this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(),
                config.getBlockSize(), null);
        this.writer.create(config.getSchema(), output);
        this.writer.setFlushOnEveryBlock(true);
        // We need to close the writer to be able to tell the name node that we created this file
        // this.writer.close();
    }
}

From source file: com.uber.hoodie.common.table.log.avro.AvroLogReader.java

License: Apache License

public AvroLogReader(HoodieLogFile file, FileSystem fs, Schema readerSchema) throws IOException {
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    datumReader.setExpected(readerSchema);
    final AvroFSInput input = new AvroFSInput(FileContext.getFileContext(fs.getConf()), file.getPath());
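    // openReader returns a FileReader; the cast assumes a standard-format Avro
    // data file, for which openReader yields a DataFileReader instance.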
    this.reader = (DataFileReader<GenericRecord>) DataFileReader.openReader(input, datumReader);
    this.file = file;
}

From source file: org.apache.samza.system.hdfs.reader.AvroFileHdfsReader.java

License: Apache License

@Override
public void open(String pathStr, String singleFileOffset) {
    LOG.info(String.format("%s: Open file [%s] with file offset [%s] for read", systemStreamPartition, pathStr,
            singleFileOffset));
    Path path = new Path(pathStr);
    try {
        AvroFSInput input = new AvroFSInput(FileContext.getFileContext(path.toUri()), path);
        fileReader = new DataFileReader<>(input, new GenericDatumReader<>());
        seek(singleFileOffset);
    } catch (IOException e) {
        throw new SamzaException(e);
    }
}