Example usage for org.apache.hadoop.io MapFile DATA_FILE_NAME

List of usage examples for org.apache.hadoop.io MapFile DATA_FILE_NAME

Introduction

In this page you can find the example usage for org.apache.hadoop.io MapFile DATA_FILE_NAME.

Prototype

String DATA_FILE_NAME

To view the source code for org.apache.hadoop.io MapFile DATA_FILE_NAME, click the Source Link.

Document

The name of the data file.

Usage

From source file:io.aos.hdfs.MapFileFixer.java

License:Apache License

/**
 * Rebuilds the index file of an existing MapFile directory.
 *
 * <p>The key and value classes are read from the MapFile's data sequence file and then passed to
 * {@code MapFile.fix} together with {@code false} as its fifth argument; the entry count it
 * returns is printed to standard output.
 *
 * @param args {@code args[0]} is the URI of the MapFile directory to fix
 * @throws IllegalArgumentException if no MapFile URI is supplied
 * @throws Exception if the data file cannot be read or {@code MapFile.fix} fails
 */
public static void main(String... args) throws Exception {
    if (args == null || args.length < 1) {
        throw new IllegalArgumentException("Usage: MapFileFixer <map file URI>");
    }
    String mapUri = args[0];

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(URI.create(mapUri), conf);
    Path map = new Path(mapUri);
    Path mapData = new Path(map, MapFile.DATA_FILE_NAME);

    // Get key and value types from data sequence file; the reader is closed even if
    // reading the header information fails.
    Class keyClass;
    Class valueClass;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, mapData, conf);
    try {
        keyClass = reader.getKeyClass();
        valueClass = reader.getValueClass();
    } finally {
        reader.close();
    }

    // Create the map file index file
    long entries = MapFile.fix(fs, map, keyClass, valueClass, false, conf);
    System.out.printf("Created MapFile %s with %d entries\n", map, entries);
}

From source file:kogiri.common.hadoop.io.reader.map.IndexCloseableMapFileReader.java

License:Apache License

/**
 * Opens the data reader and the index reader of the MapFile directory {@code dirName}.
 *
 * @param fs file system used to open both files
 * @param dirName path of the MapFile directory
 * @param comparator key comparator to use; when {@code null}, one is obtained from
 *        {@code WritableComparator.get} using the data file's key class
 * @param conf configuration handed to both readers
 * @throws IOException declared for failures while opening the readers
 */
protected synchronized void open(FileSystem fs, String dirName, WritableComparator comparator,
        Configuration conf) throws IOException {
    final Path mapDir = new Path(dirName);

    // The data file goes first: its start position and key class are needed below.
    this.data = createDataFileReader(fs, new Path(mapDir, MapFile.DATA_FILE_NAME), conf);
    this.firstPosition = this.data.getPosition();

    this.comparator = (comparator != null)
            ? comparator
            : WritableComparator.get(this.data.getKeyClass().asSubclass(WritableComparable.class));

    // The index is read as a plain sequence file.
    this.index = new SequenceFile.Reader(fs, new Path(mapDir, MapFile.INDEX_FILE_NAME), conf);
}

From source file:kogiri.mapreduce.preprocess.common.helpers.KmerIndexHelper.java

License:Open Source License

/**
 * Collects the MapFile data-file paths of all k-mer index parts reachable from the given inputs.
 *
 * <p>An input that is a directory accepted by {@code KmerIndexPartPathFilter} contributes its own
 * data file. Any other input directory is searched one level deep, and each accepted child
 * directory contributes its data file. Inputs that do not exist or are not directories are skipped.
 *
 * @param conf configuration used to resolve each path's file system
 * @param inputPaths paths to inspect
 * @return the data-file paths found, in discovery order (possibly empty)
 * @throws IOException declared for file system failures
 */
public static Path[] getAllKmerIndexPartDataFilePath(Configuration conf, Path[] inputPaths) throws IOException {
    List<Path> dataFiles = new ArrayList<Path>();
    KmerIndexPartPathFilter partFilter = new KmerIndexPartPathFilter();

    for (Path candidate : inputPaths) {
        FileSystem fs = candidate.getFileSystem(conf);
        if (!fs.exists(candidate)) {
            continue;
        }
        if (!fs.getFileStatus(candidate).isDir()) {
            continue;
        }

        if (partFilter.accept(candidate)) {
            // The input itself is an index part.
            dataFiles.add(new Path(candidate, MapFile.DATA_FILE_NAME));
            continue;
        }

        // Otherwise look for index parts among the direct children.
        for (FileStatus child : fs.listStatus(candidate)) {
            if (child.isDir() && partFilter.accept(child.getPath())) {
                dataFiles.add(new Path(child.getPath(), MapFile.DATA_FILE_NAME));
            }
        }
    }

    return dataFiles.toArray(new Path[0]);
}

From source file:org.apache.accumulo.core.file.map.MapFileOperations.java

License:Apache License

/**
 * Returns the length of the data file inside the MapFile directory named by the operation.
 *
 * @param options supplies the file system and the MapFile directory name
 * @return length reported by the file status of {@code <filename>/<MapFile.DATA_FILE_NAME>}
 * @throws IOException declared for file system failures
 */
@Override
protected long getFileSize(GetFileSizeOperation options) throws IOException {
    Path dataFile = new Path(options.getFilename() + "/" + MapFile.DATA_FILE_NAME);
    return options.getFileSystem().getFileStatus(dataFile).getLen();
}

From source file:org.apache.accumulo.master.tableOps.BulkImport.java

License:Apache License

/**
 * Moves the importable files found in {@code dir} into a newly created bulk directory.
 *
 * <p>Each entry of {@code dir} is handled by its own task on a worker pool. A task skips entries
 * whose extension is not accepted by {@code FileOperations.getValidExtensions()}, as well as
 * map-file candidates that are not directories, are named {@code _logs}, or have no regular
 * data file. Every remaining entry is renamed to {@code I<unique name>.<extension>} inside the
 * bulk directory. A rename that throws {@code IOException} is logged and not propagated.
 *
 * @param master used for the in-progress flag and for the worker-count configuration
 * @param fs volume manager used for listing, status checks and renames
 * @param dir directory holding the files to import
 * @param tableId table the bulk directory is created for
 * @return the bulk directory path as a string
 * @throws Exception the first non-null exception returned by a task, or a failure while
 *         waiting for the tasks
 */
private String prepareBulkImport(Master master, final VolumeManager fs, String dir, String tableId)
        throws Exception {
    final Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag(master,
            "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    FileStatus[] mapFiles = fs.listStatus(new Path(dir));

    final UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    int workerCount = master.getConfiguration().getCount(Property.MASTER_BULK_RENAME_THREADS);
    SimpleThreadPool workers = new SimpleThreadPool(workerCount, "bulk move");
    List<Future<Exception>> results = new ArrayList<>();

    for (final FileStatus fileStatus : mapFiles) {
        results.add(workers.submit(new Callable<Exception>() {
            @Override
            public Exception call() throws Exception {
                try {
                    final Path source = fileStatus.getPath();
                    final String[] nameParts = source.getName().split("\\.");

                    final String extension;
                    if (nameParts.length > 1) {
                        extension = nameParts[nameParts.length - 1];

                        if (!FileOperations.getValidExtensions().contains(extension)) {
                            log.warn(source + " does not have a valid extension, ignoring");
                            return null;
                        }
                    } else {
                        // assume it is a map file
                        extension = Constants.MAPFILE_EXTENSION;
                    }

                    if (extension.equals(Constants.MAPFILE_EXTENSION)) {
                        // A map file is a directory containing a regular data file.
                        if (!fileStatus.isDirectory()) {
                            log.warn(source + " is not a map file, ignoring");
                            return null;
                        }

                        if (source.getName().equals("_logs")) {
                            log.info(source
                                    + " is probably a log directory from a map/reduce task, skipping");
                            return null;
                        }

                        try {
                            Path dataFile = new Path(source, MapFile.DATA_FILE_NAME);
                            if (fs.getFileStatus(dataFile).isDirectory()) {
                                log.warn(source + " is not a map file, ignoring");
                                return null;
                            }
                        } catch (FileNotFoundException fnfe) {
                            log.warn(source + " is not a map file, ignoring");
                            return null;
                        }
                    }

                    Path newPath = new Path(bulkDir, "I" + namer.getNextName() + "." + extension);
                    try {
                        fs.rename(source, newPath);
                        log.debug("Moved " + source + " to " + newPath);
                    } catch (IOException renameFailure) {
                        log.error("Could not move: {} {}", source.toString(), renameFailure.getMessage());
                    }
                    return null;
                } catch (Exception taskFailure) {
                    // Handed back to the submitting thread, which rethrows it.
                    return taskFailure;
                }
            }
        }));
    }

    workers.shutdown();
    while (!workers.awaitTermination(1000L, TimeUnit.MILLISECONDS)) {
        // keep waiting until every task has finished
    }

    for (Future<Exception> pending : results) {
        Exception failure = pending.get();
        if (failure != null) {
            throw failure;
        }
    }
    return bulkDir.toString();
}

From source file:org.apache.accumulo.master.tableOps.bulkVer1.BulkImport.java

License:Apache License

/**
 * Moves the importable files found in {@code dir} into a newly created bulk directory.
 *
 * <p>Each entry of {@code dir} is handled by its own task on a worker pool. A task skips entries
 * whose extension is not accepted by {@code FileOperations.getValidExtensions()}, as well as
 * map-file candidates that are not directories, are named {@code _logs}, or have no regular
 * data file. Every remaining entry is renamed to {@code I<unique name>.<extension>} inside the
 * bulk directory. A rename that throws {@code IOException} is logged and not propagated.
 *
 * @param master used for the in-progress flag and for the worker-count configuration
 * @param fs volume manager used for listing, status checks and renames
 * @param dir directory holding the files to import
 * @param tableId table the bulk directory is created for
 * @return the bulk directory path as a string
 * @throws Exception the first non-null exception returned by a task, or a failure while
 *         waiting for the tasks
 */
private String prepareBulkImport(Master master, final VolumeManager fs, String dir, Table.ID tableId)
        throws Exception {
    final Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag(master,
            "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    FileStatus[] mapFiles = fs.listStatus(new Path(dir));

    final UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    int workerCount = master.getConfiguration().getCount(Property.MASTER_BULK_RENAME_THREADS);
    SimpleThreadPool workers = new SimpleThreadPool(workerCount, "bulk move");
    List<Future<Exception>> results = new ArrayList<>();

    for (FileStatus fileStatus : mapFiles) {
        results.add(workers.submit(() -> {
            try {
                final Path source = fileStatus.getPath();
                final String[] nameParts = source.getName().split("\\.");

                final String extension;
                if (nameParts.length > 1) {
                    extension = nameParts[nameParts.length - 1];

                    if (!FileOperations.getValidExtensions().contains(extension)) {
                        log.warn("{} does not have a valid extension, ignoring", source);
                        return null;
                    }
                } else {
                    // assume it is a map file
                    extension = Constants.MAPFILE_EXTENSION;
                }

                if (extension.equals(Constants.MAPFILE_EXTENSION)) {
                    // A map file is a directory containing a regular data file.
                    if (!fileStatus.isDirectory()) {
                        log.warn("{} is not a map file, ignoring", source);
                        return null;
                    }

                    if (source.getName().equals("_logs")) {
                        log.info("{} is probably a log directory from a map/reduce task, skipping",
                                source);
                        return null;
                    }

                    try {
                        Path dataFile = new Path(source, MapFile.DATA_FILE_NAME);
                        if (fs.getFileStatus(dataFile).isDirectory()) {
                            log.warn("{} is not a map file, ignoring", source);
                            return null;
                        }
                    } catch (FileNotFoundException fnfe) {
                        log.warn("{} is not a map file, ignoring", source);
                        return null;
                    }
                }

                Path newPath = new Path(bulkDir, "I" + namer.getNextName() + "." + extension);
                try {
                    fs.rename(source, newPath);
                    log.debug("Moved {} to {}", source, newPath);
                } catch (IOException renameFailure) {
                    log.error("Could not move: {} {}", source, renameFailure.getMessage());
                }
                return null;
            } catch (Exception taskFailure) {
                // Handed back to the submitting thread, which rethrows it.
                return taskFailure;
            }
        }));
    }

    workers.shutdown();
    while (!workers.awaitTermination(1000L, TimeUnit.MILLISECONDS)) {
        // keep waiting until every task has finished
    }

    for (Future<Exception> pending : results) {
        Exception failure = pending.get();
        if (failure != null) {
            throw failure;
        }
    }
    return bulkDir.toString();
}

From source file:org.apache.accumulo.server.logger.LogWriter.java

License:Apache License

/**
 * Starts an asynchronous copy of a local write-ahead log to {@code fullyQualifiedFileName}.
 *
 * <p>If {@code file2id} still maps {@code localLog} to an id, that log is closed first. The local
 * file is then resolved through {@code findLocalFilename}, and the copy itself is queued on
 * {@code copyThreadPool}; this method does not wait for it.
 *
 * @param info passed through to {@code close} when the log is still registered
 * @param credentials not referenced anywhere in this method body
 * @param localLog name of the local log to copy
 * @param fullyQualifiedFileName destination name used on {@code fs}
 * @param sort when true the log is written as sorted MapFile parts ({@code copySortLog});
 *        otherwise it is copied entry by entry ({@code copyLog})
 * @return a {@code LogCopyInfo} built from the local file's length and {@code null}
 * @throws RuntimeException wrapping a {@code NoSuchLogIDException} from {@code close} or a
 *         {@code FileNotFoundException} from {@code findLocalFilename}
 */
@Override
public LogCopyInfo startCopy(TInfo info, AuthInfo credentials, final String localLog,
        final String fullyQualifiedFileName, final boolean sort) {
    log.info("Copying " + localLog + " to " + fullyQualifiedFileName);
    final long t1 = System.currentTimeMillis();
    try {
        // Close the log first if it is still registered as open.
        Long id = file2id.get(localLog);
        if (id != null)
            close(info, id);
    } catch (NoSuchLogIDException e) {
        log.error("Unexpected error thrown", e);
        throw new RuntimeException(e);
    }
    File file;
    try {
        file = new File(findLocalFilename(localLog));
        log.info(file.getAbsoluteFile().toString());
    } catch (FileNotFoundException ex) {
        throw new RuntimeException(ex);
    }
    // Size of the local file at the time the copy is requested; returned to the caller.
    long result = file.length();

    copyThreadPool.execute(new Runnable() {
        /**
         * Attempts the copy up to three times, pausing between attempts; if every attempt
         * fails with an IOException, a "<destination>.failed" flag file is created.
         */
        @Override
        public void run() {
            Thread.currentThread().setName("Copying " + localLog + " to shared file system");
            for (int i = 0; i < 3; i++) {
                try {
                    if (sort) {
                        copySortLog(localLog, fullyQualifiedFileName);
                    } else {
                        copyLog(localLog, fullyQualifiedFileName);
                    }
                    // Success: nothing below this loop runs.
                    return;
                } catch (IOException e) {
                    log.error("error during copy", e);
                }
                // Pause before retrying (presumably milliseconds -- confirm against UtilWaitThread).
                UtilWaitThread.sleep(1000);
            }
            log.error("Unable to copy file to DFS, too many retries " + localLog);
            try {
                fs.create(new Path(fullyQualifiedFileName + ".failed")).close();
            } catch (IOException ex) {
                log.error("Unable to create failure flag file", ex);
            }
            // NOTE(review): because of the early return above, this timing metric is only
            // recorded when all retries failed -- confirm whether that is intended.
            long t2 = System.currentTimeMillis();
            if (metrics.isEnabled())
                metrics.add(LogWriterMetrics.copy, (t2 - t1));
        }

        /**
         * Reads the local log and writes it under "<destination>.recovered" as one or more
         * sorted parts, then creates a "finished" marker file in that directory.
         */
        private void copySortLog(String localLog, String fullyQualifiedFileName) throws IOException {
            final long SORT_BUFFER_SIZE = acuConf.getMemoryInBytes(Property.LOGGER_SORT_BUFFER_SIZE);

            FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
            Path dest = new Path(fullyQualifiedFileName + ".recovered");
            log.debug("Sorting log file to DSF " + dest);
            fs.mkdirs(dest);
            int part = 0;

            Reader reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
            try {
                final ArrayList<Pair<LogFileKey, LogFileValue>> kv = new ArrayList<Pair<LogFileKey, LogFileValue>>();
                long memorySize = 0;
                while (true) {
                    final long position = reader.getPosition();
                    // Fresh key/value objects per entry, since they are retained in the buffer.
                    final LogFileKey key = new LogFileKey();
                    final LogFileValue value = new LogFileValue();
                    try {
                        if (!reader.next(key, value))
                            break;
                    } catch (EOFException e) {
                        // A truncated log ends the read; entries read so far are still written.
                        log.warn("Unexpected end of file reading write ahead log " + localLog);
                        break;
                    }
                    kv.add(new Pair<LogFileKey, LogFileValue>(key, value));
                    // The buffer size is estimated from the bytes consumed in the reader.
                    memorySize += reader.getPosition() - position;
                    if (memorySize > SORT_BUFFER_SIZE) {
                        writeSortedEntries(dest, part++, kv);
                        kv.clear();
                        memorySize = 0;
                    }
                }

                if (!kv.isEmpty())
                    writeSortedEntries(dest, part++, kv);
                fs.create(new Path(dest, "finished")).close();
            } finally {
                reader.close();
            }
        }

        /**
         * Sorts the buffered entries by key and writes them as the MapFile
         * "<dest>/part-r-NNNNN".
         */
        private void writeSortedEntries(Path dest, int part, final List<Pair<LogFileKey, LogFileValue>> kv)
                throws IOException {
            String path = dest + String.format("/part-r-%05d", part);
            log.debug("Writing partial log file to DSF " + path);
            log.debug("Sorting");
            Span span = Trace.start("Logger sort");
            span.data("logfile", dest.getName());
            Collections.sort(kv, new Comparator<Pair<LogFileKey, LogFileValue>>() {
                @Override
                public int compare(Pair<LogFileKey, LogFileValue> o1, Pair<LogFileKey, LogFileValue> o2) {
                    return o1.getFirst().compareTo(o2.getFirst());
                }
            });
            span.stop();
            span = Trace.start("Logger write");
            span.data("logfile", dest.getName());
            MapFile.Writer writer = new MapFile.Writer(fs.getConf(), fs, path, LogFileKey.class,
                    LogFileValue.class);
            short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
            // NOTE(review): these calls sit outside the try below, so if either throws the
            // writer is not closed and the span is not stopped -- consider moving them inside.
            fs.setReplication(new Path(path + "/" + MapFile.DATA_FILE_NAME), replication);
            fs.setReplication(new Path(path + "/" + MapFile.INDEX_FILE_NAME), replication);
            try {
                for (Pair<LogFileKey, LogFileValue> entry : kv)
                    writer.append(entry.getFirst(), entry.getSecond());
            } finally {
                writer.close();
                span.stop();
            }
        }

        /**
         * Copies the local log entry by entry to "<destination>.copy" and then renames that
         * file to the destination name.
         */
        private void copyLog(final String localLog, final String fullyQualifiedFileName) throws IOException {
            Path dest = new Path(fullyQualifiedFileName + ".copy");
            log.debug("Copying log file to DSF " + dest);
            // Remove any earlier copy at the temporary name before writing.
            fs.delete(dest, true);
            LogFileKey key = new LogFileKey();
            LogFileValue value = new LogFileValue();
            Writer writer = null;
            Reader reader = null;
            try {
                short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
                writer = SequenceFile.createWriter(fs, fs.getConf(), dest, LogFileKey.class, LogFileValue.class,
                        fs.getConf().getInt("io.file.buffer.size", 4096), replication, fs.getDefaultBlockSize(),
                        SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null, new Metadata());
                FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
                reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
                while (reader.next(key, value)) {
                    writer.append(key, value);
                }
            } catch (IOException ex) {
                // NOTE(review): the failure is only logged, so the rename below still runs and
                // may publish a partial copy under the final name -- confirm this is intended.
                log.warn("May have a partial copy of a recovery file: " + localLog, ex);
            } finally {
                if (reader != null)
                    reader.close();
                if (writer != null)
                    writer.close();
            }
            // Make file appear in the shared file system as the target name only after it is completely copied
            fs.rename(dest, new Path(fullyQualifiedFileName));
            log.info("Copying " + localLog + " complete");
        }
    });
    return new LogCopyInfo(result, null);
}

From source file:org.apache.accumulo.server.master.tableOps.BulkImport.java

License:Apache License

/**
 * Moves the importable files found in {@code dir} into a newly created bulk directory.
 *
 * <p>Entries are processed one after another. An entry is skipped when its extension is not
 * accepted by {@code FileOperations.getValidExtensions()}, or when it is a map-file candidate
 * that is not a directory, is named {@code _logs}, or has no regular data file. Every remaining
 * entry is renamed to {@code I<unique name>.<extension>} inside the bulk directory. A rename
 * that throws {@code IOException} is logged and not propagated.
 *
 * @param fs volume manager used for listing, status checks and renames
 * @param dir directory holding the files to import
 * @param tableId table the bulk directory is created for
 * @return the bulk directory path as a string
 * @throws IOException declared for file system failures outside the rename step
 */
private String prepareBulkImport(VolumeManager fs, String dir, String tableId) throws IOException {
    Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag("/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    FileStatus[] mapFiles = fs.listStatus(new Path(dir));

    UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    for (FileStatus fileStatus : mapFiles) {
        final Path source = fileStatus.getPath();
        final String[] nameParts = source.getName().split("\\.");

        final String extension;
        if (nameParts.length > 1) {
            extension = nameParts[nameParts.length - 1];

            if (!FileOperations.getValidExtensions().contains(extension)) {
                log.warn(source + " does not have a valid extension, ignoring");
                continue;
            }
        } else {
            // assume it is a map file
            extension = Constants.MAPFILE_EXTENSION;
        }

        if (extension.equals(Constants.MAPFILE_EXTENSION)) {
            // A map file is a directory containing a regular data file.
            if (!fileStatus.isDir()) {
                log.warn(source + " is not a map file, ignoring");
                continue;
            }

            if (source.getName().equals("_logs")) {
                log.info(
                        source + " is probably a log directory from a map/reduce task, skipping");
                continue;
            }

            try {
                Path dataFile = new Path(source, MapFile.DATA_FILE_NAME);
                if (fs.getFileStatus(dataFile).isDir()) {
                    log.warn(source + " is not a map file, ignoring");
                    continue;
                }
            } catch (FileNotFoundException fnfe) {
                log.warn(source + " is not a map file, ignoring");
                continue;
            }
        }

        Path newPath = new Path(bulkDir, "I" + namer.getNextName() + "." + extension);
        try {
            fs.rename(source, newPath);
            log.debug("Moved " + source + " to " + newPath);
        } catch (IOException renameFailure) {
            log.error("Could not move: " + source.toString() + " " + renameFailure.getMessage());
        }
    }
    return bulkDir.toString();
}

From source file:org.apache.accumulo.server.util.MapFilePerformanceTest.java

License:Apache License

/**
 * Distributes the entries of the MapFile at {@code input} randomly over {@code mapFiles} new
 * MapFiles named {@code <output>_<i>_<mapFiles>}.
 *
 * <p>If any of the target names already exists, no entries are copied and a notice is printed
 * instead. Writers are still opened, and closed again, for the targets that did not exist.
 * The input reader and every opened writer are closed even when an exception occurs.
 *
 * @param input MapFile directory whose data file is read
 * @param output name prefix of the MapFiles to create
 * @param blocksize only reported in the "NOT Creating" message; not applied to the writers
 * @param mapFiles number of MapFiles to distribute the entries over
 * @return the names of all {@code mapFiles} targets, whether newly created or pre-existing
 * @throws IOException if reading, writing or closing fails
 */
public static String[] createMapFiles(String input, String output, int blocksize, int mapFiles)
        throws IOException {

    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = FileSystem.get(conf);

    // Build each target name once; it is needed for the existence check, the writer and the result.
    String names[] = new String[mapFiles];
    for (int i = 0; i < names.length; i++) {
        names[i] = output + "_" + i + "_" + mapFiles;
    }

    MapFile.Writer out[] = new MapFile.Writer[mapFiles];
    SequenceFile.Reader in = new SequenceFile.Reader(fs, new Path(input + "/" + MapFile.DATA_FILE_NAME), conf);
    try {
        boolean someFilesExist = false;

        for (int i = 0; i < out.length; i++) {
            if (!fs.exists(new Path(names[i]))) {
                out[i] = new MapFile.Writer(conf, fs, names[i], Key.class, Value.class,
                        SequenceFile.CompressionType.RECORD);
            } else {
                someFilesExist = true;
            }
        }

        if (someFilesExist) {
            System.out.println("NOT Creating " + mapFiles + " map files using a compression block size of "
                    + blocksize + " some files exist");
        } else {
            Key key = new Key();
            Value value = new Value();
            Random r = new Random();

            while (in.next(key, value)) {
                int i = r.nextInt(mapFiles);
                out[i].append(key, value);
            }
        }
    } finally {
        closeMapFileStreams(in, out);
    }

    return names;
}

/**
 * Closes the reader and every non-null writer, attempting all of them before rethrowing the
 * first close failure.
 */
private static void closeMapFileStreams(SequenceFile.Reader in, MapFile.Writer[] out) throws IOException {
    IOException firstFailure = null;
    try {
        in.close();
    } catch (IOException e) {
        firstFailure = e;
    }
    for (MapFile.Writer writer : out) {
        if (writer == null) {
            continue;
        }
        try {
            writer.close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}

From source file:org.apache.accumulo.server.util.MapFilePerformanceTest.java

License:Apache License

/**
 * Appends a random sample of the keys in the MapFile at {@code input} to {@code keys} and then
 * shuffles the whole list.
 *
 * <p>The data file reader is closed even if reading fails part-way.
 *
 * @param input MapFile directory whose data file is scanned
 * @param percentage probability with which each key is kept; it is compared against
 *        {@code Random.nextDouble()}, so values between 0.0 and 1.0 are meaningful
 * @param keys list that receives copies of the selected keys; existing elements are kept and
 *        shuffled together with the new ones
 * @throws IOException if the data file cannot be opened, read or closed
 */
public static void selectRandomKeys(String input, double percentage, ArrayList<Key> keys) throws IOException {

    System.out.println("Selecting random keys ...");

    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = FileSystem.get(conf);

    Random r = new Random();

    SequenceFile.Reader in = new SequenceFile.Reader(fs, new Path(input + "/" + MapFile.DATA_FILE_NAME), conf);
    try {
        // The reader refills this one instance, hence the copy when a key is kept.
        Key key = new Key();

        while (in.next(key)) {
            if (r.nextDouble() < percentage) {
                keys.add(new Key(key));
            }
        }
    } finally {
        in.close();
    }

    Collections.shuffle(keys);

    System.out.println("Selected " + keys.size() + " random keys.");
}