List of usage examples for org.apache.hadoop.io.MapFile.DATA_FILE_NAME
String DATA_FILE_NAME - the name ("data") of the data file inside a MapFile directory.
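A MapFile is a directory containing two SequenceFiles: a sorted data file named by DATA_FILE_NAME ("data") and a companion index named by INDEX_FILE_NAME ("index"). Before the examples, a minimal sketch of the constant in use; the path /tmp/example.map and the class name MapFileDataPeek are hypothetical. Because the data file is an ordinary SequenceFile, it can be opened directly to inspect the stored key and value types:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;

public class MapFileDataPeek {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A MapFile is a directory; "/tmp/example.map" is a hypothetical path.
        Path mapDir = new Path("/tmp/example.map");

        // MapFile.DATA_FILE_NAME is the constant "data": the sorted
        // SequenceFile of key/value pairs inside the MapFile directory.
        Path data = new Path(mapDir, MapFile.DATA_FILE_NAME);

        // Open the data file directly as a SequenceFile.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, data, conf);
        try {
            System.out.println("key class:   " + reader.getKeyClass().getName());
            System.out.println("value class: " + reader.getValueClass().getName());
        } finally {
            reader.close();
        }
    }
}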
From source file:io.aos.hdfs.MapFileFixer.java
License:Apache License
public static void main(String... args) throws Exception {
    String mapUri = args[0];

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(URI.create(mapUri), conf);

    Path map = new Path(mapUri);
    Path mapData = new Path(map, MapFile.DATA_FILE_NAME);

    // Get key and value types from data sequence file
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, mapData, conf);
    Class keyClass = reader.getKeyClass();
    Class valueClass = reader.getValueClass();
    reader.close();

    // Create the map file index file
    long entries = MapFile.fix(fs, map, keyClass, valueClass, false, conf);
    System.out.printf("Created MapFile %s with %d entries\n", map, entries);
}
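This example rebuilds a MapFile's missing index: it reads the key and value classes out of the data file, then calls MapFile.fix(), whose boolean argument is a dry-run flag, so passing false makes it actually write the index file.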
From source file:kogiri.common.hadoop.io.reader.map.IndexCloseableMapFileReader.java
License:Apache License
protected synchronized void open(FileSystem fs, String dirName, WritableComparator comparator,
        Configuration conf) throws IOException {
    Path dir = new Path(dirName);
    Path dataFile = new Path(dir, MapFile.DATA_FILE_NAME);
    Path indexFile = new Path(dir, MapFile.INDEX_FILE_NAME);

    // open the data
    this.data = createDataFileReader(fs, dataFile, conf);
    this.firstPosition = data.getPosition();

    if (comparator == null) {
        this.comparator = WritableComparator.get(data.getKeyClass().asSubclass(WritableComparable.class));
    } else {
        this.comparator = comparator;
    }

    // open the index
    this.index = new SequenceFile.Reader(fs, indexFile, conf);
}
From source file:kogiri.mapreduce.preprocess.common.helpers.KmerIndexHelper.java
License:Open Source License
public static Path[] getAllKmerIndexPartDataFilePath(Configuration conf, Path[] inputPaths)
        throws IOException {
    List<Path> inputFiles = new ArrayList<Path>();
    KmerIndexPartPathFilter filter = new KmerIndexPartPathFilter();

    for (Path path : inputPaths) {
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            if (status.isDir()) {
                if (filter.accept(path)) {
                    inputFiles.add(new Path(path, MapFile.DATA_FILE_NAME));
                } else {
                    // check child
                    FileStatus[] entries = fs.listStatus(path);
                    for (FileStatus entry : entries) {
                        if (entry.isDir()) {
                            if (filter.accept(entry.getPath())) {
                                inputFiles.add(new Path(entry.getPath(), MapFile.DATA_FILE_NAME));
                            }
                        }
                    }
                }
            }
        }
    }

    return inputFiles.toArray(new Path[0]);
}
From source file:org.apache.accumulo.core.file.map.MapFileOperations.java
License:Apache License
@Override
protected long getFileSize(GetFileSizeOperation options) throws IOException {
    return options.getFileSystem()
            .getFileStatus(new Path(options.getFilename() + "/" + MapFile.DATA_FILE_NAME)).getLen();
}
From source file:org.apache.accumulo.master.tableOps.BulkImport.java
License:Apache License
private String prepareBulkImport(Master master, final VolumeManager fs, String dir, String tableId)
        throws Exception {
    final Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag(master,
            "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    Path dirPath = new Path(dir);
    FileStatus[] mapFiles = fs.listStatus(dirPath);

    final UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    int workerCount = master.getConfiguration().getCount(Property.MASTER_BULK_RENAME_THREADS);
    SimpleThreadPool workers = new SimpleThreadPool(workerCount, "bulk move");
    List<Future<Exception>> results = new ArrayList<>();

    for (FileStatus file : mapFiles) {
        final FileStatus fileStatus = file;
        results.add(workers.submit(new Callable<Exception>() {
            @Override
            public Exception call() throws Exception {
                try {
                    String sa[] = fileStatus.getPath().getName().split("\\.");
                    String extension = "";
                    if (sa.length > 1) {
                        extension = sa[sa.length - 1];
                        if (!FileOperations.getValidExtensions().contains(extension)) {
                            log.warn(fileStatus.getPath() + " does not have a valid extension, ignoring");
                            return null;
                        }
                    } else {
                        // assume it is a map file
                        extension = Constants.MAPFILE_EXTENSION;
                    }

                    if (extension.equals(Constants.MAPFILE_EXTENSION)) {
                        if (!fileStatus.isDirectory()) {
                            log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                            return null;
                        }

                        if (fileStatus.getPath().getName().equals("_logs")) {
                            log.info(fileStatus.getPath()
                                    + " is probably a log directory from a map/reduce task, skipping");
                            return null;
                        }
                        try {
                            FileStatus dataStatus = fs
                                    .getFileStatus(new Path(fileStatus.getPath(), MapFile.DATA_FILE_NAME));
                            if (dataStatus.isDirectory()) {
                                log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                                return null;
                            }
                        } catch (FileNotFoundException fnfe) {
                            log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                            return null;
                        }
                    }

                    String newName = "I" + namer.getNextName() + "." + extension;
                    Path newPath = new Path(bulkDir, newName);
                    try {
                        fs.rename(fileStatus.getPath(), newPath);
                        log.debug("Moved " + fileStatus.getPath() + " to " + newPath);
                    } catch (IOException E1) {
                        log.error("Could not move: {} {}", fileStatus.getPath().toString(), E1.getMessage());
                    }
                } catch (Exception ex) {
                    return ex;
                }
                return null;
            }
        }));
    }

    workers.shutdown();
    while (!workers.awaitTermination(1000L, TimeUnit.MILLISECONDS)) {}

    for (Future<Exception> ex : results) {
        if (ex.get() != null) {
            throw ex.get();
        }
    }
    return bulkDir.toString();
}
From source file:org.apache.accumulo.master.tableOps.bulkVer1.BulkImport.java
License:Apache License
private String prepareBulkImport(Master master, final VolumeManager fs, String dir, Table.ID tableId)
        throws Exception {
    final Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag(master,
            "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    Path dirPath = new Path(dir);
    FileStatus[] mapFiles = fs.listStatus(dirPath);

    final UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    int workerCount = master.getConfiguration().getCount(Property.MASTER_BULK_RENAME_THREADS);
    SimpleThreadPool workers = new SimpleThreadPool(workerCount, "bulk move");
    List<Future<Exception>> results = new ArrayList<>();

    for (FileStatus file : mapFiles) {
        final FileStatus fileStatus = file;
        results.add(workers.submit(() -> {
            try {
                String sa[] = fileStatus.getPath().getName().split("\\.");
                String extension = "";
                if (sa.length > 1) {
                    extension = sa[sa.length - 1];
                    if (!FileOperations.getValidExtensions().contains(extension)) {
                        log.warn("{} does not have a valid extension, ignoring", fileStatus.getPath());
                        return null;
                    }
                } else {
                    // assume it is a map file
                    extension = Constants.MAPFILE_EXTENSION;
                }

                if (extension.equals(Constants.MAPFILE_EXTENSION)) {
                    if (!fileStatus.isDirectory()) {
                        log.warn("{} is not a map file, ignoring", fileStatus.getPath());
                        return null;
                    }

                    if (fileStatus.getPath().getName().equals("_logs")) {
                        log.info("{} is probably a log directory from a map/reduce task, skipping",
                                fileStatus.getPath());
                        return null;
                    }
                    try {
                        FileStatus dataStatus = fs
                                .getFileStatus(new Path(fileStatus.getPath(), MapFile.DATA_FILE_NAME));
                        if (dataStatus.isDirectory()) {
                            log.warn("{} is not a map file, ignoring", fileStatus.getPath());
                            return null;
                        }
                    } catch (FileNotFoundException fnfe) {
                        log.warn("{} is not a map file, ignoring", fileStatus.getPath());
                        return null;
                    }
                }

                String newName = "I" + namer.getNextName() + "." + extension;
                Path newPath = new Path(bulkDir, newName);
                try {
                    fs.rename(fileStatus.getPath(), newPath);
                    log.debug("Moved {} to {}", fileStatus.getPath(), newPath);
                } catch (IOException E1) {
                    log.error("Could not move: {} {}", fileStatus.getPath(), E1.getMessage());
                }
            } catch (Exception ex) {
                return ex;
            }
            return null;
        }));
    }

    workers.shutdown();
    while (!workers.awaitTermination(1000L, TimeUnit.MILLISECONDS)) {}

    for (Future<Exception> ex : results) {
        if (ex.get() != null) {
            throw ex.get();
        }
    }
    return bulkDir.toString();
}
From source file:org.apache.accumulo.server.logger.LogWriter.java
License:Apache License
@Override
public LogCopyInfo startCopy(TInfo info, AuthInfo credentials, final String localLog,
        final String fullyQualifiedFileName, final boolean sort) {
    log.info("Copying " + localLog + " to " + fullyQualifiedFileName);

    final long t1 = System.currentTimeMillis();
    try {
        Long id = file2id.get(localLog);
        if (id != null)
            close(info, id);
    } catch (NoSuchLogIDException e) {
        log.error("Unexpected error thrown", e);
        throw new RuntimeException(e);
    }

    File file;
    try {
        file = new File(findLocalFilename(localLog));
        log.info(file.getAbsoluteFile().toString());
    } catch (FileNotFoundException ex) {
        throw new RuntimeException(ex);
    }
    long result = file.length();

    copyThreadPool.execute(new Runnable() {
        @Override
        public void run() {
            Thread.currentThread().setName("Copying " + localLog + " to shared file system");
            for (int i = 0; i < 3; i++) {
                try {
                    if (sort) {
                        copySortLog(localLog, fullyQualifiedFileName);
                    } else {
                        copyLog(localLog, fullyQualifiedFileName);
                    }
                    return;
                } catch (IOException e) {
                    log.error("error during copy", e);
                }
                UtilWaitThread.sleep(1000);
            }
            log.error("Unable to copy file to DFS, too many retries " + localLog);
            try {
                fs.create(new Path(fullyQualifiedFileName + ".failed")).close();
            } catch (IOException ex) {
                log.error("Unable to create failure flag file", ex);
            }
            long t2 = System.currentTimeMillis();
            if (metrics.isEnabled())
                metrics.add(LogWriterMetrics.copy, (t2 - t1));
        }

        private void copySortLog(String localLog, String fullyQualifiedFileName) throws IOException {
            final long SORT_BUFFER_SIZE = acuConf.getMemoryInBytes(Property.LOGGER_SORT_BUFFER_SIZE);

            FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
            Path dest = new Path(fullyQualifiedFileName + ".recovered");
            log.debug("Sorting log file to DFS " + dest);
            fs.mkdirs(dest);
            int part = 0;

            Reader reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)),
                    fs.getConf());
            try {
                final ArrayList<Pair<LogFileKey, LogFileValue>> kv = new ArrayList<Pair<LogFileKey, LogFileValue>>();
                long memorySize = 0;
                while (true) {
                    final long position = reader.getPosition();
                    final LogFileKey key = new LogFileKey();
                    final LogFileValue value = new LogFileValue();
                    try {
                        if (!reader.next(key, value))
                            break;
                    } catch (EOFException e) {
                        log.warn("Unexpected end of file reading write ahead log " + localLog);
                        break;
                    }
                    kv.add(new Pair<LogFileKey, LogFileValue>(key, value));
                    memorySize += reader.getPosition() - position;
                    if (memorySize > SORT_BUFFER_SIZE) {
                        writeSortedEntries(dest, part++, kv);
                        kv.clear();
                        memorySize = 0;
                    }
                }
                if (!kv.isEmpty())
                    writeSortedEntries(dest, part++, kv);
                fs.create(new Path(dest, "finished")).close();
            } finally {
                reader.close();
            }
        }

        private void writeSortedEntries(Path dest, int part, final List<Pair<LogFileKey, LogFileValue>> kv)
                throws IOException {
            String path = dest + String.format("/part-r-%05d", part);
            log.debug("Writing partial log file to DFS " + path);
            log.debug("Sorting");
            Span span = Trace.start("Logger sort");
            span.data("logfile", dest.getName());
            Collections.sort(kv, new Comparator<Pair<LogFileKey, LogFileValue>>() {
                @Override
                public int compare(Pair<LogFileKey, LogFileValue> o1, Pair<LogFileKey, LogFileValue> o2) {
                    return o1.getFirst().compareTo(o2.getFirst());
                }
            });
            span.stop();
            span = Trace.start("Logger write");
            span.data("logfile", dest.getName());
            MapFile.Writer writer = new MapFile.Writer(fs.getConf(), fs, path, LogFileKey.class,
                    LogFileValue.class);
            short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
            fs.setReplication(new Path(path + "/" + MapFile.DATA_FILE_NAME), replication);
            fs.setReplication(new Path(path + "/" + MapFile.INDEX_FILE_NAME), replication);
            try {
                for (Pair<LogFileKey, LogFileValue> entry : kv)
                    writer.append(entry.getFirst(), entry.getSecond());
            } finally {
                writer.close();
                span.stop();
            }
        }

        private void copyLog(final String localLog, final String fullyQualifiedFileName) throws IOException {
            Path dest = new Path(fullyQualifiedFileName + ".copy");
            log.debug("Copying log file to DFS " + dest);
            fs.delete(dest, true);
            LogFileKey key = new LogFileKey();
            LogFileValue value = new LogFileValue();
            Writer writer = null;
            Reader reader = null;
            try {
                short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
                writer = SequenceFile.createWriter(fs, fs.getConf(), dest, LogFileKey.class,
                        LogFileValue.class, fs.getConf().getInt("io.file.buffer.size", 4096), replication,
                        fs.getDefaultBlockSize(), SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null,
                        new Metadata());
                FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
                reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
                while (reader.next(key, value)) {
                    writer.append(key, value);
                }
            } catch (IOException ex) {
                log.warn("May have a partial copy of a recovery file: " + localLog, ex);
            } finally {
                if (reader != null)
                    reader.close();
                if (writer != null)
                    writer.close();
            }
            // Make file appear in the shared file system as the target name only after it is completely copied
            fs.rename(dest, new Path(fullyQualifiedFileName));
            log.info("Copying " + localLog + " complete");
        }
    });
    return new LogCopyInfo(result, null);
}
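Note the pattern around MapFile.Writer in writeSortedEntries: because a MapFile is a directory rather than a single file, the replication factor must be set individually on the files inside it, which is why MapFile.DATA_FILE_NAME and MapFile.INDEX_FILE_NAME are each joined onto the MapFile path before calling setReplication.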
From source file:org.apache.accumulo.server.master.tableOps.BulkImport.java
License:Apache License
private String prepareBulkImport(VolumeManager fs, String dir, String tableId) throws IOException {
    Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil
            .addBulkLoadInProgressFlag("/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    Path dirPath = new Path(dir);
    FileStatus[] mapFiles = fs.listStatus(dirPath);

    UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    for (FileStatus fileStatus : mapFiles) {
        String sa[] = fileStatus.getPath().getName().split("\\.");
        String extension = "";
        if (sa.length > 1) {
            extension = sa[sa.length - 1];
            if (!FileOperations.getValidExtensions().contains(extension)) {
                log.warn(fileStatus.getPath() + " does not have a valid extension, ignoring");
                continue;
            }
        } else {
            // assume it is a map file
            extension = Constants.MAPFILE_EXTENSION;
        }

        if (extension.equals(Constants.MAPFILE_EXTENSION)) {
            if (!fileStatus.isDir()) {
                log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                continue;
            }

            if (fileStatus.getPath().getName().equals("_logs")) {
                log.info(fileStatus.getPath()
                        + " is probably a log directory from a map/reduce task, skipping");
                continue;
            }
            try {
                FileStatus dataStatus = fs
                        .getFileStatus(new Path(fileStatus.getPath(), MapFile.DATA_FILE_NAME));
                if (dataStatus.isDir()) {
                    log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                    continue;
                }
            } catch (FileNotFoundException fnfe) {
                log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                continue;
            }
        }

        String newName = "I" + namer.getNextName() + "." + extension;
        Path newPath = new Path(bulkDir, newName);
        try {
            fs.rename(fileStatus.getPath(), newPath);
            log.debug("Moved " + fileStatus.getPath() + " to " + newPath);
        } catch (IOException E1) {
            log.error("Could not move: " + fileStatus.getPath().toString() + " " + E1.getMessage());
        }
    }
    return bulkDir.toString();
}
From source file:org.apache.accumulo.server.util.MapFilePerformanceTest.java
License:Apache License
public static String[] createMapFiles(String input, String output, int blocksize, int mapFiles)
        throws IOException {

    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Reader in = new SequenceFile.Reader(fs, new Path(input + "/" + MapFile.DATA_FILE_NAME),
            conf);

    boolean someFilesExist = false;

    MapFile.Writer out[] = new MapFile.Writer[mapFiles];
    for (int i = 0; i < out.length; i++) {
        if (!fs.exists(new Path(output + "_" + i + "_" + mapFiles))) {
            out[i] = new MapFile.Writer(conf, fs, output + "_" + i + "_" + mapFiles, Key.class, Value.class,
                    SequenceFile.CompressionType.RECORD);
        } else {
            someFilesExist = true;
        }
    }

    Key key = new Key();
    Value value = new Value();

    Random r = new Random();

    if (someFilesExist) {
        System.out.println("NOT Creating " + mapFiles + " map files using a compression block size of "
                + blocksize + " some files exist");
    } else {
        while (in.next(key, value)) {
            int i = r.nextInt(mapFiles);
            out[i].append(key, value);
        }
    }

    String names[] = new String[mapFiles];

    in.close();
    for (int i = 0; i < out.length; i++) {
        if (out[i] != null) {
            out[i].close();
        }
        names[i] = output + "_" + i + "_" + mapFiles;
    }

    return names;
}
From source file:org.apache.accumulo.server.util.MapFilePerformanceTest.java
License:Apache License
public static void selectRandomKeys(String input, double percentage, ArrayList<Key> keys)
        throws IOException {
    System.out.println("Selecting random keys ...");
    Configuration conf = CachedConfiguration.getInstance();
    FileSystem fs = FileSystem.get(conf);

    Random r = new Random();

    SequenceFile.Reader in = new SequenceFile.Reader(fs, new Path(input + "/" + MapFile.DATA_FILE_NAME),
            conf);

    Key key = new Key();
    while (in.next(key)) {
        if (r.nextDouble() < percentage)
            keys.add(new Key(key));
    }
    in.close();

    Collections.shuffle(keys);
    System.out.println("Selected " + keys.size() + " random keys.");
}