Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.server.master.tableOps; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.Instance; import org.apache.accumulo.core.client.IsolatedScanner; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.impl.ServerClient; import org.apache.accumulo.core.client.impl.Tables; import org.apache.accumulo.core.client.impl.thrift.ClientService; import org.apache.accumulo.core.client.impl.thrift.ClientService.Client; import org.apache.accumulo.core.client.impl.thrift.TableOperation; import org.apache.accumulo.core.client.impl.thrift.TableOperationExceptionType; import org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.conf.SiteConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.KeyExtent; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.file.FileOperations; import org.apache.accumulo.core.master.state.tables.TableState; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.util.Pair; import org.apache.accumulo.core.util.SimpleThreadPool; import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.accumulo.fate.Repo; import org.apache.accumulo.server.ServerConstants; import org.apache.accumulo.server.client.HdfsZooInstance; import org.apache.accumulo.server.conf.ServerConfiguration; import org.apache.accumulo.server.fs.VolumeManager; import org.apache.accumulo.server.master.LiveTServerSet.TServerConnection; import org.apache.accumulo.server.master.Master; import org.apache.accumulo.server.master.state.TServerInstance; import org.apache.accumulo.server.security.SystemCredentials; import org.apache.accumulo.server.tabletserver.UniqueNameAllocator; import org.apache.accumulo.server.util.MetadataTableUtil; import org.apache.accumulo.server.zookeeper.DistributedWorkQueue; import org.apache.accumulo.server.zookeeper.TransactionWatcher.ZooArbitrator; import org.apache.accumulo.trace.instrument.TraceExecutorService; import org.apache.accumulo.trace.instrument.Tracer; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.Text; import org.apache.log4j.Logger; import org.apache.thrift.TException; /* * Bulk import makes requests of tablet servers, and those requests can take a * long time. Our communications to the tablet server may fail, so we won't know * the status of the request. The master will repeat failed requests so now * there are multiple requests to the tablet server. The tablet server will not * execute the request multiple times, so long as the marker it wrote in the * metadata table stays there. The master needs to know when all requests have * finished so it can remove the markers. Did it start? Did it finish? We can see * that *a* request completed by seeing the flag written into the metadata * table, but we won't know if some other rogue thread is still waiting to start * a thread and repeat the operation. * * The master can ask the tablet server if it has any requests still running. * Except the tablet server might have some thread about to start a request, but * before it has made any bookkeeping about the request. To prevent problems * like this, an Arbitrator is used. Before starting any new request, the tablet * server checks the Arbitrator to see if the request is still valid. * */ public class BulkImport extends MasterRepo { public static final String FAILURES_TXT = "failures.txt"; private static final long serialVersionUID = 1L; private static final Logger log = Logger.getLogger(BulkImport.class); private String tableId; private String sourceDir; private String errorDir; private boolean setTime; public BulkImport(String tableId, String sourceDir, String errorDir, boolean setTime) { this.tableId = tableId; this.sourceDir = sourceDir; this.errorDir = errorDir; this.setTime = setTime; } @Override public long isReady(long tid, Master master) throws Exception { if (!Utils.getReadLock(tableId, tid).tryLock()) return 100; Instance instance = HdfsZooInstance.getInstance(); Tables.clearCache(instance); if (Tables.getTableState(instance, tableId) == TableState.ONLINE) { long reserve1, reserve2; reserve1 = reserve2 = Utils.reserveHdfsDirectory(sourceDir, tid); if (reserve1 == 0) reserve2 = Utils.reserveHdfsDirectory(errorDir, tid); return reserve2; } else { throw new ThriftTableOperationException(tableId, null, TableOperation.BULK_IMPORT, TableOperationExceptionType.OFFLINE, null); } } @Override public Repo<Master> call(long tid, Master master) throws Exception { log.debug(" tid " + tid + " sourceDir " + sourceDir); Utils.getReadLock(tableId, tid).lock(); // check that the error directory exists and is empty VolumeManager fs = master.getFileSystem(); Path errorPath = new Path(errorDir); FileStatus errorStatus = null; try { errorStatus = fs.getFileStatus(errorPath); } catch (FileNotFoundException ex) { // ignored } if (errorStatus == null) throw new ThriftTableOperationException(tableId, null, TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_ERROR_DIRECTORY, errorDir + " does not exist"); if (!errorStatus.isDir()) throw new ThriftTableOperationException(tableId, null, TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_ERROR_DIRECTORY, errorDir + " is not a directory"); if (fs.listStatus(errorPath).length != 0) throw new ThriftTableOperationException(tableId, null, TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_ERROR_DIRECTORY, errorDir + " is not empty"); ZooArbitrator.start(Constants.BULK_ARBITRATOR_TYPE, tid); // move the files into the directory try { String bulkDir = prepareBulkImport(fs, sourceDir, tableId); log.debug(" tid " + tid + " bulkDir " + bulkDir); return new LoadFiles(tableId, sourceDir, bulkDir, errorDir, setTime); } catch (IOException ex) { log.error("error preparing the bulk import directory", ex); throw new ThriftTableOperationException(tableId, null, TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_INPUT_DIRECTORY, sourceDir + ": " + ex); } } private Path createNewBulkDir(VolumeManager fs, String tableId) throws IOException { String tableDir = null; loop: for (String dir : fs.getFileSystems().keySet()) { if (this.sourceDir.startsWith(dir)) { for (String path : ServerConstants.getTablesDirs()) { if (path.startsWith(dir)) { tableDir = path; break loop; } } break; } } if (tableDir == null) throw new IllegalStateException(sourceDir + " is not in a known namespace"); Path directory = new Path(tableDir + "/" + tableId); fs.mkdirs(directory); // only one should be able to create the lock file // the purpose of the lock file is to avoid a race // condition between the call to fs.exists() and // fs.mkdirs()... if only hadoop had a mkdir() function // that failed when the dir existed UniqueNameAllocator namer = UniqueNameAllocator.getInstance(); while (true) { Path newBulkDir = new Path(directory, Constants.BULK_PREFIX + namer.getNextName()); if (fs.exists(newBulkDir)) // sanity check throw new IllegalStateException("Dir exist when it should not " + newBulkDir); if (fs.mkdirs(newBulkDir)) return newBulkDir; log.warn("Failed to create " + newBulkDir + " for unknown reason"); UtilWaitThread.sleep(3000); } } private String prepareBulkImport(VolumeManager fs, String dir, String tableId) throws IOException { Path bulkDir = createNewBulkDir(fs, tableId); MetadataTableUtil.addBulkLoadInProgressFlag("/" + bulkDir.getParent().getName() + "/" + bulkDir.getName()); Path dirPath = new Path(dir); FileStatus[] mapFiles = fs.listStatus(dirPath); UniqueNameAllocator namer = UniqueNameAllocator.getInstance(); for (FileStatus fileStatus : mapFiles) { String sa[] = fileStatus.getPath().getName().split("\\."); String extension = ""; if (sa.length > 1) { extension = sa[sa.length - 1]; if (!FileOperations.getValidExtensions().contains(extension)) { log.warn(fileStatus.getPath() + " does not have a valid extension, ignoring"); continue; } } else { // assume it is a map file extension = Constants.MAPFILE_EXTENSION; } if (extension.equals(Constants.MAPFILE_EXTENSION)) { if (!fileStatus.isDir()) { log.warn(fileStatus.getPath() + " is not a map file, ignoring"); continue; } if (fileStatus.getPath().getName().equals("_logs")) { log.info( fileStatus.getPath() + " is probably a log directory from a map/reduce task, skipping"); continue; } try { FileStatus dataStatus = fs .getFileStatus(new Path(fileStatus.getPath(), MapFile.DATA_FILE_NAME)); if (dataStatus.isDir()) { log.warn(fileStatus.getPath() + " is not a map file, ignoring"); continue; } } catch (FileNotFoundException fnfe) { log.warn(fileStatus.getPath() + " is not a map file, ignoring"); continue; } } String newName = "I" + namer.getNextName() + "." + extension; Path newPath = new Path(bulkDir, newName); try { fs.rename(fileStatus.getPath(), newPath); log.debug("Moved " + fileStatus.getPath() + " to " + newPath); } catch (IOException E1) { log.error("Could not move: " + fileStatus.getPath().toString() + " " + E1.getMessage()); } } return bulkDir.toString(); } @Override public void undo(long tid, Master environment) throws Exception { // unreserve source/error directories Utils.unreserveHdfsDirectory(sourceDir, tid); Utils.unreserveHdfsDirectory(errorDir, tid); Utils.getReadLock(tableId, tid).unlock(); } } class CleanUpBulkImport extends MasterRepo { private static final long serialVersionUID = 1L; private static final Logger log = Logger.getLogger(CleanUpBulkImport.class); private String tableId; private String source; private String bulk; private String error; public CleanUpBulkImport(String tableId, String source, String bulk, String error) { this.tableId = tableId; this.source = source; this.bulk = bulk; this.error = error; } @Override public Repo<Master> call(long tid, Master master) throws Exception { log.debug("removing the bulk processing flag file in " + bulk); Path bulkDir = new Path(bulk); MetadataTableUtil .removeBulkLoadInProgressFlag("/" + bulkDir.getParent().getName() + "/" + bulkDir.getName()); MetadataTableUtil.addDeleteEntry(tableId, "/" + bulkDir.getName()); log.debug("removing the metadata table markers for loaded files"); Connector conn = master.getConnector(); MetadataTableUtil.removeBulkLoadEntries(conn, tableId, tid); log.debug("releasing HDFS reservations for " + source + " and " + error); Utils.unreserveHdfsDirectory(source, tid); Utils.unreserveHdfsDirectory(error, tid); Utils.getReadLock(tableId, tid).unlock(); log.debug("completing bulk import transaction " + tid); ZooArbitrator.cleanup(Constants.BULK_ARBITRATOR_TYPE, tid); return null; } } class CompleteBulkImport extends MasterRepo { private static final long serialVersionUID = 1L; private String tableId; private String source; private String bulk; private String error; public CompleteBulkImport(String tableId, String source, String bulk, String error) { this.tableId = tableId; this.source = source; this.bulk = bulk; this.error = error; } @Override public Repo<Master> call(long tid, Master master) throws Exception { ZooArbitrator.stop(Constants.BULK_ARBITRATOR_TYPE, tid); return new CopyFailed(tableId, source, bulk, error); } } class CopyFailed extends MasterRepo { private static final long serialVersionUID = 1L; private String tableId; private String source; private String bulk; private String error; public CopyFailed(String tableId, String source, String bulk, String error) { this.tableId = tableId; this.source = source; this.bulk = bulk; this.error = error; } @Override public long isReady(long tid, Master master) throws Exception { Set<TServerInstance> finished = new HashSet<TServerInstance>(); Set<TServerInstance> running = master.onlineTabletServers(); for (TServerInstance server : running) { try { TServerConnection client = master.getConnection(server); if (client != null && !client.isActive(tid)) finished.add(server); } catch (TException ex) { log.info("Ignoring error trying to check on tid " + tid + " from server " + server + ": " + ex); } } if (finished.containsAll(running)) return 0; return 500; } @Override public Repo<Master> call(long tid, Master master) throws Exception { // This needs to execute after the arbiter is stopped VolumeManager fs = master.getFileSystem(); if (!fs.exists(new Path(error, BulkImport.FAILURES_TXT))) return new CleanUpBulkImport(tableId, source, bulk, error); HashMap<String, String> failures = new HashMap<String, String>(); HashMap<String, String> loadedFailures = new HashMap<String, String>(); FSDataInputStream failFile = fs.open(new Path(error, BulkImport.FAILURES_TXT)); BufferedReader in = new BufferedReader(new InputStreamReader(failFile)); try { String line = null; while ((line = in.readLine()) != null) { Path path = new Path(line); if (!fs.exists(new Path(error, path.getName()))) failures.put("/" + path.getParent().getName() + "/" + path.getName(), line); } } finally { failFile.close(); } /* * I thought I could move files that have no file references in the table. However its possible a clone references a file. Therefore only move files that * have no loaded markers. */ // determine which failed files were loaded Connector conn = master.getConnector(); Scanner mscanner = new IsolatedScanner(conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)); mscanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange()); mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); for (Entry<Key, Value> entry : mscanner) { if (Long.parseLong(entry.getValue().toString()) == tid) { String loadedFile = entry.getKey().getColumnQualifier().toString(); String absPath = failures.remove(loadedFile); if (absPath != null) { loadedFailures.put(loadedFile, absPath); } } } // move failed files that were not loaded for (String failure : failures.values()) { Path orig = new Path(failure); Path dest = new Path(error, orig.getName()); fs.rename(orig, dest); log.debug("tid " + tid + " renamed " + orig + " to " + dest + ": import failed"); } if (loadedFailures.size() > 0) { DistributedWorkQueue bifCopyQueue = new DistributedWorkQueue(Constants.ZROOT + "/" + HdfsZooInstance.getInstance().getInstanceID() + Constants.ZBULK_FAILED_COPYQ); HashSet<String> workIds = new HashSet<String>(); for (String failure : loadedFailures.values()) { Path orig = new Path(failure); Path dest = new Path(error, orig.getName()); if (fs.exists(dest)) continue; bifCopyQueue.addWork(orig.getName(), (failure + "," + dest).getBytes()); workIds.add(orig.getName()); log.debug("tid " + tid + " added to copyq: " + orig + " to " + dest + ": failed"); } bifCopyQueue.waitUntilDone(workIds); } fs.deleteRecursively(new Path(error, BulkImport.FAILURES_TXT)); return new CleanUpBulkImport(tableId, source, bulk, error); } } class LoadFiles extends MasterRepo { private static final long serialVersionUID = 1L; private static ExecutorService threadPool = null; static { } private static final Logger log = Logger.getLogger(BulkImport.class); private String tableId; private String source; private String bulk; private String errorDir; private boolean setTime; public LoadFiles(String tableId, String source, String bulk, String errorDir, boolean setTime) { this.tableId = tableId; this.source = source; this.bulk = bulk; this.errorDir = errorDir; this.setTime = setTime; } @Override public long isReady(long tid, Master master) throws Exception { if (master.onlineTabletServers().size() == 0) return 500; return 0; } synchronized void initializeThreadPool(Master master) { if (threadPool == null) { int threadPoolSize = master.getSystemConfiguration().getCount(Property.MASTER_BULK_THREADPOOL_SIZE); ThreadPoolExecutor pool = new SimpleThreadPool(threadPoolSize, "bulk import"); pool.allowCoreThreadTimeOut(true); threadPool = new TraceExecutorService(pool); } } @Override public Repo<Master> call(final long tid, final Master master) throws Exception { initializeThreadPool(master); final SiteConfiguration conf = ServerConfiguration.getSiteConfiguration(); VolumeManager fs = master.getFileSystem(); List<FileStatus> files = new ArrayList<FileStatus>(); for (FileStatus entry : fs.listStatus(new Path(bulk))) { files.add(entry); } log.debug("tid " + tid + " importing " + files.size() + " files"); Path writable = new Path(this.errorDir, ".iswritable"); if (!fs.createNewFile(writable)) { // Maybe this is a re-try... clear the flag and try again fs.delete(writable); if (!fs.createNewFile(writable)) throw new ThriftTableOperationException(tableId, null, TableOperation.BULK_IMPORT, TableOperationExceptionType.BULK_BAD_ERROR_DIRECTORY, "Unable to write to " + this.errorDir); } fs.delete(writable); final Set<String> filesToLoad = Collections.synchronizedSet(new HashSet<String>()); for (FileStatus f : files) filesToLoad.add(f.getPath().toString()); final int RETRIES = Math.max(1, conf.getCount(Property.MASTER_BULK_RETRIES)); for (int attempt = 0; attempt < RETRIES && filesToLoad.size() > 0; attempt++) { List<Future<List<String>>> results = new ArrayList<Future<List<String>>>(); if (master.onlineTabletServers().size() == 0) log.warn("There are no tablet server to process bulk import, waiting (tid = " + tid + ")"); while (master.onlineTabletServers().size() == 0) { UtilWaitThread.sleep(500); } // Use the threadpool to assign files one-at-a-time to the server final List<String> loaded = Collections.synchronizedList(new ArrayList<String>()); for (final String file : filesToLoad) { results.add(threadPool.submit(new Callable<List<String>>() { @Override public List<String> call() { List<String> failures = new ArrayList<String>(); ClientService.Client client = null; String server = null; try { // get a connection to a random tablet server, do not prefer cached connections because // this is running on the master and there are lots of connections to tablet servers // serving the !METADATA tablets long timeInMillis = master.getConfiguration().getConfiguration() .getTimeInMillis(Property.MASTER_BULK_TIMEOUT); Pair<String, Client> pair = ServerClient.getConnection(master.getInstance(), false, timeInMillis); client = pair.getSecond(); server = pair.getFirst(); List<String> attempt = Collections.singletonList(file); log.debug("Asking " + pair.getFirst() + " to bulk import " + file); List<String> fail = client.bulkImportFiles(Tracer.traceInfo(), SystemCredentials.get().toThrift(master.getInstance()), tid, tableId, attempt, errorDir, setTime); if (fail.isEmpty()) { loaded.add(file); } else { failures.addAll(fail); } } catch (Exception ex) { log.error("rpc failed server:" + server + ", tid:" + tid + " " + ex); } finally { ServerClient.close(client); } return failures; } })); } Set<String> failures = new HashSet<String>(); for (Future<List<String>> f : results) failures.addAll(f.get()); filesToLoad.removeAll(loaded); if (filesToLoad.size() > 0) { log.debug( "tid " + tid + " attempt " + (attempt + 1) + " " + sampleList(filesToLoad, 10) + " failed"); UtilWaitThread.sleep(100); } } FSDataOutputStream failFile = fs.create(new Path(errorDir, BulkImport.FAILURES_TXT), true); BufferedWriter out = new BufferedWriter(new OutputStreamWriter(failFile)); try { for (String f : filesToLoad) { out.write(f); out.write("\n"); } } finally { out.close(); } // return the next step, which will perform cleanup return new CompleteBulkImport(tableId, source, bulk, errorDir); } static String sampleList(Collection<?> potentiallyLongList, int max) { StringBuffer result = new StringBuffer(); result.append("["); int i = 0; for (Object obj : potentiallyLongList) { result.append(obj); if (i >= max) { result.append("..."); break; } else { result.append(", "); } i++; } if (i < max) result.delete(result.length() - 2, result.length()); result.append("]"); return result.toString(); } }