List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
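Before the project excerpts below, here is a minimal, self-contained sketch of the exists-before-acting pattern they all share. The path and configuration values are placeholders and not taken from any of the projects listed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder path; in the excerpts below it usually comes from index metadata or job properties.
        Path path = new Path("hdfs:///tmp/example-dir");
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            // Safe to read, list, or delete the path.
            System.out.println("Path exists, size: " + fs.getContentSummary(path).getLength() + " bytes");
        } else {
            System.out.println("Path does not exist: " + path);
        }
    }
}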
From source file:com.dasasian.chok.command.ListIndicesCommand.java
License:Apache License
private long calculateIndexDiskUsage(String index) {
    Path indexPath = new Path(index);
    URI indexUri = indexPath.toUri();
    try {
        FileSystem fileSystem = FileSystem.get(indexUri, new Configuration());
        if (!fileSystem.exists(indexPath)) {
            return -1;
        }
        return fileSystem.getContentSummary(indexPath).getLength();
    } catch (Exception e) {
        return -1;
    }
}
From source file:com.dasasian.chok.operation.master.BalanceIndexOperation.java
License:Apache License
@Override
public List<OperationId> execute(MasterContext context, List<MasterOperation> runningOperations) throws Exception {
    InteractionProtocol protocol = context.getProtocol();
    IndexMetaData indexMD = protocol.getIndexMD(_indexName);
    if (indexMD == null) { // could be undeployed in meantime
        LOG.info("skip balancing for index '" + _indexName + "' cause it is already undeployed");
        return null;
    }
    if (!canAndShouldRegulateReplication(protocol, indexMD)) {
        LOG.info("skip balancing for index '" + _indexName + "' cause there is no possible optimization");
        return null;
    }
    try {
        FileSystem fileSystem = context.getFileSystem(indexMD);
        Path path = new Path(indexMD.getPath());
        if (!fileSystem.exists(path)) {
            LOG.warn("skip balancing for index '" + _indexName + "' cause source '" + path + "' does not exists anymore");
            return null;
        }
    } catch (Exception e) {
        LOG.error("skip balancing of index '" + _indexName + "' cause failed to access source '" + indexMD.getPath() + "'", e);
        return null;
    }
    LOG.info("balancing shards for index '" + _indexName + "'");
    try {
        List<OperationId> operationIds = distributeIndexShards(context, indexMD, protocol.getLiveNodes(), runningOperations);
        return operationIds;
    } catch (Exception e) {
        ExceptionUtil.rethrowInterruptedException(e);
        LOG.error("failed to deploy balance " + _indexName, e);
        handleMasterDeployException(protocol, indexMD, e);
        return null;
    }
}
From source file:com.dasasian.chok.operation.master.BalanceIndexOperationTest.java
License:Apache License
@Test
public void testStopBalance_WhenSourceFileDoesNotExistAnymore() throws Exception {
    // add nodes and index
    List<Node> nodes = Mocks.mockNodes(2);
    List<NodeQueue> nodeQueues = Mocks.publishNodes(getInteractionProtocol(), nodes);
    deployIndexWithError();

    // index deployed on 2 nodes / desired replica is 3
    for (NodeQueue nodeqQueue : nodeQueues) {
        assertEquals(1, nodeqQueue.size());
    }
    publishShards(nodes, nodeQueues);

    // add node and then balance again
    Node node3 = Mocks.mockNode();
    NodeQueue nodeQueue3 = Mocks.publishNode(getInteractionProtocol(), node3);
    assertEquals(0, nodeQueue3.size());

    BalanceIndexOperation balanceOperation = new BalanceIndexOperation(testIndex.getIndexName());
    FileSystem fileSystem = Mockito.mock(FileSystem.class);
    Mockito.when(fileSystem.exists(Matchers.any(Path.class))).thenReturn(false);
    MasterContext spiedContext = spy(masterContext);
    Mockito.doReturn(fileSystem).when(spiedContext).getFileSystem(Matchers.any(IndexMetaData.class));
    List<OperationId> nodeOperations = balanceOperation.execute(spiedContext, EMPTY_LIST);
    assertEquals(null, nodeOperations);
}
From source file:com.dasasian.chok.operation.master.IndexDeployOperation.java
License:Apache License
protected static List<Shard> readShardsFromFs(final String indexName, final String indexPathString) throws IndexDeployException {
    // get shard folders from source
    URI uri;
    try {
        uri = new URI(indexPathString);
    } catch (final URISyntaxException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "unable to parse index path uri '" + indexPathString + "', make sure it starts with file:// or hdfs:// ", e);
    }
    FileSystem fileSystem;
    try {
        fileSystem = HadoopUtil.getFileSystem(new Path(uri.toString()));
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "unable to retrive file system for index path '" + indexPathString
                        + "', make sure your path starts with hadoop support prefix like file:// or hdfs://", e);
    }
    List<Shard> shards = new ArrayList<>();
    try {
        final Path indexPath = new Path(indexPathString);
        if (!fileSystem.exists(indexPath)) {
            throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index path '" + uri + "' does not exists");
        }
        final FileStatus[] listStatus = fileSystem.listStatus(indexPath, new PathFilter() {
            public boolean accept(final Path aPath) {
                return !aPath.getName().startsWith(".");
            }
        });
        for (final FileStatus fileStatus : listStatus) {
            String shardPath = fileStatus.getPath().toString();
            if (fileStatus.isDir() || shardPath.endsWith(".zip")) {
                shards.add(new Shard(createShardName(indexName, shardPath), shardPath));
            }
        }
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "could not access index path: " + indexPathString, e);
    }
    if (shards.size() == 0) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index does not contain any shard");
    }
    return shards;
}
From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java
License:Apache License
public static void createTables(String database, TajoClient client) throws Exception {
    String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }
    if (dataDir.startsWith("hdfs://")) {
        Path path = new Path(dataDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        for (String eachTable : tableNames) {
            Path tableDataDir = new Path(path, eachTable);
            if (!fs.exists(tableDataDir)) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    } else {
        File dataDirFile = new File(dataDir);
        if (!dataDirFile.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (dataDirFile.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }
        for (String eachTable : tableNames) {
            File tableDataDir = new File(dataDirFile, eachTable);
            if (!tableDataDir.exists()) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    }

    KeyValueSet opt = new KeyValueSet();
    opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    Path tpcdsResourceURL = new Path(ClassLoader.getSystemResource("tpcds").toString());
    Path ddlPath = new Path(tpcdsResourceURL, "ddl");
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FileStatus[] files = localFs.listStatus(ddlPath);

    String dataDirWithPrefix = dataDir;
    if (dataDir.indexOf("://") < 0) {
        dataDirWithPrefix = "file://" + dataDir;
    }

    for (FileStatus eachFile : files) {
        if (eachFile.isFile()) {
            String tableName = eachFile.getPath().getName().split("\\.")[0];
            String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
            query = query.replace("${DB}", database);
            query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);
            LOG.info("Create table:" + tableName + "," + query);
            client.executeQuery(query);
        }
    }
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the serialized instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {
    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // set the temporary folder for Pangool instances to the temporary of the user that is running the Job
        // This folder will be used across the cluster for location the instances. This way, tasktrackers
        // that are being run as different user will still be able to locate this folder
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // Optionally, copy to DFS if
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
public static void deleteIfExists(FileSystem dFs, Path path) throws IOException {
    if (dFs.exists(path)) {
        dFs.delete(path, true);
    }
}
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
/**
 * Reads the content of a file into a String. Return null if the file does not
 * exist.
 */
public static String fileToString(FileSystem fs, Path path) throws IOException {
    if (!fs.exists(path)) {
        return null;
    }
    InputStream is = fs.open(path);
    InputStreamReader isr = new InputStreamReader(is);
    BufferedReader br = new BufferedReader(isr);
    char[] buff = new char[256];
    StringBuilder sb = new StringBuilder();
    int read;
    while ((read = br.read(buff)) != -1) {
        sb.append(buff, 0, read);
    }
    br.close();
    return sb.toString();
}
From source file:com.datasalt.pangool.utils.InstancesDistributor.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in a way that later can be recovered
 * anywhere in the cluster.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link InstancesDistributor#loadInstance(Configuration, Class, String, boolean)} to re-instantiate the serialized instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param fileName The file name where the instance will be serialized.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void distribute(Object obj, String fileName, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {
    FileSystem fS = FileSystem.get(conf);
    // set the temporary folder for Pangool instances to the temporary of the user that is running the Job
    // This folder will be used across the cluster for location the instances.
    // The default value can be changed by a user-provided one.
    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF, DEFAULT_HDFS_TMP_FOLDER_CONF_VALUE);
    Path toHdfs = new Path(tmpHdfsFolder, fileName);
    if (fS.exists(toHdfs)) { // Optionally, copy to DFS if
        fS.delete(toHdfs, false);
    }
    ObjectOutput out = new ObjectOutputStream(fS.create(toHdfs));
    out.writeObject(obj);
    out.close();
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
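A short usage sketch for the method above, assuming a driver-side call. The serialized object and file name are illustrative; the counterpart loadInstance(Configuration, Class, String, boolean) call is taken from the javadoc reference, and the final boolean is passed as true without asserting its exact semantics, which this listing does not show.

import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;

import com.datasalt.pangool.utils.InstancesDistributor;

public class DistributeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Any Serializable instance works; a HashMap is used here purely as an illustration.
        HashMap<String, String> params = new HashMap<String, String>();
        params.put("threshold", "0.75");

        // Ship the instance so it can later be recovered anywhere in the cluster.
        InstancesDistributor.distribute(params, "job-params.ser", conf);

        // Counterpart call named in the javadoc above; the boolean flag's meaning is assumed here.
        HashMap<String, String> restored = (HashMap<String, String>)
                InstancesDistributor.loadInstance(conf, HashMap.class, "job-params.ser", true);
    }
}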
From source file:com.datatorrent.contrib.parser.AbstractCsvParser.java
License:Apache License
@Override
public void setup(OperatorContext context) {
    if (fieldmappingFile != null) {
        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(conf);
            Path filepath = new Path(fieldmappingFile);
            if (fs.exists(filepath)) {
                BufferedReader bfr = new BufferedReader(new InputStreamReader(fs.open(filepath)));
                String str;
                while ((str = bfr.readLine()) != null) {
                    logger.debug("string is {}", str);
                    String[] temp = str.split(fieldmappingFileDelimiter);
                    Field field = new Field();
                    field.setName(temp[0]);
                    field.setType(temp[1]);
                    getFields().add(field);
                }
            } else {
                logger.debug(
                        "File containing fields and their data types does not exist.Please specify the fields and data type through properties of this operator.");
            }
        } catch (IOException ex) {
            DTThrowable.rethrow(ex);
        }
    }
    int countKeyValue = getFields().size();
    properties = new String[countKeyValue];
    processors = new CellProcessor[countKeyValue];
    initialise(properties, processors);
    CsvPreference preference = new CsvPreference.Builder('"', fieldDelimiter, lineDelimiter).build();
    csvReader = getReader(csvStringReader, preference);
}