List of usage examples for org.apache.hadoop.fs.Path.toString()
@Override
public String toString()
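For reference, Path.toString() returns the string form of the path, keeping the scheme and authority when the Path was built from a fully qualified URI. A minimal sketch of the behavior (the class name is ours; output per stock Hadoop):

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // Fully qualified paths keep scheme and authority in toString().
        Path qualified = new Path("hdfs://namenode:8020/user/data/part-00000");
        System.out.println(qualified); // hdfs://namenode:8020/user/data/part-00000

        // Relative paths stay relative; toString() does not resolve them.
        Path relative = new Path("logs/2024");
        System.out.println(relative); // logs/2024

        // Path normalizes duplicate and trailing slashes on construction.
        Path messy = new Path("/tmp//solr/");
        System.out.println(messy); // /tmp/solr
    }
}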
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
private Path findSolrConfig(Configuration conf) throws IOException {
    Path solrHome = null;
    // We added these lines to make this patch work on Hadoop 0.20.2
    FileSystem localFs = FileSystem.getLocal(conf);
    if (FileSystem.get(conf).equals(localFs)) {
        return new Path(localSolrHome);
    }
    // end-of-addition
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    if (localArchives.length == 0) {
        throw new IOException(String.format("No local cache archives, where is %s", zipName));
    }
    for (Path unpackedDir : localArchives) {
        // Only logged if debugging
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Examining unpack directory %s for %s", unpackedDir, zipName));
            ProcessBuilder lsCmd = new ProcessBuilder(
                    new String[] { "/bin/ls", "-lR", unpackedDir.toString() });
            lsCmd.redirectErrorStream();
            Process ls = lsCmd.start();
            try {
                byte[] buf = new byte[16 * 1024];
                InputStream all = ls.getInputStream();
                int count;
                while ((count = all.read(buf)) > 0) {
                    System.err.write(buf, 0, count);
                }
            } catch (IOException ignore) {
            }
            System.err.format("Exit value is %d%n", ls.exitValue());
        }
        if (unpackedDir.getName().equals(zipName)) {
            solrHome = unpackedDir;
            break;
        }
    }
    return solrHome;
}
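Note that the loop above matches on getName(), the final path component, while the debug ls command is handed the full toString(). A minimal sketch of the difference, with a made-up path:

Path p = new Path("hdfs://nn:8020/tmp/archives/solr-home.zip");
System.out.println(p.toString());             // hdfs://nn:8020/tmp/archives/solr-home.zip
System.out.println(p.getName());              // solr-home.zip
System.out.println(p.getParent().toString()); // hdfs://nn:8020/tmp/archives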
From source file:com.datasalt.pangool.solr.TupleSolrOutputFormat.java
License:Apache License
private void setupSolrHomeCache(File solrHome, Configuration conf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    localSolrHome = solrHome.getAbsolutePath();
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    zipName = hdfsZipName;
    Path zipPath = new Path("/tmp", zipName);
    FileSystem fs = FileSystem.get(conf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + zipName);
    DistributedCache.addCacheArchive(baseZipUrl, conf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(conf)));
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java
License:Apache License
/**
 * Add a {@link Path} with a custom {@link InputFormat} and {@link Mapper} to the list of inputs
 * for the map-reduce job. Returns the instance files created.
 *
 * @param job
 *          The {@link Job}
 * @param path
 *          {@link Path} to be added to the list of inputs for the job
 * @param inputFormat
 *          {@link InputFormat} class to use for this path
 * @param mapperInstance
 *          {@link Mapper} instance to use
 * @throws IOException
 * @throws FileNotFoundException
 */
public static Set<String> addInputPath(Job job, Path path, InputFormat inputFormat, Mapper mapperInstance,
        Map<String, String> specificContext) throws FileNotFoundException, IOException {
    Set<String> instanceFiles = new HashSet<String>();
    // Serialize the Mapper instance
    String uniqueNameMapper = UUID.randomUUID().toString() + '.' + "mapper.dat";
    try {
        InstancesDistributor.distribute(mapperInstance, uniqueNameMapper, job.getConfiguration());
        instanceFiles.add(uniqueNameMapper);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    // Serialize the Input Format
    String uniqueNameInputFormat = UUID.randomUUID().toString() + '.' + "inputFormat.dat";
    try {
        InstancesDistributor.distribute(inputFormat, uniqueNameInputFormat, job.getConfiguration());
        instanceFiles.add(uniqueNameInputFormat);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    for (Map.Entry<String, String> contextKeyValue : specificContext.entrySet()) {
        PangoolMultipleInputs.addInputContext(job, uniqueNameInputFormat, contextKeyValue.getKey(),
                contextKeyValue.getValue());
    }
    addInputPath(job, path, uniqueNameInputFormat);
    Configuration conf = job.getConfiguration();
    String mapperMapping = path.toString() + ";" + uniqueNameMapper;
    String mappers = conf.get(PANGOOL_INPUT_DIR_MAPPERS_CONF);
    conf.set(PANGOOL_INPUT_DIR_MAPPERS_CONF, mappers == null ? mapperMapping : mappers + "," + mapperMapping);
    job.setMapperClass(DelegatingMapper.class);
    return instanceFiles;
}
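A hedged usage sketch: the job setup, input path, context property, and MyMapper are hypothetical; only the addInputPath signature is taken from the source above. Assumes the usual org.apache.hadoop.mapreduce imports.

Job job = new Job(new Configuration(), "pangool-multi-input");
Map<String, String> context = new HashMap<String, String>();
context.put("some.format.property", "value"); // hypothetical per-input override

Set<String> instanceFiles = PangoolMultipleInputs.addInputPath(job,
        new Path("/data/logs"),  // recorded in the mapper mapping via path.toString()
        new TextInputFormat(),   // the instance itself is serialized and shipped
        new MyMapper(),          // hypothetical Mapper subclass instance
        context);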
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.PangoolMultipleInputs.java
License:Apache License
private static void addInputPath(Job job, Path path, String inputFormatInstance) {
    /*
     * Only internal -> not allowed to add inputs without associated InputProcessor files
     */
    String inputFormatMapping = path.toString() + ";" + inputFormatInstance;
    Configuration conf = job.getConfiguration();
    String inputFormats = conf.get(PANGOOL_INPUT_DIR_FORMATS_CONF);
    conf.set(PANGOOL_INPUT_DIR_FORMATS_CONF,
            inputFormats == null ? inputFormatMapping : inputFormats + "," + inputFormatMapping);
    job.setInputFormatClass(DelegatingInputFormat.class);
}
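After two such calls, the property named by PANGOOL_INPUT_DIR_FORMATS_CONF holds a comma-separated list of path;instanceFile pairs built from path.toString(); for example (values hypothetical):

/user/in/a;550e8400-e29b-41d4-a716-446655440000.inputFormat.dat,/user/in/b;6ba7b810-9dad-11d1-80b4-00c04fd430c8.inputFormat.dat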
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Given a file post-fix, locate a file in the DistributedCache. It iterates over all the local
 * files and returns the first one that meets this condition.
 *
 * @param conf
 *          The Hadoop Configuration.
 * @param filePostFix
 *          The file post-fix.
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    FileSystem fS = FileSystem.get(conf);
    Path locatedFile = null;
    if (fS.equals(FileSystem.getLocal(conf))) {
        // We use the Java File API locally because the Hadoop Path, FileSystem, etc. are
        // too slow for tests that need to call this method a lot
        File tmpFolder = new File(conf.get("hadoop.tmp.dir"));
        for (File file : tmpFolder.listFiles()) {
            if (file.getName().endsWith(filePostFix)) {
                locatedFile = new Path(file.toString());
                break;
            }
        }
    } else {
        Path tmpHdfsFolder = new Path(conf.get(HDFS_TMP_FOLDER_CONF, conf.get("hadoop.tmp.dir")));
        for (FileStatus fSt : fS.listStatus(tmpHdfsFolder)) {
            Path path = fSt.getPath();
            if (path.toString().endsWith(filePostFix)) {
                locatedFile = path;
                break;
            }
        }
    }
    return locatedFile;
}
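A hedged usage sketch; the postfix value is hypothetical:

Configuration conf = new Configuration();
Path located = DCUtils.locateFileInDC(conf, "mapper.dat");
if (located == null) {
    throw new IOException("no file ending in mapper.dat found");
}
System.out.println("Located: " + located.toString());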
From source file:com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java
License:Apache License
public void cleanUp() throws IOException {
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        trash(entry.getKey());
    }
    for (Map.Entry<String, List<Pair<Object, Object>>> entry : outputs.entrySet()) {
        Path p = new Path(entry.getKey());
        // Output paths like .../part-00000 name a single part file;
        // trash the enclosing output directory instead.
        if (p.toString().contains("-0000")) {
            p = p.getParent();
        }
        trash(p.toString());
    }
}
From source file:com.datasalt.utils.commons.HadoopUtils.java
License:Apache License
/**
 * Given a file post-fix, locate a file in the DistributedCache.
 *
 * @param conf
 * @param filePostFix
 * @throws IOException
 */
public static Path locateFileInDC(Configuration conf, String filePostFix) throws IOException {
    Path locatedFile = null;
    Path[] paths = DistributedCache.getLocalCacheFiles(conf);
    if (paths == null) {
        return null;
    }
    for (Path p : paths) {
        if (p.toString().endsWith(filePostFix)) {
            locatedFile = p;
            break;
        }
    }
    return locatedFile;
}
From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java
License:Apache License
/**
 * Adds a CHANNELED input specification. A channeled input specification is a channel associated
 * with a Mapper and an input file or glob. The user will implement a
 * {@link MultiJoinChanneledMapper} which will be tied to a single channel.
 * <p>
 * The user must be consistent with the channel numbers they provide, for instance when two or
 * more different files must belong to the same channel.
 *
 * @param channel
 * @param location
 * @param channelClass
 * @param inputFormat
 * @param mapper
 * @throws IOException
 */
public MultiJoiner addChanneledInput(Integer channel, Path location, Class<? extends Object> channelClass,
        Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper)
        throws IOException {
    /*
     * Configure the MultiJoiner
     */
    setChannelDatumClass(channel, channelClass);
    FileSystem fS = FileSystem.get(getJob().getConfiguration());
    if (location.toString().contains("*")) { // is a glob
        for (FileStatus fSt : fS.globStatus(location, hiddenFileFilter)) { // expands the glob
            addChanneledInputInner(channel, fSt.getPath(), channelClass, inputFormat, mapper);
        }
    } else if (fS.getFileStatus(location).isDir()) {
        for (FileStatus fSt : fS.listStatus(location, hiddenFileFilter)) { // lists the directory
            addChanneledInputInner(channel, fSt.getPath(), channelClass, inputFormat, mapper);
        }
    } else {
        addChanneledInputInner(channel, location, channelClass, inputFormat, mapper);
    }
    return this;
}
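The glob branch detects wildcards by inspecting Path.toString(), then expands them with globStatus(). A standalone sketch of the same pattern, using stock Hadoop classes and a hypothetical path:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path location = new Path("/data/2024-*/part-*");
if (location.toString().contains("*")) { // same glob check as above
    for (FileStatus st : fs.globStatus(location)) {
        System.out.println(st.getPath()); // one line per expanded match
    }
}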
From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java
License:Apache License
private void addChanneledInputInner(Integer channel, Path location, Class<? extends Object> channelClass,
        Class<? extends InputFormat> inputFormat, Class<? extends MultiJoinChanneledMapper> mapper)
        throws IOException {

    FileSystem fS = location.getFileSystem(getJob().getConfiguration());
    if (!location.toString().startsWith("/")) {
        // relative path
        location = new Path(fS.getWorkingDirectory(), location);
    } else {
        // absolute path
        location = new Path(fS.getUri() + location.toString());
    }
    addInOrder(channel + "", MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_CHANNELS,
            getJob().getConfiguration());
    addInOrder(location.toString(), MultiJoinChanneledMapper.MULTIJOINER_CHANNELED_FILES,
            getJob().getConfiguration());
    System.out.println("Adding file " + location + " with mapper " + mapper.getName());
    MultipleInputs.addInputPath(getJob(), location, inputFormat, mapper);
}
From source file:com.datatorrent.flume.source.HdfsTestSource.java
License:Open Source License
private List<String> findFiles() throws IOException {
    List<String> files = Lists.newArrayList();
    Path directoryPath = new Path(directory);
    FileSystem lfs = FileSystem.newInstance(directoryPath.toUri(), configuration);
    try {
        logger.debug("checking for new files in {}", directoryPath);
        RemoteIterator<LocatedFileStatus> statuses = lfs.listFiles(directoryPath, true);
        while (statuses.hasNext()) {
            FileStatus status = statuses.next();
            Path path = status.getPath();
            String filePathStr = path.toString();
            if (!filePathStr.endsWith(".gz")) {
                continue;
            }
            logger.debug("new file {}", filePathStr);
            files.add(path.toString());
        }
    } catch (FileNotFoundException e) {
        logger.warn("Failed to list directory {}", directoryPath, e);
        throw new RuntimeException(e);
    } finally {
        lfs.close();
    }
    return files;
}