List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
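As a quick orientation, the sketch below shows the basic open-and-read pattern that the examples on this page follow. It is a minimal, self-contained sketch, not taken from any of the source files listed here; the path /tmp/example.txt is a placeholder.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws Exception {
        // Resolve the FileSystem configured for the default scheme (e.g. hdfs://).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // open(Path) returns an FSDataInputStream, a seekable buffered stream.
        // "/tmp/example.txt" is a placeholder path.
        Path path = new Path("/tmp/example.txt");
        try (FSDataInputStream in = fs.open(path);
                BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        }
        // Note: FileSystem.get() returns a cached, shared instance, so callers
        // normally do not close it themselves.
    }
}

Because the instance returned by FileSystem.get() is cached and shared across the JVM, most of the examples below leave the FileSystem open; the first example, which closes it in a finally block, only works safely because the servlet is the sole user of that instance.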
From source file:com.bizosys.hsearch.console.ui.SearchServlet.java
License:Apache License
private String getFileData(String path) throws IOException {
    StringBuilder sb = new StringBuilder();
    BufferedReader br = null;
    FileSystem fs = null;
    try {
        Path hadoopPath = new Path(path);
        fs = FileSystem.get(conf);
        if (fs.exists(hadoopPath)) {
            br = new BufferedReader(new InputStreamReader(fs.open(hadoopPath)));
            String line = null;
            boolean first = true;
            while ((line = br.readLine()) != null) {
                if (first)
                    first = false;
                else
                    sb.append('\n');
                sb.append(line);
            }
        }
    } catch (FileNotFoundException fex) {
        System.err.println("Cannot read from path " + path);
        throw new IOException(fex);
    } catch (Exception pex) {
        System.err.println("Error : " + path);
        throw new IOException(pex);
    } finally {
        if (null != br)
            try {
                br.close();
            } catch (Exception e) {
            }
        if (null != fs)
            try {
                fs.close();
            } catch (Exception e) {
            }
    }
    return sb.toString();
}
From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java
License:Apache License
public static FieldMapping createFieldMapping(Configuration conf, String path, StringBuilder sb)
        throws IOException {
    try {
        FieldMapping fm = null;
        BufferedReader br = null;
        Path hadoopPath = new Path(path);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(hadoopPath)) {
            br = new BufferedReader(new InputStreamReader(fs.open(hadoopPath)));
            String line = null;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
            fm = new FieldMapping();
            fm.parseXMLString(sb.toString());
        } else {
            fm = FieldMapping.getInstance(path);
        }
        IdSearchLog.l.debug("Field mapping instance create for " + path);
        return fm;
    } catch (FileNotFoundException fex) {
        System.err.println("Cannot read from path " + path);
        throw new IOException(fex);
    } catch (ParseException pex) {
        System.err.println("Cannot Parse File " + path);
        throw new IOException(pex);
    } catch (Exception pex) {
        System.err.println("Error : " + path);
        throw new IOException(pex);
    }
}
From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java
License:Apache License
public void load() throws IOException {
    InputStream stopwordStream = null;
    InputStream synonumStream = null;

    Configuration hsearchConf = HSearchConfig.getInstance().getConfiguration();
    String filenameSynonum = hsearchConf.get("synonyms.file.location", "synonyms.txt");
    String filenameStopword = hsearchConf.get("stopword.file.location", "stopwords.txt");

    isLowerCaseEnabled = hsearchConf.getBoolean("lucene.analysis.lowercasefilter", true);
    isAccentFilterEnabled = hsearchConf.getBoolean("lucene.analysis.accentfilter", true);
    isSnoballStemEnabled = hsearchConf.getBoolean("lucene.analysis.snowballfilter", true);
    isStopFilterEnabled = hsearchConf.getBoolean("lucene.analysis.stopfilter", true);

    if (null != stopwords)
        return;

    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (null != fs) {
        /**
         * STOPWORD
         */
        Path stopPath = new Path(filenameStopword);
        if (fs.exists(stopPath)) {
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading Stopword file from HDFS :" + stopPath.toString());
            stopwordStream = fs.open(stopPath);
        } else {
            IdSearchLog.l.fatal("Stopword file not available in HDFS :" + stopPath.toString());
        }

        /**
         * SYNONUM
         */
        Path synPath = new Path(filenameSynonum);
        if (fs.exists(synPath)) {
            synonumStream = fs.open(synPath);
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading synonym file from HDFS :" + filenameSynonum.toString());
        } else {
            IdSearchLog.l.fatal("Synonym file not available in HDFS :" + filenameSynonum.toString());
            IdSearchLog.l.fatal("Working Directory :" + fs.getWorkingDirectory().getName());
        }
    }

    ClassLoader classLoader = null;
    if (null == stopwordStream || null == synonumStream) {
        classLoader = Thread.currentThread().getContextClassLoader();
    }

    if (null == stopwordStream) {
        URL stopUrl = classLoader.getResource(filenameStopword);
        if (null != stopUrl) {
            String stopFile = stopUrl.getPath();
            if (null != stopFile) {
                File stopwordFile = new File(stopFile);
                if (stopwordFile.exists() && stopwordFile.canRead()) {
                    stopwordStream = new FileInputStream(stopwordFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l
                                .debug("Loading Stopword file from Local :" + stopwordFile.getAbsolutePath());
                } else {
                    IdSearchLog.l.fatal("Stopword file not available at :" + stopwordFile.getAbsolutePath());
                    IdSearchLog.l.fatal("Working Directory :" + fs.getHomeDirectory().getName());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Stopwords > " + filenameStopword);
            }
        }
    }

    if (null == synonumStream) {
        URL synUrl = classLoader.getResource(filenameSynonum);
        if (null != synUrl) {
            String synFileName = synUrl.getPath();
            if (null != synFileName) {
                File synFile = new File(synFileName);
                if (synFile.exists() && synFile.canRead()) {
                    synonumStream = new FileInputStream(synFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Loading Synonum file from Local :" + synFile.getAbsolutePath());
                } else {
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Synonum file not available at :" + synFile.getAbsolutePath());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Synonyms > " + filenameSynonum);
            }
        }
    }

    load(stopwordStream, synonumStream);
}
From source file:com.blackberry.logdriver.util.QueryIndex.java
License:Apache License
public static void main(String[] args) throws IOException, JSONException, ParseException {
    Map<String, Map<String, Map<String, Map<String, Component>>>> data =
            new HashMap<String, Map<String, Map<String, Map<String, Component>>>>();
    boolean printComponents = false;
    boolean printDates = false;
    boolean printSizes = false;
    boolean printIngest = false;
    boolean greppable = false;
    boolean datesSet = false;
    Date startDate = new Date(0);
    Date endDate = new Date();
    boolean printIngestOverTime = false;
    String[] pathArgs = new String[] { ".*", ".*", ".*", ".*" };
    int pathLevel = 0;

    inputFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    outputFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    // Set up HDFS filesystem
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Get path of latest index
    Path indexPath = getLatestIndex(fs);
    if (indexPath != null) {
        BufferedReader indexFile = new BufferedReader(new InputStreamReader(fs.open(indexPath)));
        String thisLine = indexFile.readLine();
        readJSONIntoMap(data, thisLine);
    } else {
        System.out.println("No valid index files found.");
        System.exit(0);
    }

    for (int i = 0; i < args.length; i++) {
        if (args[i].matches("^-.*")) {
            // This is a command line switch
            if (args[i].matches("-p")) {
                printComponents = true;
            } else if (args[i].matches("-d")) {
                printDates = true;
            } else if (args[i].matches("-s")) {
                printSizes = true;
            } else if (args[i].matches("-i")) {
                printIngest = true;
            } else if (args[i].matches("-l")) {
                greppable = true;
            } else if (args[i].matches("-t")) {
                try {
                    startDate = LogStats.roundDownToHour(new Date((Long.parseLong(args[i + 1]) * 1000)));
                    endDate = LogStats.roundUpToHour(new Date((Long.parseLong(args[i + 2]) * 1000)));
                    datesSet = true;
                } catch (ParseException e) {
                    System.out.println("Can't parse start and end dates.");
                    System.exit(0);
                }
                if (endDate.before(startDate)) {
                    System.out.println("Can't plot over a negative time range.");
                    System.exit(0);
                }
                i += 2;
            } else if (args[i].matches("-a")) {
                printIngestOverTime = true;
            } else {
                System.out.println("Invalid switch " + args[i]);
                System.exit(0);
            }
        } else {
            try {
                pathArgs[pathLevel] = args[i].substring(1, args[i].length() - 1);
                if (pathArgs[pathLevel].matches("[*]")) {
                    pathArgs[pathLevel] = ".*";
                }
                pathLevel++;
            } catch (ArrayIndexOutOfBoundsException e) {
                System.out.println("Error: Too many path arguments.");
                System.exit(0);
            }
        }
    }

    if (printIngestOverTime && (!datesSet || !printComponents)) {
        System.out.println("-stats set without -p or -t, aborting.");
        System.exit(0);
    }

    Set<Component> components = matchedComponents(data, pathArgs[0], pathArgs[1], pathArgs[2], pathArgs[3],
            startDate, endDate);
    if (components.size() > 0) {
        if (printComponents) {
            printComponents(fs, components, printDates, printSizes, printIngest, greppable, startDate, endDate,
                    printIngestOverTime);
        }
        double totalIngestDays = ((getLatestDate(components).getTime() - getEarliestDate(components).getTime())
                / oneDay) + 1;
        double totalIngestRate = totalSize(components) / totalIngestDays;
        System.out.println("\nTotals for all matched components:\n\n" + " Total Size: "
                + formatByteSize(totalSize(components)) + "\n" + " Ingest Rate: "
                + formatByteSize(totalIngestRate) + "/day\n" + " Earliest Date: "
                + outputFormat.format(getEarliestDate(components)) + "\n" + " Latest Date: "
                + outputFormat.format(getLatestDate(components)) + "\n" + " Total Time: "
                + (int) totalIngestDays + " days\n");
    } else {
        System.out.println("\nNo components found.\n");
    }
}
From source file:com.blackberry.logtools.LogTools.java
License:Apache License
public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent,
        Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs) {
    // Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    // Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", tmp + "/final");
        pigout = tmp + "/final";
    } else {
        params.put("out", StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        // PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", tmp);

        // Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
            System.exit(1);
        }
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
        pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");
    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            // Create filter to find files with the results from PIG job
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().contains("part-");
                }
            };

            // Find the files in the directory, open and printout results
            FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
            for (int i = 0; i < status.length; i++) {
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                String line;
                line = br.readLine();
                while (line != null) {
                    System.out.println(line);
                    line = br.readLine();
                }
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}
From source file:com.blm.orc.OrcRawRecordMerger.java
License:Apache License
/**
 * Read the side file to get the last flush length.
 * @param fs the file system to use
 * @param deltaFile the path of the delta file
 * @return the maximum size of the file to use
 * @throws IOException
 */
private static long getLastFlushLength(FileSystem fs, Path deltaFile) throws IOException {
    Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
    long result = Long.MAX_VALUE;
    try {
        FSDataInputStream stream = fs.open(lengths);
        result = -1;
        while (stream.available() > 0) {
            result = stream.readLong();
        }
        stream.close();
        return result;
    } catch (IOException ioe) {
        return result;
    }
}
From source file:com.blm.orc.ReaderImpl.java
License:Apache License
private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, Path path, long maxFileLength)
        throws IOException {
    FSDataInputStream file = fs.open(path);

    // figure out the size of the file using the option or filesystem
    long size;
    if (maxFileLength == Long.MAX_VALUE) {
        size = fs.getFileStatus(path).getLen();
    } else {
        size = maxFileLength;
    }

    // read last bytes into buffer to get PostScript
    int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
    file.seek(size - readSize);
    ByteBuffer buffer = ByteBuffer.allocate(readSize);
    file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());

    // read the PostScript
    // get length of PostScript
    int psLen = buffer.get(readSize - 1) & 0xff;
    ensureOrcFooter(file, path, psLen, buffer);
    int psOffset = readSize - 1 - psLen;
    CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset, psLen);
    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
    checkOrcVersion(LOG, path, ps.getVersionList());

    int footerSize = (int) ps.getFooterLength();
    int metadataSize = (int) ps.getMetadataLength();
    OrcFile.WriterVersion writerVersion;
    if (ps.hasWriterVersion()) {
        writerVersion = getWriterVersion(ps.getWriterVersion());
    } else {
        writerVersion = OrcFile.WriterVersion.ORIGINAL;
    }

    // check compression codec
    switch (ps.getCompression()) {
    case NONE:
        break;
    case ZLIB:
        break;
    case SNAPPY:
        break;
    case LZO:
        break;
    default:
        throw new IllegalArgumentException("Unknown compression");
    }

    // check if extra bytes need to be read
    int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
    if (extra > 0) {
        // more bytes need to be read, seek back to the right place and read extra bytes
        file.seek(size - readSize - extra);
        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
        file.readFully(extraBuf.array(), extraBuf.arrayOffset() + extraBuf.position(), extra);
        extraBuf.position(extra);
        // append with already read bytes
        extraBuf.put(buffer);
        buffer = extraBuf;
        buffer.position(0);
        buffer.limit(footerSize + metadataSize);
    } else {
        // footer is already in the bytes in buffer, just adjust position, length
        buffer.position(psOffset - footerSize - metadataSize);
        buffer.limit(psOffset);
    }

    // remember position for later
    buffer.mark();

    file.close();

    return new FileMetaInfo(ps.getCompression().toString(), (int) ps.getCompressionBlockSize(),
            (int) ps.getMetadataLength(), buffer, ps.getVersionList(), writerVersion);
}
From source file:com.blm.orc.RecordReaderImpl.java
License:Apache License
RecordReaderImpl(List<StripeInformation> stripes, FileSystem fileSystem, Path path, Reader.Options options,
        List<OrcProto.Type> types, CompressionCodec codec, int bufferSize, long strideRate, Configuration conf)
        throws IOException {
    this.file = fileSystem.open(path);
    this.codec = codec;
    this.types = types;
    this.bufferSize = bufferSize;
    this.included = options.getInclude();
    this.conf = conf;
    this.sarg = options.getSearchArgument();
    if (sarg != null) {
        sargLeaves = sarg.getLeaves();
        filterColumns = mapSargColumns(sargLeaves, options.getColumnNames(), 0);
    } else {
        sargLeaves = null;
        filterColumns = null;
    }
    long rows = 0;
    long skippedRows = 0;
    long offset = options.getOffset();
    long maxOffset = options.getMaxOffset();
    for (StripeInformation stripe : stripes) {
        long stripeStart = stripe.getOffset();
        if (offset > stripeStart) {
            skippedRows += stripe.getNumberOfRows();
        } else if (stripeStart < maxOffset) {
            this.stripes.add(stripe);
            rows += stripe.getNumberOfRows();
        }
    }

    final boolean zeroCopy = (conf != null) && (HiveConf.getBoolVar(conf, HIVE_ORC_ZEROCOPY));

    if (zeroCopy && (codec == null || ((codec instanceof DirectDecompressionCodec)
            && ((DirectDecompressionCodec) codec).isAvailable()))) {
        /* codec is null or is available */
        this.zcr = ShimLoader.getHadoopShims().getZeroCopyReader(file, pool);
    } else {
        this.zcr = null;
    }

    firstRow = skippedRows;
    totalRowCount = rows;
    reader = createTreeReader(path, 0, types, included, conf);
    indexes = new OrcProto.RowIndex[types.size()];
    rowIndexStride = strideRate;
    advanceToNextRow(0L);
}
From source file:com.bluedata.flume.sinks.AvroEventSerializer.java
License:Apache License
private Schema loadFromUrl(String schemaUrl) throws IOException {
    Configuration conf = new Configuration();
    Schema.Parser parser = new Schema.Parser();
    if (schemaUrl.toLowerCase(Locale.ENGLISH).startsWith("hdfs://")
            || schemaUrl.toLowerCase(Locale.ENGLISH).startsWith("dtap://")) {
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream input = null;
        try {
            input = fs.open(new Path(schemaUrl));
            return parser.parse(input);
        } finally {
            if (input != null) {
                input.close();
            }
        }
    } else {
        InputStream is = null;
        try {
            is = new URL(schemaUrl).openStream();
            return parser.parse(is);
        } finally {
            if (is != null) {
                is.close();
            }
        }
    }
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.PathRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    path = split.getPath().toString();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}