Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem open.

Prototype

public FSDataInputStream open(Path f) throws IOException

Document

Opens an FSDataInputStream at the indicated Path.
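
Before the full examples below, here is a minimal sketch of the common pattern: obtain a FileSystem from the configuration, call open(Path) to get an FSDataInputStream, and wrap it in a reader. The class name OpenExample and the path /tmp/example.txt are illustrative assumptions, not taken from any of the source files listed on this page.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // FileSystem.get typically returns a cached, shared instance,
        // so it is not closed here.
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // assumed path, for illustration only

        // open(Path) returns an FSDataInputStream; wrap it for line-oriented text reading.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}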

Usage

From source file:com.bizosys.hsearch.console.ui.SearchServlet.java

License:Apache License

private String getFileData(String path) throws IOException {
    StringBuilder sb = new StringBuilder();
    BufferedReader br = null;
    FileSystem fs = null;
    try {
        Path hadoopPath = new Path(path);
        fs = FileSystem.get(conf);
        if (fs.exists(hadoopPath)) {
            br = new BufferedReader(new InputStreamReader(fs.open(hadoopPath)));
            String line = null;
            boolean first = true;
            while ((line = br.readLine()) != null) {
                if (first)
                    first = false;
                else
                    sb.append('\n');
                sb.append(line);
            }
        }
    } catch (FileNotFoundException fex) {
        System.err.println("Cannot read from path " + path);
        throw new IOException(fex);
    } catch (Exception pex) {
        System.err.println("Error : " + path);
        throw new IOException(pex);
    } finally {
        if (null != br)
            try {
                br.close();
            } catch (Exception e) {
            }
        if (null != fs)
            try {
                fs.close();
            } catch (Exception e) {
            }
    }
    return sb.toString();
}

From source file:com.bizosys.hsearch.kv.indexer.KVIndexer.java

License:Apache License

public static FieldMapping createFieldMapping(Configuration conf, String path, StringBuilder sb)
        throws IOException {
    try {
        FieldMapping fm = null;
        BufferedReader br = null;
        Path hadoopPath = new Path(path);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(hadoopPath)) {
            br = new BufferedReader(new InputStreamReader(fs.open(hadoopPath)));
            String line = null;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
            fm = new FieldMapping();
            fm.parseXMLString(sb.toString());
        } else {
            fm = FieldMapping.getInstance(path);
        }
        IdSearchLog.l.debug("Field mapping instance create for " + path);
        return fm;

    } catch (FileNotFoundException fex) {
        System.err.println("Cannot read from path " + path);
        throw new IOException(fex);
    } catch (ParseException pex) {
        System.err.println("Cannot Parse File " + path);
        throw new IOException(pex);
    } catch (Exception pex) {
        System.err.println("Error : " + path);
        throw new IOException(pex);
    }
}

From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java

License:Apache License

public void load() throws IOException {

    InputStream stopwordStream = null;
    InputStream synonumStream = null;

    Configuration hsearchConf = HSearchConfig.getInstance().getConfiguration();
    String filenameSynonum = hsearchConf.get("synonyms.file.location", "synonyms.txt");
    String filenameStopword = hsearchConf.get("stopword.file.location", "stopwords.txt");

    isLowerCaseEnabled = hsearchConf.getBoolean("lucene.analysis.lowercasefilter", true);
    isAccentFilterEnabled = hsearchConf.getBoolean("lucene.analysis.accentfilter", true);
    isSnoballStemEnabled = hsearchConf.getBoolean("lucene.analysis.snowballfilter", true);
    isStopFilterEnabled = hsearchConf.getBoolean("lucene.analysis.stopfilter", true);

    if (null != stopwords)
        return;

    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (null != fs) {

        /**
         * STOPWORD
         */
        Path stopPath = new Path(filenameStopword);
        if (fs.exists(stopPath)) {
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading Stopword file from HDFS :" + stopPath.toString());
            stopwordStream = fs.open(stopPath);
        } else {
            IdSearchLog.l.fatal("Stopword file not available in HDFS :" + stopPath.toString());
        }

        /**
         * SYNONUM
         */

        Path synPath = new Path(filenameSynonum);
        if (fs.exists(synPath)) {
            synonumStream = fs.open(synPath);
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading synonym file from HDFS :" + filenameSynonum.toString());
        } else {
            IdSearchLog.l.fatal("Synonym file not available in HDFS :" + filenameSynonum.toString());
            IdSearchLog.l.fatal("Working Directory :" + fs.getWorkingDirectory().getName());
        }
    }

    ClassLoader classLoader = null;

    if (null == stopwordStream || null == synonumStream) {
        classLoader = Thread.currentThread().getContextClassLoader();
    }

    if (null == stopwordStream) {
        URL stopUrl = classLoader.getResource(filenameStopword);
        if (null != stopUrl) {
            String stopFile = stopUrl.getPath();
            if (null != stopFile) {
                File stopwordFile = new File(stopFile);
                if (stopwordFile.exists() && stopwordFile.canRead()) {
                    stopwordStream = new FileInputStream(stopwordFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l
                                .debug("Loading Stopword file from Local :" + stopwordFile.getAbsolutePath());
                } else {
                    IdSearchLog.l.fatal("Stopword file not available at :" + stopwordFile.getAbsolutePath());
                    IdSearchLog.l.fatal("Working Directory :" + fs.getHomeDirectory().getName());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Stopwords > " + filenameStopword);
            }
        }
    }

    if (null == synonumStream) {
        URL synUrl = classLoader.getResource(filenameSynonum);
        if (null != synUrl) {
            String synFileName = synUrl.getPath();
            if (null != synFileName) {
                File synFile = new File(synFileName);
                if (synFile.exists() && synFile.canRead()) {
                    synonumStream = new FileInputStream(synFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Loading Synonum file from Local :" + synFile.getAbsolutePath());
                } else {
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Synonum file not available at :" + synFile.getAbsolutePath());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Synonyms > " + filenameSynonum);
            }
        }
    }

    load(stopwordStream, synonumStream);
}

From source file:com.blackberry.logdriver.util.QueryIndex.java

License:Apache License

public static void main(String[] args) throws IOException, JSONException, ParseException {

    Map<String, Map<String, Map<String, Map<String, Component>>>> data = new HashMap<String, Map<String, Map<String, Map<String, Component>>>>();
    boolean printComponents = false;
    boolean printDates = false;
    boolean printSizes = false;
    boolean printIngest = false;
    boolean greppable = false;
    boolean datesSet = false;
    Date startDate = new Date(0);
    Date endDate = new Date();
    boolean printIngestOverTime = false;

    String[] pathArgs = new String[] { ".*", ".*", ".*", ".*" };
    int pathLevel = 0;

    inputFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    outputFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    // Set up HDFS filesystem
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Get path of latest index
    Path indexPath = getLatestIndex(fs);
    if (indexPath != null) {
        BufferedReader indexFile = new BufferedReader(new InputStreamReader(fs.open(indexPath)));
        String thisLine = indexFile.readLine();
        readJSONIntoMap(data, thisLine);
    } else {
        System.out.println("No valid index files found.");
        System.exit(0);
    }

    for (int i = 0; i < args.length; i++) {
        if (args[i].matches("^-.*")) {
            // This is a command line switch
            if (args[i].matches("-p")) {
                printComponents = true;
            } else if (args[i].matches("-d")) {
                printDates = true;
            } else if (args[i].matches("-s")) {
                printSizes = true;
            } else if (args[i].matches("-i")) {
                printIngest = true;
            } else if (args[i].matches("-l")) {
                greppable = true;
            } else if (args[i].matches("-t")) {
                try {
                    startDate = LogStats.roundDownToHour(new Date((Long.parseLong(args[i + 1]) * 1000)));
                    endDate = LogStats.roundUpToHour(new Date((Long.parseLong(args[i + 2]) * 1000)));
                    datesSet = true;
                } catch (ParseException e) {
                    System.out.println("Can't parse start and end dates.");
                    System.exit(0);
                }
                if (endDate.before(startDate)) {
                    System.out.println("Can't plot over a negative time range.");
                    System.exit(0);
                }
                i += 2;
            } else if (args[i].matches("-a")) {
                printIngestOverTime = true;
            } else {
                System.out.println("Invalid switch " + args[i]);
                System.exit(0);
            }
        } else {
            try {
                pathArgs[pathLevel] = args[i].substring(1, args[i].length() - 1);
                if (pathArgs[pathLevel].matches("[*]")) {
                    pathArgs[pathLevel] = ".*";
                }
                pathLevel++;
            } catch (ArrayIndexOutOfBoundsException e) {
                System.out.println("Error: Too many path arguments.");
                System.exit(0);
            }
        }
    }

    if (printIngestOverTime && (!datesSet || !printComponents)) {
        System.out.println("-stats set without -p or -t, aborting.");
        System.exit(0);
    }

    Set<Component> components = matchedComponents(data, pathArgs[0], pathArgs[1], pathArgs[2], pathArgs[3],
            startDate, endDate);
    if (components.size() > 0) {
        if (printComponents) {
            printComponents(fs, components, printDates, printSizes, printIngest, greppable, startDate, endDate,
                    printIngestOverTime);
        }
        double totalIngestDays = ((getLatestDate(components).getTime() - getEarliestDate(components).getTime())
                / oneDay) + 1;
        double totalIngestRate = totalSize(components) / totalIngestDays;
        System.out.println("\nTotals for all matched components:\n\n" + "  Total Size:    "
                + formatByteSize(totalSize(components)) + "\n" + "  Ingest Rate:   "
                + formatByteSize(totalIngestRate) + "/day\n" + "  Earliest Date: "
                + outputFormat.format(getEarliestDate(components)) + "\n" + "  Latest Date:   "
                + outputFormat.format(getLatestDate(components)) + "\n" + "  Total Time:    "
                + (int) totalIngestDays + " days\n");
    } else {
        System.out.println("\nNo components found.\n");
    }
}

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent,
        Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs) {
    //Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    //Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", tmp + "/final");
        pigout = tmp + "/final";
    } else {
        params.put("out", StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        //PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", tmp);

        //Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
            System.exit(1);
        }
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
        pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");
    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            //Create filter to find files with the results from PIG job
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().contains("part-");
                }
            };

            //Find the files in the directory, open and printout results
            FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
            for (int i = 0; i < status.length; i++) {
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                String line;
                line = br.readLine();
                while (line != null) {
                    System.out.println(line);
                    line = br.readLine();
                }
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}

From source file:com.blm.orc.OrcRawRecordMerger.java

License:Apache License

/**
 * Read the side file to get the last flush length.
 * @param fs the file system to use
 * @param deltaFile the path of the delta file
 * @return the maximum size of the file to use
 * @throws IOException
 */
private static long getLastFlushLength(FileSystem fs, Path deltaFile) throws IOException {
    Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
    long result = Long.MAX_VALUE;
    try {
        FSDataInputStream stream = fs.open(lengths);
        result = -1;
        while (stream.available() > 0) {
            result = stream.readLong();
        }
        stream.close();
        return result;
    } catch (IOException ioe) {
        return result;
    }
}

From source file:com.blm.orc.ReaderImpl.java

License:Apache License

private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, Path path, long maxFileLength)
        throws IOException {
    FSDataInputStream file = fs.open(path);

    // figure out the size of the file using the option or filesystem
    long size;
    if (maxFileLength == Long.MAX_VALUE) {
        size = fs.getFileStatus(path).getLen();
    } else {
        size = maxFileLength;
    }

    //read last bytes into buffer to get PostScript
    int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
    file.seek(size - readSize);
    ByteBuffer buffer = ByteBuffer.allocate(readSize);
    file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());

    //read the PostScript
    //get length of PostScript
    int psLen = buffer.get(readSize - 1) & 0xff;
    ensureOrcFooter(file, path, psLen, buffer);
    int psOffset = readSize - 1 - psLen;
    CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset, psLen);
    OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);

    checkOrcVersion(LOG, path, ps.getVersionList());

    int footerSize = (int) ps.getFooterLength();
    int metadataSize = (int) ps.getMetadataLength();
    OrcFile.WriterVersion writerVersion;
    if (ps.hasWriterVersion()) {
        writerVersion = getWriterVersion(ps.getWriterVersion());
    } else {
        writerVersion = OrcFile.WriterVersion.ORIGINAL;
    }

    //check compression codec
    switch (ps.getCompression()) {
    case NONE:
        break;
    case ZLIB:
        break;
    case SNAPPY:
        break;
    case LZO:
        break;
    default:
        throw new IllegalArgumentException("Unknown compression");
    }

    //check if extra bytes need to be read
    int extra = Math.max(0, psLen + 1 + footerSize + metadataSize - readSize);
    if (extra > 0) {
        //more bytes need to be read, seek back to the right place and read extra bytes
        file.seek(size - readSize - extra);
        ByteBuffer extraBuf = ByteBuffer.allocate(extra + readSize);
        file.readFully(extraBuf.array(), extraBuf.arrayOffset() + extraBuf.position(), extra);
        extraBuf.position(extra);
        //append with already read bytes
        extraBuf.put(buffer);
        buffer = extraBuf;
        buffer.position(0);
        buffer.limit(footerSize + metadataSize);
    } else {
        //footer is already in the bytes in buffer, just adjust position, length
        buffer.position(psOffset - footerSize - metadataSize);
        buffer.limit(psOffset);
    }

    // remember position for later
    buffer.mark();

    file.close();

    return new FileMetaInfo(ps.getCompression().toString(), (int) ps.getCompressionBlockSize(),
            (int) ps.getMetadataLength(), buffer, ps.getVersionList(), writerVersion);
}

From source file:com.blm.orc.RecordReaderImpl.java

License:Apache License

RecordReaderImpl(List<StripeInformation> stripes, FileSystem fileSystem, Path path, Reader.Options options,
        List<OrcProto.Type> types, CompressionCodec codec, int bufferSize, long strideRate, Configuration conf)
        throws IOException {
    this.file = fileSystem.open(path);
    this.codec = codec;
    this.types = types;
    this.bufferSize = bufferSize;
    this.included = options.getInclude();
    this.conf = conf;
    this.sarg = options.getSearchArgument();
    if (sarg != null) {
        sargLeaves = sarg.getLeaves();
        filterColumns = mapSargColumns(sargLeaves, options.getColumnNames(), 0);
    } else {
        sargLeaves = null;
        filterColumns = null;
    }
    long rows = 0;
    long skippedRows = 0;
    long offset = options.getOffset();
    long maxOffset = options.getMaxOffset();
    for (StripeInformation stripe : stripes) {
        long stripeStart = stripe.getOffset();
        if (offset > stripeStart) {
            skippedRows += stripe.getNumberOfRows();
        } else if (stripeStart < maxOffset) {
            this.stripes.add(stripe);
            rows += stripe.getNumberOfRows();
        }
    }

    final boolean zeroCopy = (conf != null) && (HiveConf.getBoolVar(conf, HIVE_ORC_ZEROCOPY));

    if (zeroCopy && (codec == null || ((codec instanceof DirectDecompressionCodec)
            && ((DirectDecompressionCodec) codec).isAvailable()))) {
        /* codec is null or is available */
        this.zcr = ShimLoader.getHadoopShims().getZeroCopyReader(file, pool);
    } else {
        this.zcr = null;
    }

    firstRow = skippedRows;
    totalRowCount = rows;
    reader = createTreeReader(path, 0, types, included, conf);
    indexes = new OrcProto.RowIndex[types.size()];
    rowIndexStride = strideRate;
    advanceToNextRow(0L);
}

From source file:com.bluedata.flume.sinks.AvroEventSerializer.java

License:Apache License

private Schema loadFromUrl(String schemaUrl) throws IOException {
    Configuration conf = new Configuration();
    Schema.Parser parser = new Schema.Parser();
    if (schemaUrl.toLowerCase(Locale.ENGLISH).startsWith("hdfs://")
            || schemaUrl.toLowerCase(Locale.ENGLISH).startsWith("dtap://")) {
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream input = null;
        try {
            input = fs.open(new Path(schemaUrl));
            return parser.parse(input);
        } finally {
            if (input != null) {
                input.close();
            }
        }
    } else {
        InputStream is = null;
        try {
            is = new URL(schemaUrl).openStream();
            return parser.parse(is);
        } finally {
            if (is != null) {
                is.close();
            }
        }
    }
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.PathRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    path = split.getPath().toString();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}