Example usage of org.apache.hadoop.io.Text.decode

List of usage examples for org.apache.hadoop.io.Text.decode

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.decode.

Prototype

public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException 

Source Link

Usage

From source file:mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer.java

License:Apache License

/**
  * An iteration wrapper for a loaded scanner that is returned for each query above.
  */*from  w  w  w  .  j av a  2  s .c  om*/
  * @param scanner
  *            the results to iterate, then close.
  * @return an anonymous object that will iterate the resulting statements from a given scanner.
  */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(
        final ScannerBase scanner) {

    final Iterator<Entry<Key, Value>> i = scanner.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        @Override
        public Statement next() throws QueryEvaluationException {
            Entry<Key, Value> entry = i.next();
            Value v = entry.getValue();
            try {
                String dataString = Text.decode(v.get(), 0, v.getSize());
                Statement s = StatementSerializer.readStatement(dataString);
                return s;
            } catch (CharacterCodingException e) {
                logger.error("Error decoding value=" + Arrays.toString(v.get()), e);
                throw new QueryEvaluationException(e);
            } catch (IOException e) {
                logger.error("Error de-serializing statement, string=" + v.get(), e);
                throw new QueryEvaluationException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        @Override
        public void close() throws QueryEvaluationException {
            scanner.close();
        }
    };
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.Gram.java

License:Apache License

/**
 * @return gram term string/* www  . ja v  a  2 s.c om*/
 */
public String getString() {
    try {
        return Text.decode(bytes, 1, length - 1);
    } catch (CharacterCodingException e) {
        throw new IllegalStateException("Should not have happened " + e.toString());
    }
}

From source file:org.apache.orc.impl.ReaderImpl.java

License:Apache License

/**
 * Ensure this is an ORC file to prevent users from trying to read text
 * files or RC files as ORC files.
 *
 * The magic string is looked for at the end of the postscript first; if it
 * is not there, the start of the file is checked instead.
 *
 * @param in the file being read
 * @param path the filename for error messages
 * @param psLen the postscript length
 * @param buffer the tail of the file
 * @throws IOException if the magic string is found in neither place, or the
 *         header cannot be read or decoded
 */
protected static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer)
        throws IOException {
    int magicLength = OrcFile.MAGIC.length();
    // The magic string plus the one byte that follows it at the end of the buffer.
    int fullLength = magicLength + 1;
    if (psLen < fullLength || buffer.remaining() < fullLength) {
        throw new FileFormatException("Malformed ORC file " + path + ". Invalid postscript length " + psLen);
    }
    // limit() is already an absolute index into the buffer, so the last fullLength
    // readable bytes start at arrayOffset() + limit() - fullLength. Adding position()
    // as well would overshoot whenever the buffer's position is not zero.
    int offset = buffer.arrayOffset() + buffer.limit() - fullLength;
    byte[] array = buffer.array();
    // now look for the magic string at the end of the postscript.
    if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
        // If it isn't there, this may be the 0.11.0 version of ORC.
        // Read the first magicLength bytes of the file to check for the header
        byte[] header = new byte[magicLength];
        in.readFully(0, header, 0, magicLength);
        // if it isn't there, this isn't an ORC file
        if (!Text.decode(header, 0, magicLength).equals(OrcFile.MAGIC)) {
            throw new FileFormatException("Malformed ORC file " + path + ". Invalid postscript.");
        }
    }
}

From source file:org.apache.orc.impl.ReaderImpl.java

License:Apache License

/**
 * Ensure this is an ORC file to prevent users from trying to read text
 * files or RC files as ORC files.
 *
 * The magic string is looked for at the end of the postscript first; if it
 * is not there, the start of the buffer is checked instead.
 *
 * @param buffer the tail of the file
 * @param psLen the postscript length
 * @throws IOException if the magic string is found in neither place, or the
 *         bytes cannot be decoded
 */
protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException {
    int magicLength = OrcFile.MAGIC.length();
    // The magic string plus the one byte that follows it at the end of the buffer.
    int fullLength = magicLength + 1;
    if (psLen < fullLength || buffer.remaining() < fullLength) {
        throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
    }

    byte[] array = buffer.array();
    // Index in the backing array that corresponds to buffer index 0.
    int base = buffer.arrayOffset();
    // limit() is already an absolute buffer index, so position() must not be added on top of it.
    int offset = base + buffer.limit() - fullLength;
    // now look for the magic string at the end of the postscript.
    if (!Text.decode(array, offset, magicLength).equals(OrcFile.MAGIC)) {
        // if it isn't there, this may be 0.11.0 version of the ORC file.
        // Read the first magicLength bytes from the buffer to check for the header.
        // Start at arrayOffset() rather than 0 so a buffer that is a slice of a larger array is
        // read from its own first byte.
        // NOTE(review): assumes buffer index 0 is the start of the file - confirm against callers.
        if (!Text.decode(array, base, magicLength).equals(OrcFile.MAGIC)) {
            throw new FileFormatException("Malformed ORC file. Invalid postscript length " + psLen);
        }
    }
}

From source file:org.apache.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer.java

License:Apache License

/**
 * Wraps a scanner in an iteration whose elements are the statements read from each entry's value.
 *
 * @param s the scanner whose entries are iterated; it is closed when the iteration is closed.
 * @return an iteration that decodes every value as UTF-8 text and de-serializes it into a statement.
 */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(final Scanner s) {

    final Iterator<Entry<Key, Value>> entries = s.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return entries.hasNext();
        }

        @Override
        public Statement next() throws QueryEvaluationException {
            final Value value = entries.next().getValue();
            try {
                final String serialized = Text.decode(value.get(), 0, value.getSize());
                return StatementSerializer.readStatement(serialized);
            } catch (final CharacterCodingException decodeFailure) {
                logger.error("Error decoding value", decodeFailure);
                throw new QueryEvaluationException(decodeFailure);
            } catch (final IOException readFailure) {
                logger.error("Error deserializing statement", readFailure);
                throw new QueryEvaluationException(readFailure);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        @Override
        public void close() throws QueryEvaluationException {
            if (s == null) {
                return;
            }
            s.close();
        }
    };
}

From source file:org.apache.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer.java

License:Apache License

/**
 * An iteration wrapper for a loaded scanner that is returned for each query above.
 *
 * @param scanner
 *            the results to iterate, then close.
 * @return an anonymous object that will iterate the resulting statements from a given scanner.
 */
private static CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper(
        final ScannerBase scanner) {

    final Iterator<Entry<Key, Value>> i = scanner.iterator();

    return new CloseableIteration<Statement, QueryEvaluationException>() {
        @Override
        public boolean hasNext() {
            return i.hasNext();
        }

        /**
         * Decodes the next entry's value as UTF-8 text and de-serializes it into a statement.
         *
         * @throws QueryEvaluationException
         *             if the value cannot be decoded or the decoded text cannot be de-serialized.
         */
        @Override
        public Statement next() throws QueryEvaluationException {
            final Entry<Key, Value> entry = i.next();
            final Value v = entry.getValue();

            // Decode in its own step so the text is still available if de-serialization fails.
            final String dataString;
            try {
                dataString = Text.decode(v.get(), 0, v.getSize());
            } catch (final CharacterCodingException e) {
                logger.error("Error decoding value=" + Arrays.toString(v.get()), e);
                throw new QueryEvaluationException(e);
            }

            try {
                return StatementSerializer.readStatement(dataString);
            } catch (final IOException e) {
                // Report the decoded text; appending the byte[] itself would print only its identity.
                logger.error("Error de-serializing statement, string=" + dataString, e);
                throw new QueryEvaluationException(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("Remove not implemented");
        }

        /** Closes the underlying scanner. */
        @Override
        public void close() throws QueryEvaluationException {
            scanner.close();
        }
    };
}

From source file:org.terrier.structures.CompressingMetaIndex.java

License:Mozilla Public License

/** {@inheritDoc} */
public String getItem(String Key, int docid) throws IOException {
    // Inflate the compressed record stored for this docid into a recordLength-sized buffer.
    Inflater unzip = inflaterCache.get();
    unzip.reset();
    unzip.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));

    byte[] bOut = new byte[recordLength];
    try {
        unzip.inflate(bOut);
    } catch (DataFormatException dfe) {
        // Best effort: decoding continues with whatever was inflated, but the failure is
        // logged with the docid and the full stack trace so the bad record can be traced.
        logger.error("Could not inflate record for docid " + docid, dfe);
    }
    // Cut the requested key's value out of the record using its byte offset and length.
    return Text.decode(bOut, key2byteoffset.get(Key), key2bytelength.get(Key)).trim();
}

From source file:org.terrier.structures.CompressingMetaIndex.java

License:Mozilla Public License

/** {@inheritDoc} */
public String[] getItems(String[] Keys, int docid) throws IOException {
    // Inflate the compressed record stored for this docid into a recordLength-sized buffer.
    Inflater unzip = inflaterCache.get();
    unzip.reset();
    unzip.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));
    byte[] bOut = new byte[recordLength];
    try {
        unzip.inflate(bOut);
    } catch (DataFormatException dfe) {
        // Best effort: decoding continues with whatever was inflated, but the failure is
        // logged with the docid and the full stack trace so the bad record can be traced.
        logger.error("Could not inflate record for docid " + docid, dfe);
    }
    final int kCount = Keys.length;
    String[] sOut = new String[kCount];
    // Values are returned in the same order as the requested keys.
    for (int i = 0; i < kCount; i++) {
        sOut[i] = Text.decode(bOut, key2byteoffset.get(Keys[i]), key2bytelength.get(Keys[i])).trim();
    }
    return sOut;
}

From source file:org.terrier.structures.CompressingMetaIndex.java

License:Mozilla Public License

/** {@inheritDoc} */
public String[] getAllItems(int docid) throws IOException {
    // Inflate the compressed record stored for this docid into a recordLength-sized buffer.
    Inflater unzip = inflaterCache.get();
    unzip.reset();
    unzip.setInput(dataSource.read(offsetLookup.getOffset(docid), offsetLookup.getLength(docid)));
    byte[] bOut = new byte[recordLength];
    try {
        unzip.inflate(bOut);
    } catch (DataFormatException dfe) {
        // Best effort: decoding continues with whatever was inflated, but the failure is
        // logged with the docid and the full stack trace so the bad record can be traced.
        logger.error("Could not inflate record for docid " + docid, dfe);
    }
    final int kCount = this.keyCount;
    String[] sOut = new String[kCount];

    // One value per key, each cut from the record by its byte offset and length.
    for (int i = 0; i < kCount; i++) {
        sOut[i] = Text.decode(bOut, valueByteOffsets[i], valueByteLengths[i]).trim();
    }
    return sOut;
}

From source file:tests.it.crs4.seal.common.TestTextSamMapping.java

License:Open Source License

/**
 * Checks every field exposed by a mapping built from {@code sam}, including the
 * sequence and base-quality strings decoded from their byte buffers.
 */
@Test
public void testFields() throws java.nio.charset.CharacterCodingException {
    final TextSamMapping mapping = new TextSamMapping(new Text(sam));

    // Scalar fields.
    assertEquals("ERR020229.100000/1", mapping.getName());
    assertEquals(89, mapping.getFlag());
    assertEquals("chr6", mapping.getContig());
    assertEquals(3558357, mapping.get5Position());
    assertEquals(37, mapping.getMapQ());
    assertEquals("91M", mapping.getCigarStr());
    assertEquals(91, mapping.getLength());

    // Read sequence, decoded from the buffer's current position.
    final ByteBuffer sequenceBuffer = mapping.getSequence();
    final String sequence = Text.decode(sequenceBuffer.array(), sequenceBuffer.position(),
            mapping.getLength());
    assertEquals("AGCTTCTTTGACTCTCGAATTTTAGCACTAGAAGAAATAGTGAGGATTATATATTTCAGAAGTTCTCACCCAGGATATCAGAACACATTCA",
            sequence);

    // Base qualities, decoded the same way.
    final ByteBuffer qualityBuffer = mapping.getBaseQualities();
    final String qualities = Text.decode(qualityBuffer.array(), qualityBuffer.position(),
            mapping.getLength());
    assertEquals("5:CB:CCBCCB>:C@;BBBB??B;?>1@@=C=4ACCAB3A8=CC=C?CBC=CBCCCCCCCCCCCCC@5>?=?CAAB=3=>====5>=AC?C",
            qualities);
}