Example usage for org.apache.lucene.util IOUtils UTF_8

List of usage examples for org.apache.lucene.util IOUtils UTF_8

Introduction

In this page you can find the example usage for org.apache.lucene.util IOUtils UTF_8.

Prototype

String UTF_8

To view the source code for org.apache.lucene.util IOUtils UTF_8, follow the Source Link below.

Click Source Link

Document

UTF-8 charset string.

Usage

From source file:com.github.cstoku.neologd.unidic.lucene.analysis.ja.JapaneseTokenizerFactory.java

License:Apache License

/**
 * Loads the optional user dictionary from {@code userDictionaryPath} via the
 * given {@link ResourceLoader}, or clears it when no path is configured.
 *
 * @param loader resource loader used to open the dictionary file
 * @throws IOException if the dictionary resource cannot be read or parsed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (userDictionaryPath != null) {
        // try-with-resources: the original code never closed the stream,
        // leaking the file handle after the dictionary was parsed.
        try (InputStream stream = loader.openResource(userDictionaryPath)) {
            String encoding = userDictionaryEncoding;
            if (encoding == null) {
                // Fall back to UTF-8 when no explicit encoding is configured.
                encoding = IOUtils.UTF_8;
            }
            // REPORT (rather than silently replace) malformed input so a
            // mis-encoded dictionary fails loudly at load time.
            CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT);
            Reader reader = new InputStreamReader(stream, decoder);
            userDictionary = UserDictionary.open(reader);
        }
    } else {
        userDictionary = null;
    }
}

From source file:com.o19s.es.ltr.feature.store.index.IndexFeatureStore.java

License:Apache License

/**
 * Reads a classpath resource bundled with {@link IndexFeatureStore} and
 * returns its contents decoded as UTF-8.
 *
 * @param indexName index name, used only for error reporting
 * @param resource  classpath resource path to read
 * @return the resource contents as a UTF-8 string
 * @throws IllegalStateException if the resource cannot be read
 */
private static String readResourceFile(String indexName, String resource) {
    try (InputStream is = IndexFeatureStore.class.getResourceAsStream(resource)) {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Streams.copy(is, buffer);
        return buffer.toString(IOUtils.UTF_8);
    } catch (Exception e) {
        // Log with full context before rethrowing; the caller cannot recover.
        LOGGER.error((org.apache.logging.log4j.util.Supplier<?>) () -> new ParameterizedMessage(
                "failed to create ltr feature store index [{}] with resource [{}]", indexName, resource), e);
        throw new IllegalStateException(
                "failed to create ltr feature store index with resource [" + resource + "]", e);
    }
}

From source file:com.o19s.es.ltr.ranker.parser.XGBoostJsonParserTests.java

License:Apache License

/**
 * Test helper: loads a model definition from the classpath and returns it
 * as a UTF-8 decoded string.
 *
 * @param model classpath resource path of the model file
 * @return the model file contents
 * @throws IOException if the resource cannot be read
 */
private String readModel(String model) throws IOException {
    try (InputStream in = this.getClass().getResourceAsStream(model)) {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Streams.copy(in, buffer);
        return buffer.toString(IOUtils.UTF_8);
    }
}

From source file:com.rocana.lucene.codec.v1.RocanaStats.java

License:Apache License

/**
 * Renders a human-readable, multi-line report of these term-dictionary
 * statistics (index FST size, term counts/bytes, block breakdown by kind
 * and by prefix length). The exact line format is relied on by callers
 * that print or diff this output, so it must stay stable.
 */
@Override
public String toString() {
    final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    // PrintStream over an in-memory buffer; UTF-8 is forced explicitly so the
    // result does not depend on the platform default charset.
    PrintStream out;
    try {
        out = new PrintStream(bos, false, IOUtils.UTF_8);
    } catch (UnsupportedEncodingException bogus) {
        // UTF-8 is guaranteed by the JVM spec; this can never happen.
        throw new RuntimeException(bogus);
    }

    out.println("  index FST:");
    out.println("    " + indexNumBytes + " bytes");
    out.println("  terms:");
    out.println("    " + totalTermCount + " terms");
    // Guard against division by zero when the segment holds no terms.
    out.println("    " + totalTermBytes + " bytes"
            + (totalTermCount != 0
                    ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes) / totalTermCount)
                            + " bytes/term)"
                    : ""));
    out.println("  blocks:");
    out.println("    " + totalBlockCount + " blocks");
    out.println("    " + termsOnlyBlockCount + " terms-only blocks");
    out.println("    " + subBlocksOnlyBlockCount + " sub-block-only blocks");
    out.println("    " + mixedBlockCount + " mixed blocks");
    out.println("    " + floorBlockCount + " floor blocks");
    out.println("    " + (totalBlockCount - floorSubBlockCount) + " non-floor blocks");
    out.println("    " + floorSubBlockCount + " floor sub-blocks");
    // Each per-byte-category line includes an average only when at least one
    // block exists, again avoiding division by zero.
    out.println(
            "    " + totalBlockSuffixBytes + " term suffix bytes"
                    + (totalBlockCount != 0
                            ? " (" + String.format(Locale.ROOT, "%.1f",
                                    ((double) totalBlockSuffixBytes) / totalBlockCount) + " suffix-bytes/block)"
                            : ""));
    out.println(
            "    " + totalBlockStatsBytes + " term stats bytes"
                    + (totalBlockCount != 0
                            ? " (" + String.format(Locale.ROOT, "%.1f",
                                    ((double) totalBlockStatsBytes) / totalBlockCount) + " stats-bytes/block)"
                            : ""));
    out.println(
            "    " + totalBlockOtherBytes + " other bytes"
                    + (totalBlockCount != 0
                            ? " (" + String.format(Locale.ROOT, "%.1f",
                                    ((double) totalBlockOtherBytes) / totalBlockCount) + " other-bytes/block)"
                            : ""));
    if (totalBlockCount != 0) {
        out.println("    by prefix length:");
        int total = 0;
        for (int prefix = 0; prefix < blockCountByPrefixLen.length; prefix++) {
            final int blockCount = blockCountByPrefixLen[prefix];
            total += blockCount;
            if (blockCount != 0) {
                out.println("      " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
            }
        }
        // Sanity check: per-prefix counts must sum to the overall block count.
        assert totalBlockCount == total;
    }

    try {
        // Decode the buffer with the same charset the PrintStream wrote with.
        return bos.toString(IOUtils.UTF_8);
    } catch (UnsupportedEncodingException bogus) {
        // Unreachable: UTF-8 is always supported.
        throw new RuntimeException(bogus);
    }
}

From source file:io.anserini.integration.EndToEndTest.java

License:Apache License

/**
 * Runs Lucene's CheckIndex over the freshly built test index and asserts
 * the expected per-segment statistics (field norms, term index, stored
 * fields) configured by the concrete test subclass.
 *
 * @throws IOException if the index directory cannot be opened or checked
 */
protected void checkIndex() throws IOException {
    // Capture CheckIndex's diagnostic output so it can be dumped on failure.
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    // NOTE(review): 'dir' is never closed here; CheckIndex.close() may or may
    // not release it — confirm and consider closing the Directory explicitly.
    Directory dir = FSDirectory.open(Paths.get(this.indexOutputPrefix + this.collectionClass));
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
    // In verbose mode diagnostics go to stdout instead of the buffer,
    // so the failure dump below will be empty in that case.
    if (VERBOSE)
        checker.setInfoStream(System.out);
    CheckIndex.Status indexStatus = checker.checkIndex();
    if (!indexStatus.clean) {
        System.out.println("CheckIndex failed");
        System.out.println(bos.toString(IOUtils.UTF_8));
        fail();
    }

    // The test index is expected to consist of a single segment.
    final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
    assertTrue(seg.openReaderPassed);

    assertNotNull(seg.diagnostics);

    // Field norms: no errors, and the field count the subclass expects.
    assertNotNull(seg.fieldNormStatus);
    assertNull(seg.fieldNormStatus.error);
    assertEquals(this.fieldNormStatusTotalFields, seg.fieldNormStatus.totFields);

    // Term index: term count, total frequency and total positions.
    assertNotNull(seg.termIndexStatus);
    assertNull(seg.termIndexStatus.error);
    assertEquals(this.termIndexStatusTermCount, seg.termIndexStatus.termCount);
    assertEquals(this.termIndexStatusTotFreq, seg.termIndexStatus.totFreq);
    assertEquals(this.termIndexStatusTotPos, seg.termIndexStatus.totPos);

    // Stored fields: document count and total field count.
    assertNotNull(seg.storedFieldStatus);
    assertNull(seg.storedFieldStatus.error);
    assertEquals(this.storedFieldStatusTotalDocCounts, seg.storedFieldStatus.docCount);
    assertEquals(this.storedFieldStatusTotFields, seg.storedFieldStatus.totFields);

    assertTrue(seg.diagnostics.size() > 0);
    // Re-run the check restricted to the first segment ("_0") only.
    final List<String> onlySegments = new ArrayList<>();
    onlySegments.add("_0");

    assertTrue(checker.checkIndex(onlySegments).clean);
    checker.close();
}

From source file:org.apache.solr.analysis.JapaneseTokenizerFactory.java

License:Apache License

/**
 * Initializes the tokenizer mode and, when configured, loads the user
 * dictionary resource named by {@code USER_DICT_PATH} using the encoding
 * from {@code USER_DICT_ENCODING} (defaulting to UTF-8).
 *
 * @param loader resource loader used to open the dictionary file
 * @throws InitializationException if the dictionary cannot be loaded
 */
@Override
public void inform(ResourceLoader loader) {
    mode = getMode(args);
    String userDictionaryPath = args.get(USER_DICT_PATH);
    try {
        if (userDictionaryPath != null) {
            // try-with-resources: the original code leaked the stream —
            // it was never closed after the dictionary was parsed.
            try (InputStream stream = loader.openResource(userDictionaryPath)) {
                String encoding = args.get(USER_DICT_ENCODING);
                if (encoding == null) {
                    // Default to UTF-8 when no encoding is configured.
                    encoding = IOUtils.UTF_8;
                }
                // REPORT malformed bytes so a mis-encoded dictionary fails
                // at startup instead of being silently mangled.
                CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
                        .onMalformedInput(CodingErrorAction.REPORT)
                        .onUnmappableCharacter(CodingErrorAction.REPORT);
                Reader reader = new InputStreamReader(stream, decoder);
                userDictionary = new UserDictionary(reader);
            }
        } else {
            userDictionary = null;
        }
    } catch (Exception e) {
        throw new InitializationException("Exception thrown while loading dictionary", e);
    }
}

From source file:org.apache.solr.uninverting.TestLegacyFieldCache.java

License:Apache License

/**
 * Verifies that FieldCache emits a WARNING to its info stream when the
 * same field is populated twice with different parsers (an insane cache
 * usage). Restores the global FieldCache state afterwards.
 */
public void testInfoStream() throws Exception {
    try {
        FieldCache cache = FieldCache.DEFAULT;
        // Capture the cache's diagnostic output in-memory, decoded as UTF-8.
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
        // First population with the stock legacy double parser...
        cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER);
        // ...then again with a conflicting custom parser for the same field,
        // which should trigger the WARNING being asserted below.
        cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
            @Override
            public TermsEnum termsEnum(Terms terms) throws IOException {
                return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
            }

            @Override
            public long parseValue(BytesRef term) {
                // Map prefix-coded longs into non-negative ints;
                // flips the sign bit for negative values.
                int val = (int) LegacyNumericUtils.prefixCodedToLong(term);
                if (val < 0)
                    val ^= 0x7fffffff;
                return val;
            }
        });
        assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
    } finally {
        // FieldCache.DEFAULT is global; always reset the info stream and
        // purge caches so other tests are unaffected.
        FieldCache.DEFAULT.setInfoStream(null);
        FieldCache.DEFAULT.purgeAllCaches();
    }
}

From source file:org.apache.solr.util.CommandOperation.java

License:Apache License

/**
 * Serializes this command (its name mapped to its data) as a UTF-8 JSON
 * string; returns an empty string if UTF-8 were ever unsupported.
 */
@Override
public String toString() {
    try {
        final byte[] json = ZkStateReader.toJSON(singletonMap(name, commandData));
        return new String(json, IOUtils.UTF_8);
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is mandated by the JVM spec, so this branch is unreachable.
        return "";
    }
}

From source file:org.elasticsearch.tasks.TaskPersistenceService.java

License:Apache License

/**
 * Loads the bundled task-result index mapping from the classpath and
 * returns it decoded as UTF-8.
 *
 * @return the mapping file contents
 * @throws IllegalStateException if the mapping resource cannot be read
 */
public String taskResultIndexMapping() {
    try (InputStream in = getClass().getResourceAsStream(TASK_RESULT_INDEX_MAPPING_FILE)) {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Streams.copy(in, buffer);
        return buffer.toString(IOUtils.UTF_8);
    } catch (Exception e) {
        // Log before rethrowing; missing mapping is a fatal deployment error.
        logger.error("failed to create tasks results index template [{}]", e, TASK_RESULT_INDEX_MAPPING_FILE);
        throw new IllegalStateException(
                "failed to create tasks results index template [" + TASK_RESULT_INDEX_MAPPING_FILE + "]", e);
    }

}

From source file:org.elasticsearch.tasks.TaskResultsService.java

License:Apache License

/**
 * Loads the bundled task-result index mapping from the classpath and
 * returns it decoded as UTF-8.
 *
 * @return the mapping file contents
 * @throws IllegalStateException if the mapping resource cannot be read
 */
public String taskResultIndexMapping() {
    try (InputStream in = getClass().getResourceAsStream(TASK_RESULT_INDEX_MAPPING_FILE)) {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Streams.copy(in, buffer);
        return buffer.toString(IOUtils.UTF_8);
    } catch (Exception e) {
        // Log with full context before rethrowing; the caller cannot recover.
        logger.error(
                (Supplier<?>) () -> new ParameterizedMessage(
                        "failed to create tasks results index template [{}]", TASK_RESULT_INDEX_MAPPING_FILE),
                e);
        throw new IllegalStateException(
                "failed to create tasks results index template [" + TASK_RESULT_INDEX_MAPPING_FILE + "]", e);
    }

}