Java tutorial: integration-testing FSCrawler's Elasticsearch HTTP client
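This tutorial walks through ElasticsearchClientIT, the integration test suite FSCrawler uses to exercise its low-level Elasticsearch HTTP client: creating indices, checking for types and mappings, searching, and bulk indexing with and without an ingest pipeline. Note that several helpers used below (elasticsearchClient, logger, refresh(), awaitBusy(), getCrawlerName()) come from the AbstractITCase base class, which is not shown in this file.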
/*
 * Licensed to David Pilato (the "Author") under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Author licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package fr.pilato.elasticsearch.crawler.fs.test.integration;

import fr.pilato.elasticsearch.crawler.fs.client.BulkProcessor;
import fr.pilato.elasticsearch.crawler.fs.client.BulkRequest;
import fr.pilato.elasticsearch.crawler.fs.client.BulkResponse;
import fr.pilato.elasticsearch.crawler.fs.client.IndexRequest;
import fr.pilato.elasticsearch.crawler.fs.client.SearchResponse;
import fr.pilato.elasticsearch.crawler.fs.client.VersionComparator;
import fr.pilato.elasticsearch.crawler.fs.meta.settings.TimeValue;
import fr.pilato.elasticsearch.crawler.fs.util.FsCrawlerUtil;
import org.apache.http.entity.StringEntity;
import org.apache.logging.log4j.LogManager;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicReference;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.hasEntry;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeThat;

/**
 * Test elasticsearch HTTP client
 */
public class ElasticsearchClientIT extends AbstractITCase {

    @Before
    public void cleanExistingIndex() throws IOException {
        logger.info(" -> Removing existing index [{}*]", getCrawlerName());
        elasticsearchClient.deleteIndex(getCrawlerName() + "*");
    }

    @Test
    public void testCreateIndex() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName());
        boolean exists = elasticsearchClient.isExistingIndex(getCrawlerName());
        assertThat(exists, is(true));
    }

    @Test
    public void testCreateIndexWithSettings() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName(), false, "{\n" +
                "  \"settings\": {\n" +
                "    \"number_of_shards\": 1,\n" +
                "    \"number_of_replicas\": 1\n" +
                "  }\n" +
                "}");
        boolean exists = elasticsearchClient.isExistingIndex(getCrawlerName());
        assertThat(exists, is(true));
    }

    @Test
    public void testRefresh() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName());
        refresh();
    }

    @Test
    public void testCreateIndexAlreadyExists() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());
        try {
            elasticsearchClient.createIndex(getCrawlerName());
            fail("we should reject creation of an already existing index");
        } catch (RuntimeException e) {
            assertThat(e.getMessage(), is("index already exists"));
        }
    }

    @Test
    public void testIsExistingTypeWithNoIndex() throws IOException {
        boolean existingType = elasticsearchClient.isExistingType(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC);
        assertThat(existingType, is(false));
    }
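    /*
     * A minimal sketch (not part of the original test class) of how a caller
     * could wrap the create-then-check pattern exercised above. The helper
     * name `createIndexIfMissing` is hypothetical; it only reuses client
     * methods already shown in these tests.
     */
    private void createIndexIfMissing(String index) throws IOException {
        // Only create the index when it does not exist yet, avoiding the
        // "index already exists" RuntimeException asserted in testCreateIndexAlreadyExists().
        if (!elasticsearchClient.isExistingIndex(index)) {
            elasticsearchClient.createIndex(index);
        }
    }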
    @Test
    public void testIsExistingTypeWithIndexNoType() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());
        boolean existingType = elasticsearchClient.isExistingType(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC);
        assertThat(existingType, is(false));
    }

    @Test
    public void testIsExistingTypeWithIndexAndType() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());
        elasticsearchClient.putMapping(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "{\"doc\":{}}");
        boolean existingType = elasticsearchClient.isExistingType(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC);
        assertThat(existingType, is(true));
    }

    @Test
    public void testSearch() throws IOException {
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());

        // Depending on the version we are running against, we need to adapt the test mapping
        String version = elasticsearchClient.findVersion();

        String mapping;
        // Starting with elasticsearch 5.0.0, we need to use `type: text` instead of `type: string`
        if (new VersionComparator().compare(version, "5") >= 0) {
            mapping = "{\n" +
                    "  \"doc\" : {\n" +
                    "    \"properties\" : {\n" +
                    "      \"foo\" : {\n" +
                    "        \"type\" : \"text\",\n" +
                    "        \"store\" : true\n" +
                    "      }\n" +
                    "    }\n" +
                    "  }\n" +
                    "}";
        } else {
            mapping = "{\n" +
                    "  \"doc\" : {\n" +
                    "    \"properties\" : {\n" +
                    "      \"foo\" : {\n" +
                    "        \"type\" : \"string\",\n" +
                    "        \"store\" : true\n" +
                    "      }\n" +
                    "    }\n" +
                    "  }\n" +
                    "}";
        }

        elasticsearchClient.putMapping(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, mapping);

        elasticsearchClient.index(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "1", "{ \"foo\" : \"bar\" }");
        elasticsearchClient.index(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "2", "{ \"foo\" : \"baz\" }");

        elasticsearchClient.refresh(getCrawlerName());

        // match_all
        SearchResponse response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, (String) null);
        assertThat(response.getHits().getTotal(), is(2L));

        // term
        response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "foo:bar");
        assertThat(response.getHits().getTotal(), is(1L));

        // using fields
        response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "foo:bar", 10, "_source");
        assertThat(response.getHits().getTotal(), is(1L));

        response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "foo:bar", 10, "foo");
        assertThat(response.getHits().getTotal(), is(1L));
        assertThat(response.getHits().getHits().get(0).getFields(), hasEntry("foo", Collections.singletonList("bar")));

        // match_all
        response = elasticsearchClient.searchJson(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "{}");
        assertThat(response.getHits().getTotal(), is(2L));

        // match
        response = elasticsearchClient.searchJson(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "{ \"query\" : { \"match\": { \"foo\" : \"bar\" } } }");
        assertThat(response.getHits().getTotal(), is(1L));
    }
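    /*
     * A small sketch of the version switch used in testSearch(), factored into
     * a helper. The helper name `textFieldType` is hypothetical; it only
     * reuses VersionComparator, which this class already imports.
     */
    private String textFieldType(String elasticsearchVersion) {
        // Elasticsearch 5.0 replaced the `string` field type with `text`.
        return new VersionComparator().compare(elasticsearchVersion, "5") >= 0 ? "text" : "string";
    }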
    @Test
    public void testBulkWithTime() throws IOException, InterruptedException {
        // Create the index first
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());

        BulkProcessor bulkProcessor = BulkProcessor.simpleBulkProcessor(elasticsearchClient, 100, TimeValue.timeValueSeconds(2), null);
        for (int i = 0; i < 10; i++) {
            bulkProcessor.add(new IndexRequest(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "id" + i)
                    .source("{\"foo\":\"bar\"}"));
        }

        elasticsearchClient.refresh(getCrawlerName());
        waitForAllShardsAssigned();

        // We wait for 3 seconds (the flush interval is 2 seconds, so that should be enough)
        Thread.sleep(3000L);

        elasticsearchClient.refresh(getCrawlerName());

        // We should have our docs by now
        SearchResponse response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, (String) null);
        assertThat(response.getHits().getTotal(), is(10L));

        bulkProcessor.close();
    }

    @Test
    public void testBulkWithoutTime() throws IOException, InterruptedException {
        // Create the index first
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());

        BulkProcessor bulkProcessor = BulkProcessor.simpleBulkProcessor(elasticsearchClient, 10, null, null);
        for (int i = 0; i < 9; i++) {
            bulkProcessor.add(new IndexRequest(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "id" + i)
                    .source("{\"foo\":\"bar\"}"));
        }

        elasticsearchClient.refresh(getCrawlerName());
        waitForAllShardsAssigned();

        // The 10th request should trigger the flush, since bulkActions is set to 10
        bulkProcessor.add(new IndexRequest(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "id" + 9)
                .source("{\"foo\":\"bar\"}"));

        elasticsearchClient.refresh(getCrawlerName());

        // We should have our docs by now
        SearchResponse response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, (String) null);
        assertThat(response.getHits().getTotal(), is(10L));

        bulkProcessor.close();
    }

    @Test
    public void testBulkWithPipeline() throws IOException, InterruptedException {
        // We can only run this test against a 5.0+ cluster
        assumeThat("We skip the test as we are not running it with a 5.0+ cluster",
                elasticsearchClient.isIngestSupported(), is(true));

        // Create the index first
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());

        // Create an ingest pipeline with a single `set` processor
        String pipeline = "{\n" +
                "  \"description\" : \"describe pipeline\",\n" +
                "  \"processors\" : [\n" +
                "    {\n" +
                "      \"set\" : {\n" +
                "        \"field\": \"foo\",\n" +
                "        \"value\": \"bar\"\n" +
                "      }\n" +
                "    }\n" +
                "  ]\n" +
                "}";
        StringEntity entity = new StringEntity(pipeline, StandardCharsets.UTF_8);

        elasticsearchClient.getClient().performRequest("PUT", "_ingest/pipeline/" + getCrawlerName(),
                Collections.emptyMap(), entity);

        BulkProcessor bulkProcessor = BulkProcessor.simpleBulkProcessor(elasticsearchClient, 100, TimeValue.timeValueSeconds(2),
                getCrawlerName());
        for (int i = 0; i < 10; i++) {
            bulkProcessor.add(new IndexRequest(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "id" + i)
                    .source("{\"field\": \"baz\"}"));
        }

        elasticsearchClient.refresh(getCrawlerName());
        waitForAllShardsAssigned();

        // We wait for 3 seconds (the flush interval is 2 seconds, so that should be enough)
        Thread.sleep(3000L);

        elasticsearchClient.refresh(getCrawlerName());

        // The pipeline should have set foo=bar on every doc
        SearchResponse response = elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "foo:bar");
        assertThat(response.getHits().getTotal(), is(10L));

        bulkProcessor.close();
    }
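    /*
     * For reference, a sketch of the two flush triggers the bulk tests above
     * exercise: simpleBulkProcessor(client, bulkActions, flushInterval, pipeline)
     * flushes when `bulkActions` requests have been queued (testBulkWithoutTime)
     * or when `flushInterval` elapses (testBulkWithTime), whichever comes first.
     * The method name and the values below are illustrative, not taken from the
     * original tests.
     */
    private BulkProcessor newIllustrativeBulkProcessor() {
        // 500 actions per bulk, flushed at least every 5 seconds, no ingest pipeline.
        return BulkProcessor.simpleBulkProcessor(elasticsearchClient, 500,
                TimeValue.timeValueSeconds(5), null);
    }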
    @Test
    public void testBulkWithErrors() throws IOException, InterruptedException {
        // Create the index first
        elasticsearchClient.createIndex(getCrawlerName());
        elasticsearchClient.waitForHealthyIndex(getCrawlerName());

        AtomicReference<BulkResponse> bulkResponse = new AtomicReference<>();
        BulkProcessor bulkProcessor = new BulkProcessor.Builder(elasticsearchClient, new BulkProcessor.Listener() {
            @Override
            public void beforeBulk(long executionId, BulkRequest request) {
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                bulkResponse.set(response);
            }

            @Override
            public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
            }
        }).setBulkActions(100).setFlushInterval(TimeValue.timeValueMillis(200)).build();

        // The source is intentionally malformed JSON (missing closing brace) so the bulk item fails
        bulkProcessor.add(new IndexRequest(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, "id").source("{\"foo\":\"bar\""));
        bulkProcessor.close();

        BulkResponse response = bulkResponse.get();
        Throwable message = response.buildFailureMessage();
        assertThat(message.getMessage(), containsString("1 failures"));

        // If we run the test with TRACE logging enabled, we can check more things
        if (LogManager.getLogger(BulkResponse.class).isTraceEnabled()) {
            assertThat(message.getMessage(), containsString("failed to parse"));
        }
    }

    /**
     * If we search just a few ms after sending the requests, we won't have all the data.
     * In the elasticsearch 1.x series that might even fail with:
     * <pre>
     * [2016-07-06 19:35:58,613][DEBUG][action.search.type ] [Mentus] All shards failed for phase: [query]
     * org.elasticsearch.index.IndexShardMissingException: [fscrawler_test_bulk_without_time][4] missing
     *     at org.elasticsearch.index.IndexService.shardSafe(IndexService.java:210)
     *     at org.elasticsearch.search.SearchService.createContext(SearchService.java:560)
     *     at org.elasticsearch.search.SearchService.createAndPutContext(SearchService.java:544)
     *     at org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:306)
     *     at org.elasticsearch.search.action.SearchServiceTransportAction$5.call(SearchServiceTransportAction.java:231)
     *     at org.elasticsearch.search.action.SearchServiceTransportAction$5.call(SearchServiceTransportAction.java:228)
     *     at org.elasticsearch.search.action.SearchServiceTransportAction$23.run(SearchServiceTransportAction.java:559)
     *     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
     *     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
     *     at java.lang.Thread.run(Thread.java:745)
     * </pre>
     *
     * @throws InterruptedException in case of error
     */
    private void waitForAllShardsAssigned() throws InterruptedException {
        awaitBusy(() -> {
            try {
                elasticsearchClient.search(getCrawlerName(), FsCrawlerUtil.INDEX_TYPE_DOC, (String) null);
            } catch (IOException e) {
                // For the elasticsearch 1.x series
                if (e.getMessage().contains("SearchPhaseExecutionException")) {
                    logger.warn("Error while running against a 1.x cluster. Trying again...");
                    return false;
                }
                fail("We got an unexpected exception: " + e.getMessage());
            }
            return true;
        });
    }

    @Test
    public void testFindVersion() throws IOException {
        String version = elasticsearchClient.findVersion();
        logger.info("Current elasticsearch version: [{}]", version);

        // TODO if we store the elasticsearch version we are testing against in a property file,
        // we can add some assertions here
    }
}
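The awaitBusy(...) call used by waitForAllShardsAssigned() comes from AbstractITCase and is not shown in this file. As a rough idea of what such a polling helper usually looks like, here is a minimal standalone sketch; the timeout, polling interval, and BooleanSupplier signature are assumptions, not FSCrawler's actual implementation.

class AwaitBusySketch {
    // Polls `predicate` every 100 ms until it returns true or ~10 seconds elapse.
    // Returns true if the predicate succeeded before the deadline.
    static boolean awaitBusy(java.util.function.BooleanSupplier predicate) throws InterruptedException {
        long deadline = System.currentTimeMillis() + 10_000L;
        while (System.currentTimeMillis() < deadline) {
            if (predicate.getAsBoolean()) {
                return true;
            }
            Thread.sleep(100L);
        }
        return false;
    }
}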