Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index.IndexWriter#addDocument

Introduction

On this page you can find usage examples for the addDocument method of org.apache.lucene.index.IndexWriter.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java

License:Apache License

/**
 * Builds an in-memory index from the tab-separated file {@code /WEB-INF/data.tsv} and
 * opens {@code searcher} on it.
 *
 * <p>Each non-blank line is split on tabs and its first four tokens are indexed, in order,
 * as the fields {@code location}, {@code salary}, {@code type} and {@code description}
 * (stored text fields with norms omitted). A line with fewer than four tokens makes
 * {@link StringTokenizer#nextToken()} throw {@link java.util.NoSuchElementException}.
 *
 * <p>The data reader and the index writer are closed even when indexing fails.
 *
 * @throws IOException if the data file is missing, cannot be read, or the index cannot be written
 */
private void openExampleIndex() throws IOException {
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    if (dataIn == null) {
        // getResourceAsStream signals "not found" with null; fail with a clear message
        // instead of a NullPointerException from the reader constructor.
        throw new IOException("Example data file /WEB-INF/data.tsv not found");
    }

    //Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);

    // Resources are closed in reverse order: the writer first, then the reader.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8));
            IndexWriter writer = new IndexWriter(rd, iwConfig)) {
        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.length() > 0) {
                //parse row and create a document
                StringTokenizer st = new StringTokenizer(line, "\t");
                Document doc = new Document();
                doc.add(new Field("location", st.nextToken(), textNoNorms));
                doc.add(new Field("salary", st.nextToken(), textNoNorms));
                doc.add(new Field("type", st.nextToken(), textNoNorms));
                doc.add(new Field("description", st.nextToken(), textNoNorms));
                writer.addDocument(doc);
            }
        }
    }

    //open searcher
    // this example never closes its reader!
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Indexes one document with a stored binary field and a non-stored text field, deploys the
 * index, searches for the text and checks that the hit's details contain exactly one entry:
 * the binary field, returned as a {@link BytesWritable} with the indexed bytes.
 *
 * <p>The index writer and the client are released in {@code finally} blocks so a failing
 * step or assertion does not leak them.
 */
@Test
public void testGetBinaryDetails() throws Exception {
    File index = temporaryFolder.newFolder("indexWithBinaryData");
    File indexShard = new File(index, "binaryShard");
    if (!indexShard.mkdirs()) {
        throw new RuntimeException("Unable to create directory " + indexShard.getAbsolutePath());
    }

    String textFieldName = "textField";
    String binaryFieldName = "binaryField";
    String textFieldContent = "sample text";
    byte[] bytesFieldContent = new byte[] { 1, 2, 3 };

    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    try {
        Document document = new Document();
        document.add(new Field(binaryFieldName, bytesFieldContent, Store.YES));
        document.add(new Field(textFieldName, textFieldContent, Store.NO, Index.ANALYZED));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close(true);
    }

    DeployClient deployClient = new DeployClient(miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(index.getName(), index.getAbsolutePath(), 1).joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    try {
        final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
                .parse(textFieldName + ": " + textFieldContent);
        final Hits hits = client.search(query, new String[] { index.getName() }, 10);
        assertNotNull(hits);
        assertEquals(1, hits.getHits().size());
        final Hit hit = hits.getHits().get(0);
        final MapWritable details = client.getDetails(hit);
        final Set<Writable> keySet = details.keySet();
        assertEquals(1, keySet.size());
        final Writable writable = details.get(new Text(binaryFieldName));
        assertNotNull(writable);
        assertThat(writable, instanceOf(BytesWritable.class));
        BytesWritable bytesWritable = (BytesWritable) writable;
        // getBytes() returns the full backing array, so shrink it to the valid length
        // before comparing.
        bytesWritable.setCapacity(bytesWritable.getLength());
        assertArrayEquals(bytesFieldContent, bytesWritable.getBytes());
    } finally {
        client.close();
    }
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Deploys an index with two single-document shards and runs sorted searches on the
 * {@code timesort} field: one matching both documents, one matching only the document in
 * the second shard, and the latter again after node 0 has been shut down and the index
 * rebalanced.
 *
 * <p>Each shard writer is opened, used and closed in turn, and the client is closed in a
 * {@code finally} block, so a failure does not leave writers or the client open.
 */
@Test
public void testFieldSortWithNoResultShard() throws Exception {
    String indexName = "sortIndex";

    File sortIndex = temporaryFolder.newFolder(indexName);
    File sortShard1 = new File(sortIndex, "sortIndex1");
    File sortShard2 = new File(sortIndex, "sortIndex2");

    IndexWriter indexWriter1 = new IndexWriter(FSDirectory.open(sortShard1),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    try {
        Document document = new Document();
        document.add(new Field("text", "abc", Field.Store.YES, Index.NOT_ANALYZED));
        document.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123L));
        indexWriter1.addDocument(document);
    } finally {
        indexWriter1.close();
    }

    IndexWriter indexWriter2 = new IndexWriter(FSDirectory.open(sortShard2),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    try {
        Document document = new Document();
        document.add(new Field("text", "abc2", Field.Store.YES, Index.NOT_ANALYZED));
        document.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123L));
        indexWriter2.addDocument(document);
    } finally {
        indexWriter2.close();
    }

    miniCluster.deployIndex(indexName, sortIndex, 1);

    // query and compare results
    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    try {
        Sort sort = new Sort(new SortField[] { new SortField("timesort", SortField.LONG) });

        // query both documents
        Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:ab*");
        Hits hits = client.search(query, null, 20, sort);
        assertEquals(2, hits.size());

        // query only one document
        query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:abc2");
        hits = client.search(query, null, 20, sort);
        assertEquals(1, hits.size());

        // query only one document on one node
        miniCluster.shutdownNode(0);
        TestUtil.waitUntilIndexBalanced(miniCluster.getProtocol(), indexName);
        query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:abc2");
        hits = client.search(query, null, 20, sort);
        assertEquals(1, hits.size());
    } finally {
        client.close();
    }
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Indexes 20 documents whose sort field holds the document number and whose text field
 * contains the query term only for even, non-zero document numbers, then runs a search
 * sorted on the sort field and checks that the nine matching hits come back in
 * non-descending sort-field order.
 *
 * <p>The ordering check accepts any non-positive {@code compareTo} result; the
 * {@code Comparable} contract only fixes the sign of the result, not the value {@code -1}.
 */
@SuppressWarnings("unchecked")
@Test
public void testSortedSearch() throws Exception {
    // write and deploy test index
    String queryTerm = "2";
    String textFieldName = "textField";
    File sortIndex = temporaryFolder.newFolder("sortIndex2");
    File sortShard = new File(sortIndex, "sortShard");
    String sortFieldName = "sortField";
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(sortShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    try {
        for (int i = 0; i < 20; i++) {
            Document document = new Document();
            document.add(new Field(sortFieldName, "" + i, Store.NO, Index.NOT_ANALYZED));
            StringBuilder textField = new StringBuilder("sample text");
            if (i % 2 == 0) {// produce some different scores
                for (int j = 0; j < i; j++) {
                    textField.append(' ').append(queryTerm);
                }
            }
            document.add(new Field(textFieldName, textField.toString(), Store.NO, Index.ANALYZED));
            indexWriter.addDocument(document);
        }
    } finally {
        indexWriter.close(true);
    }
    DeployClient deployClient = new DeployClient(miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(sortIndex.getName(), sortIndex.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // query and compare results
    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    try {
        final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
                .parse(textFieldName + ": " + queryTerm);
        Sort sort = new Sort(new SortField[] { new SortField(sortFieldName, SortField.INT) });
        final Hits hits = client.search(query, new String[] { sortIndex.getName() }, 20, sort);
        assertNotNull(hits);
        List<Hit> hitsList = hits.getHits();
        for (final Hit hit : hitsList) {
            writeToLog(hit);
        }
        assertEquals(9, hits.size());
        assertEquals(9, hitsList.size());
        assertEquals(1, hitsList.get(0).getSortFields().length);
        for (int i = 0; i < hitsList.size() - 1; i++) {
            int compareTo = hitsList.get(i).getSortFields()[0].compareTo(hitsList.get(i + 1).getSortFields()[0]);
            assertTrue("results not after field", compareTo <= 0);
        }
    } finally {
        client.close();
    }
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Indexes 100 documents ({@code "sample <i>"} with a filter field of {@code i % 10}),
 * deploys the index and runs a wildcard search restricted by a filter on filter-field
 * value {@code "3"}. Expects ten hits, each of whose stored text ends in a number that is
 * congruent to 3 modulo 10.
 *
 * <p>The index writer and the client are closed in {@code finally} blocks so a failure
 * does not leak them.
 */
@Test
public void testFilteredSearch() throws Exception {
    // write and deploy test index
    File filterIndex = temporaryFolder.newFolder("filterIndex");
    File filterShard = new File(filterIndex, "filterShard");
    String textFieldName = "textField";
    String filterFieldName = "filterField";
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(filterShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    try {
        for (int i = 0; i < 100; i++) {
            Document document = new Document();
            document.add(new Field(textFieldName, "sample " + i, Store.YES, Index.NOT_ANALYZED));
            document.add(new Field(filterFieldName, "" + (i % 10), Store.YES, Index.NOT_ANALYZED));
            indexWriter.addDocument(document);
        }
    } finally {
        indexWriter.close(true);
    }

    DeployClient deployClient = new DeployClient(miniCluster.createInteractionProtocol());
    IndexState indexState = deployClient.addIndex(filterIndex.getName(), filterIndex.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // build filter for terms in set {i | (i % 10) == 3}.
    LuceneClient client = new LuceneClient(miniCluster.getZkConfiguration());
    try {
        TermQuery filterQuery = new TermQuery(new Term(filterFieldName, "3"));
        QueryWrapperFilter filter = new QueryWrapperFilter(filterQuery);
        final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
                .parse(textFieldName + ":" + "sample*3");

        final Hits hits = client.search(query, new String[] { filterIndex.getName() }, 100, null, filter);
        assertNotNull(hits);
        List<Hit> hitsList = hits.getHits();
        for (final Hit hit : hitsList) {
            writeToLog(hit);
        }
        assertEquals(10, hits.size());
        assertEquals(10, hitsList.size());

        // check that returned results conform to the filter
        for (final Hit hit : hitsList) {
            MapWritable mw = client.getDetails(hit);
            Text text = (Text) mw.get(new Text(textFieldName));
            assertNotNull(text);
            String[] parts = text.toString().split(" ");
            assertEquals(2, parts.length);
            int num = Integer.parseInt(parts[1]);
            assertEquals(3, num % 10);
        }
    } finally {
        client.close();
    }
}

From source file:com.dasasian.chok.lucene.integration.LuceneComplianceTest.java

License:Apache License

/**
 * Creates a new index in {@code file} containing the given documents.
 *
 * <p>The directory is created if necessary and its existence is asserted. The writer is
 * closed in a {@code finally} block so it is not left open when adding a document fails.
 *
 * @param file      directory that will hold the index
 * @param documents documents to add, in iteration order
 * @throws IOException if the index cannot be written
 */
private static void writeIndex(File file, List<Document> documents) throws IOException {
    file.mkdirs();
    assertTrue(file.exists());
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(file), new StandardAnalyzer(Version.LUCENE_30),
            true, MaxFieldLength.UNLIMITED);
    try {
        for (Document document : documents) {
            indexWriter.addDocument(document);
        }
    } finally {
        indexWriter.close(true);
    }
}

From source file:com.dasasian.chok.lucene.testutil.LuceneIndexGenerator.java

License:Apache License

/**
 * Fills every shard directory of {@code testIndex} with a freshly created index of
 * {@code docsPerShard} documents, each holding a {@code key} field ({@code key_<i>}) and a
 * {@code text} field of {@code wordsPerDoc} words drawn at random from {@code wordList}.
 * After a shard is optimized and closed, an empty file named {@code done} is created in
 * its directory.
 *
 * <p>One random generator is shared by all shards; seeding a new generator from the
 * millisecond clock per shard can give two shards identical content when they are created
 * within the same millisecond. The writer is closed in a {@code finally} block so it is
 * not left open when indexing fails.
 *
 * @param testIndex    supplies the shard directories to fill
 * @param wordList     vocabulary to draw words from
 * @param wordsPerDoc  number of words per document
 * @param docsPerShard number of documents per shard
 * @throws RuntimeException wrapping any failure while writing an index
 */
public void createIndex(TestIndex testIndex, String[] wordList, int wordsPerDoc, int docsPerShard) {
    long startTime = System.currentTimeMillis();
    try {
        Random random = new Random(System.currentTimeMillis());
        for (File index : testIndex.getShardFiles()) {
            int count = wordList.length;
            IndexWriter indexWriter = new IndexWriter(FSDirectory.open(index),
                    new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (int i = 0; i < docsPerShard; i++) {
                    // generate text first
                    StringBuilder text = new StringBuilder();
                    for (int j = 0; j < wordsPerDoc; j++) {
                        text.append(wordList[random.nextInt(count)]);
                        text.append(" ");
                    }

                    Document document = new Document();
                    document.add(new Field("key", "key_" + i, Field.Store.NO, Field.Index.NOT_ANALYZED));
                    document.add(new Field("text", text.toString(), Field.Store.NO, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                }
                indexWriter.optimize();
            } finally {
                indexWriter.close();
            }
            // The reported time is measured from the start of this call, so it is
            // cumulative across shards.
            System.out.println("Index created with : " + docsPerShard + " documents in "
                    + (System.currentTimeMillis() - startTime) + " ms");

            // Write a "done" flag file that shell scripts can use to detect that the
            // shard is complete.
            new File(index, "done").createNewFile();
        }

    } catch (Exception e) {
        throw new RuntimeException("Unable to write index", e);
    }
}

From source file:com.devb.search.IndicIndexer.java

License:Apache License

/**
 * Rebuilds the index under {@code <webapp root>/hindex/} from the files in
 * {@code <webapp root>/hdocs/}.
 *
 * <p>Each file is read as UTF-8 and split with a {@code StandardTokenizer}; every token
 * becomes its own document with the fields {@code path} (file name), {@code linenumber}
 * and {@code contents} (the token text). Note that {@code linenumber} is a running token
 * counter within the file, not a line number.
 *
 * <p>Indexing is best-effort per file: a failure on one file, including a failure to open
 * it, is printed and the next file is processed. The per-file streams, the index writer
 * and the directory are closed even when an error occurs. Index-level I/O errors are
 * reported on standard output.
 */
@Override
public void makeIndex() {
    String indexPath = servletContext.getRealPath("/") + "/hindex/";
    String docsPath = servletContext.getRealPath("/") + "/hdocs/";
    boolean create = true;

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path\n");
        return;
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...\n");

        Analyzer analyzer = new HindiAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);
        iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

        // Closed in reverse order: the writer first, then the directory.
        try (org.apache.lucene.store.Directory dir = FSDirectory.open(new File(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            // Readability was already verified above; list() yields null on I/O error.
            String[] files = docDir.isDirectory() ? docDir.list() : null;
            if (files != null) {
                for (String fileName : files) {
                    File file = new File(docDir, fileName);
                    // Closed in reverse order: tokenizer, reader, stream.
                    try (FileInputStream fileInputStream = new FileInputStream(file);
                            BufferedReader reader = new BufferedReader(
                                    new InputStreamReader(fileInputStream, "UTF-8"));
                            Tokenizer tokenizer = new StandardTokenizer(reader)) {
                        CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
                        tokenizer.reset();
                        int tokenNumber = 0;
                        while (tokenizer.incrementToken()) {
                            Document doc = new Document();
                            doc.add(new StringField("path", file.getName(), Field.Store.YES));
                            doc.add(new TextField("linenumber", Integer.toString(++tokenNumber), Store.YES));
                            doc.add(new TextField("contents", termAtt.toString(), Store.YES));
                            writer.addDocument(doc);
                        }
                        // Complete the TokenStream workflow before the stream is closed.
                        tokenizer.end();
                        System.out.println("Adding " + file + "\n");
                    } catch (Exception e) {
                        // Best-effort: report the failure and continue with the next file.
                        e.printStackTrace();
                    }
                }
            }
        }

        Date end = new Date();
        System.out.println((end.getTime() - start.getTime()) + " total milliseconds\n");

    } catch (IOException e) {
        System.out.println("Caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.devb.search.StandardIndexer.java

License:Apache License

/**
 * Rebuilds the index under {@code <webapp root>/index/} from the files in
 * {@code <webapp root>/docs/}.
 *
 * <p>Every line of every file becomes its own document with the fields {@code path}
 * (file name), {@code linenumber} (1-based line number within the file) and
 * {@code contents} (the line text).
 *
 * <p>Indexing is best-effort per file: a failure on one file, including a failure to open
 * it, is printed and the next file is processed. The per-file readers, the index writer
 * and the directory are closed even when an error occurs. Index-level I/O errors are
 * reported on standard output.
 */
@Override
public void makeIndex() {
    String indexPath = servletContext.getRealPath("/") + "/index/";
    String docsPath = servletContext.getRealPath("/") + "/docs/";
    boolean create = true;

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path\n");
        return;
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...\n");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);
        iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

        // Closed in reverse order: the writer first, then the directory.
        try (org.apache.lucene.store.Directory dir = FSDirectory.open(new File(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            // Readability was already verified above; list() yields null on I/O error.
            String[] files = docDir.isDirectory() ? docDir.list() : null;
            if (files != null) {
                for (String fileName : files) {
                    File file = new File(docDir, fileName);
                    // NOTE(review): FileReader decodes with the platform default charset;
                    // confirm that matches the encoding of the documents.
                    try (FileReader fr = new FileReader(file);
                            BufferedReader br = new BufferedReader(fr)) {
                        String line;
                        int lineNumber = 0;
                        while ((line = br.readLine()) != null) {
                            Document doc = new Document();
                            doc.add(new StringField("path", file.getName(), Field.Store.YES));
                            doc.add(new TextField("linenumber", Integer.toString(++lineNumber), Store.YES));
                            doc.add(new TextField("contents", line, Store.YES));
                            writer.addDocument(doc);
                        }
                        System.out.println("Adding " + file + "\n");
                    } catch (Exception e) {
                        // Best-effort: report the failure and continue with the next file.
                        e.printStackTrace();
                    }
                }
            }
        }

        Date end = new Date();
        System.out.println((end.getTime() - start.getTime()) + " total milliseconds\n");

    } catch (IOException e) {
        System.out.println("Caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file:com.docdoku.server.IndexerBean.java

License:Open Source License

/**
 * Adds one document to the given index writer.
 *
 * <p>The document carries two fields: {@code fullName}, stored and indexed without
 * analysis, and {@code content}, whose value is supplied by {@code pContentReader}.
 *
 * @param pIndexWriter   writer that receives the document
 * @param pContentReader source of the {@code content} field's text
 * @param pFullName      value of the {@code fullName} field
 * @throws IOException if the writer fails to add the document
 */
private void addDoc(IndexWriter pIndexWriter, Reader pContentReader, String pFullName)
        throws FileNotFoundException, CorruptIndexException, IOException {
    Field nameField = new Field("fullName", pFullName, Field.Store.YES, Field.Index.NOT_ANALYZED);
    Field contentField = new Field("content", pContentReader);

    Document entry = new Document();
    entry.add(nameField);
    entry.add(contentField);
    pIndexWriter.addDocument(entry);
}