Example usage for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter.addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException

Source Link

Document

Adds a document to this index.

Usage

From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java

License:Apache License

/**
 * Builds the in-memory example index from the bundled tab-separated data file and
 * opens the searcher over it.
 *
 * <p>Every non-blank line of {@code /WEB-INF/data.tsv} is split on tab characters and its
 * first four tokens are indexed, in order, as the {@code location}, {@code salary},
 * {@code type} and {@code description} fields (norms are omitted on all of them).
 * A row with fewer than four tokens makes {@link StringTokenizer#nextToken()} throw
 * {@link java.util.NoSuchElementException}.
 *
 * @throws IOException if the data file is missing or unreadable, or the index cannot be written
 */
private void openExampleIndex() throws IOException {
    //Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);

    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);

    // try-with-resources: the writer and the reader are released even if a row fails to parse
    try (IndexWriter writer = new IndexWriter(rd, iwConfig)) {
        InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
        if (dataIn == null) {
            // getResourceAsStream signals a missing resource with null; fail with a clear message
            throw new IOException("Example data file /WEB-INF/data.tsv not found");
        }
        try (BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8))) {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                //parse row and create a document
                StringTokenizer st = new StringTokenizer(line, "\t");
                Document doc = new Document();
                doc.add(new Field("location", st.nextToken(), textNoNorms));
                doc.add(new Field("salary", st.nextToken(), textNoNorms));
                doc.add(new Field("type", st.nextToken(), textNoNorms));
                doc.add(new Field("description", st.nextToken(), textNoNorms));
                writer.addDocument(doc);
            }
        }
    }

    //open searcher
    // NOTE: the reader is intentionally kept open for the searcher and is never closed by this example
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Indexes one document carrying a stored binary field and a non-stored analyzed text field,
 * deploys the index, searches for the text and checks that the hit's details contain
 * exactly the binary field with its original bytes.
 */
@Test
public void testGetBinaryDetails() throws Exception {
    final String textField = "textField";
    final String binaryField = "binaryField";
    final String textValue = "sample text";
    final byte[] binaryValue = { 1, 2, 3 };

    // lay out <index>/<shard> on disk
    final File indexDir = temporaryFolder.newFolder("indexWithBinaryData");
    final File shardDir = new File(indexDir, "binaryShard");
    final boolean shardCreated = shardDir.mkdirs();
    if (!shardCreated) {
        throw new RuntimeException("Unable to create directory " + shardDir.getAbsolutePath());
    }

    // write the single test document into the shard
    final IndexWriter writer = new IndexWriter(FSDirectory.open(shardDir),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    final Document doc = new Document();
    doc.add(new Field(binaryField, binaryValue, Store.YES));
    doc.add(new Field(textField, textValue, Store.NO, Index.ANALYZED));
    writer.addDocument(doc);
    writer.close(true);

    // deploy and wait until the index is reported as deployed
    final String indexName = indexDir.getName();
    final DeployClient deployer = new DeployClient(miniCluster.getProtocol());
    final IndexState deployedState = deployer.addIndex(indexName, indexDir.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, deployedState);

    // search for the text field; exactly one hit is expected
    final LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    final QueryParser parser = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer());
    final Query query = parser.parse(textField + ": " + textValue);
    final String[] searchedIndices = { indexName };
    final Hits hits = client.search(query, searchedIndices, 10);
    assertNotNull(hits);
    assertEquals(1, hits.getHits().size());

    // only the stored (binary) field may show up in the details
    final Hit firstHit = hits.getHits().get(0);
    final MapWritable details = client.getDetails(firstHit);
    assertEquals(1, details.keySet().size());
    final Writable binaryDetail = details.get(new Text(binaryField));
    assertNotNull(binaryDetail);
    assertThat(binaryDetail, instanceOf(BytesWritable.class));

    final BytesWritable storedBytes = (BytesWritable) binaryDetail;
    // getBytes() returns the full backing array, so trim the capacity to the valid length first
    storedBytes.setCapacity(storedBytes.getLength());
    assertArrayEquals(binaryValue, storedBytes.getBytes());
    client.close();
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Deploys an index made of two shards holding one document each and runs sorted searches
 * (by the numeric {@code timesort} field) that match both documents, then only the document
 * of the second shard, and finally the same single document after node 0 was shut down.
 */
@Test
public void testFieldSortWithNoResultShard() throws Exception {
    final String indexName = "sortIndex";
    final File indexDir = temporaryFolder.newFolder(indexName);

    // one writer per shard directory
    final File firstShardDir = new File(indexDir, "sortIndex1");
    final File secondShardDir = new File(indexDir, "sortIndex2");
    final IndexWriter firstWriter = new IndexWriter(FSDirectory.open(firstShardDir),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    final IndexWriter secondWriter = new IndexWriter(FSDirectory.open(secondShardDir),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);

    // shard 1: text "abc"
    final Document firstDoc = new Document();
    firstDoc.add(new Field("text", "abc", Field.Store.YES, Index.NOT_ANALYZED));
    firstDoc.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123L));
    firstWriter.addDocument(firstDoc);
    firstWriter.close();

    // shard 2: text "abc2", same sort value
    final Document secondDoc = new Document();
    secondDoc.add(new Field("text", "abc2", Field.Store.YES, Index.NOT_ANALYZED));
    secondDoc.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123L));
    secondWriter.addDocument(secondDoc);
    secondWriter.close();

    miniCluster.deployIndex(indexName, indexDir, 1);

    // query and compare results
    final LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    final SortField[] sortFields = { new SortField("timesort", SortField.LONG) };
    final Sort sort = new Sort(sortFields);

    // prefix query matches the documents of both shards
    final Query bothDocsQuery = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse("text:ab*");
    final Hits bothDocsHits = client.search(bothDocsQuery, null, 20, sort);
    assertEquals(2, bothDocsHits.size());

    // exact query matches a single document, so one shard contributes no result
    final Query singleDocQuery = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse("text:abc2");
    final Hits singleDocHits = client.search(singleDocQuery, null, 20, sort);
    assertEquals(1, singleDocHits.size());

    // same single-document query after one node has been shut down and the index rebalanced
    miniCluster.shutdownNode(0);
    TestUtil.waitUntilIndexBalanced(miniCluster.getProtocol(), indexName);
    final Query afterShutdownQuery = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse("text:abc2");
    final Hits afterShutdownHits = client.search(afterShutdownQuery, null, 20, sort);
    assertEquals(1, afterShutdownHits.size());
    client.close();
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Indexes 20 documents whose {@code sortField} holds the document number, deploys the index
 * and checks that a search sorted on that field returns the hits in non-descending order.
 *
 * <p>Only documents with an even number {@code i >= 2} contain the query term (repeated
 * {@code i} times, to produce differing scores), hence the nine expected hits.
 */
@SuppressWarnings("unchecked")
@Test
public void testSortedSearch() throws Exception {
    // write and deploy test index
    String queryTerm = "2";
    String textFieldName = "textField";
    File sortIndex = temporaryFolder.newFolder("sortIndex2");
    File sortShard = new File(sortIndex, "sortShard");
    String sortFieldName = "sortField";
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(sortShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 20; i++) {
        Document document = new Document();
        document.add(new Field(sortFieldName, String.valueOf(i), Store.NO, Index.NOT_ANALYZED));
        StringBuilder textField = new StringBuilder("sample text");
        if (i % 2 == 0) {// produce some different scores
            for (int j = 0; j < i; j++) {
                textField.append(' ').append(queryTerm);
            }
        }
        document.add(new Field(textFieldName, textField.toString(), Store.NO, Index.ANALYZED));
        indexWriter.addDocument(document);
    }
    indexWriter.close(true);
    DeployClient deployClient = new DeployClient(miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(sortIndex.getName(), sortIndex.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // query and compare results
    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse(textFieldName + ": " + queryTerm);
    Sort sort = new Sort(new SortField[] { new SortField(sortFieldName, SortField.INT) });
    final Hits hits = client.search(query, new String[] { sortIndex.getName() }, 20, sort);
    assertNotNull(hits);
    List<Hit> hitsList = hits.getHits();
    for (final Hit hit : hitsList) {
        writeToLog(hit);
    }
    assertEquals(9, hits.size());
    assertEquals(9, hitsList.size());
    assertEquals(1, hitsList.get(0).getSortFields().length);
    for (int i = 0; i < hitsList.size() - 1; i++) {
        int compareTo = hitsList.get(i).getSortFields()[0].compareTo(hitsList.get(i + 1).getSortFields()[0]);
        // compareTo only guarantees the SIGN of its result, so accept any non-positive value
        // rather than exactly 0 or -1
        assertTrue("results not after field", compareTo <= 0);
    }
    client.close();
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

/**
 * Indexes 100 documents ({@code textField = "sample <i>"}, {@code filterField = i % 10}),
 * deploys the index and searches it with a wildcard query combined with a filter on
 * {@code filterField == "3"}; ten hits are expected and each one must carry a number
 * whose remainder modulo 10 is 3.
 */
@Test
public void testFilteredSearch() throws Exception {
    final String textFieldName = "textField";
    final String filterFieldName = "filterField";

    // write and deploy test index
    final File indexDir = temporaryFolder.newFolder("filterIndex");
    final File shardDir = new File(indexDir, "filterShard");
    final IndexWriter writer = new IndexWriter(FSDirectory.open(shardDir),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    int docNumber = 0;
    while (docNumber < 100) {
        final Document doc = new Document();
        doc.add(new Field(textFieldName, "sample " + docNumber, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field(filterFieldName, "" + (docNumber % 10), Store.YES, Index.NOT_ANALYZED));
        writer.addDocument(doc);
        docNumber++;
    }
    writer.close(true);

    final String indexName = indexDir.getName();
    final DeployClient deployer = new DeployClient(miniCluster.createInteractionProtocol());
    final IndexState deployedState = deployer.addIndex(indexName, indexDir.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, deployedState);

    // build filter for terms in set {i | (i % 10) == 3}.
    final LuceneClient client = new LuceneClient(miniCluster.getZkConfiguration());
    final Term filterTerm = new Term(filterFieldName, "3");
    final QueryWrapperFilter filter = new QueryWrapperFilter(new TermQuery(filterTerm));
    final QueryParser parser = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer());
    final Query query = parser.parse(textFieldName + ":" + "sample*3");

    final String[] searchedIndices = { indexName };
    final Hits hits = client.search(query, searchedIndices, 100, null, filter);
    assertNotNull(hits);
    final List<Hit> hitsList = hits.getHits();
    for (final Hit loggedHit : hitsList) {
        writeToLog(loggedHit);
    }
    assertEquals(10, hits.size());
    assertEquals(10, hitsList.size());

    // check that returned results conform to the filter
    for (final Hit checkedHit : hitsList) {
        final MapWritable details = client.getDetails(checkedHit);
        final Text storedText = (Text) details.get(new Text("textField"));
        assertNotNull(storedText);
        // stored value has the form "sample <number>"
        final String[] parts = storedText.toString().split(" ");
        assertTrue(parts.length == 2);
        final int number = Integer.parseInt(parts[1]);
        assertTrue((number % 10) == 3);
    }
    client.close();
}

From source file:com.dasasian.chok.lucene.integration.LuceneComplianceTest.java

License:Apache License

/**
 * Writes the given documents into a Lucene index located in {@code file}.
 *
 * <p>The directory is created if necessary and the writer is opened with the
 * {@code create} flag set to {@code true}.
 *
 * @param file      directory that will hold the index
 * @param documents documents to add, in iteration order
 * @throws IOException if the index cannot be opened, written or closed
 */
private static void writeIndex(File file, List<Document> documents) throws IOException {
    file.mkdirs();
    assertTrue(file.exists());
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(file), new StandardAnalyzer(Version.LUCENE_30),
            true, MaxFieldLength.UNLIMITED);
    try {
        for (Document document : documents) {
            indexWriter.addDocument(document);
        }
    } finally {
        // always close the writer, even if adding a document fails, so it is not leaked
        indexWriter.close(true);
    }
}

From source file:com.dasasian.chok.lucene.testutil.LuceneIndexGenerator.java

License:Apache License

/**
 * Fills every shard of the given test index with randomly generated documents.
 *
 * <p>For each shard directory, {@code docsPerShard} documents are written. Each document has a
 * {@code key} field ({@code "key_" + i}) and a {@code text} field built from
 * {@code wordsPerDoc} words picked at random from {@code wordList}. After the shard has been
 * optimized and closed, an empty file named {@code done} is created inside it as a
 * completion marker.
 *
 * @param testIndex    index whose shard directories are populated
 * @param wordList     vocabulary the random text is drawn from
 * @param wordsPerDoc  number of words per generated document
 * @param docsPerShard number of documents written to each shard
 * @throws RuntimeException wrapping any failure that occurs while writing a shard
 */
public void createIndex(TestIndex testIndex, String[] wordList, int wordsPerDoc, int docsPerShard) {
    long startTime = System.currentTimeMillis();
    try {
        for (File index : testIndex.getShardFiles()) {
            int count = wordList.length;
            Random random = new Random(System.currentTimeMillis());
            IndexWriter indexWriter = new IndexWriter(FSDirectory.open(index),
                    new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (int i = 0; i < docsPerShard; i++) {
                    // generate text first
                    StringBuilder text = new StringBuilder();
                    for (int j = 0; j < wordsPerDoc; j++) {
                        text.append(wordList[random.nextInt(count)]);
                        text.append(" ");
                    }

                    Document document = new Document();
                    document.add(new Field("key", "key_" + i, Field.Store.NO, Field.Index.NOT_ANALYZED));
                    document.add(new Field("text", text.toString(), Field.Store.NO, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                }
                indexWriter.optimize();
            } finally {
                // close the writer even when indexing fails so it is not leaked
                indexWriter.close();
            }
            // elapsed time is measured from the start of the whole call, not per shard
            System.out.println("Index created with : " + docsPerShard + " documents in "
                    + (System.currentTimeMillis() - startTime) + " ms");

            // when we are ready we move the index to the final destination and write
            // a done flag file we can use in shell scripts to identify the move is
            // done.

            new File(index, "done").createNewFile();
        }

    } catch (Exception e) {
        throw new RuntimeException("Unable to write index", e);
    }
}

From source file:com.devb.search.IndicIndexer.java

License:Apache License

/**
 * Rebuilds the index under {@code <webapp root>/hindex/} from the files found directly in
 * {@code <webapp root>/hdocs/}.
 *
 * <p>Each file is read as UTF-8 and run through a {@code StandardTokenizer}; every token
 * becomes its own document with the fields {@code path} (file name), {@code linenumber}
 * (despite the name, the 1-based ordinal of the token within the file) and
 * {@code contents} (the token text). Failures while processing a single file are printed
 * and the remaining files are still indexed. Progress and errors are reported on
 * {@code System.out}.
 */
@Override
public void makeIndex() {
    String indexPath = servletContext.getRealPath("/") + "/hindex/";
    String docsPath = servletContext.getRealPath("/") + "/hdocs/";
    boolean create = true;

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path\n");
        return;
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...\n");

        org.apache.lucene.store.Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new HindiAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);

        if (create) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            String[] files = docDir.isDirectory() ? docDir.list() : null;
            if (files != null) {
                for (String fileName : files) {
                    File file = new File(docDir, fileName);
                    if (!file.isFile()) {
                        // a sub-directory cannot be opened as a stream; skip it instead of
                        // aborting the whole run
                        continue;
                    }
                    FileInputStream fileInputStream = new FileInputStream(file);
                    BufferedReader reader = new BufferedReader(
                            new InputStreamReader(fileInputStream, "UTF-8"));
                    Tokenizer tokenizer = new StandardTokenizer(reader);
                    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
                    int lineNumber = 0;
                    try {
                        // reset() is inside the try so the streams are closed if it fails
                        tokenizer.reset();
                        while (tokenizer.incrementToken()) {
                            Document doc = new Document();
                            Field pathField = new StringField("path", file.getName(), Field.Store.YES);
                            doc.add(pathField);
                            TextField nField = new TextField("linenumber",
                                    Integer.toString(++lineNumber), Store.YES);
                            doc.add(nField);
                            TextField field = new TextField("contents", termAtt.toString(), Store.YES);
                            doc.add(field);
                            writer.addDocument(doc);
                        }
                        System.out.println("Adding " + file + "\n");
                    } catch (Exception e) {
                        // best effort per file: report and continue with the next one
                        e.printStackTrace();
                    } finally {
                        tokenizer.close();
                        reader.close();
                        fileInputStream.close();
                    }
                }
            }
        } finally {
            // close the writer even if a file could not be opened
            writer.close();
        }

        Date end = new Date();
        System.out.println((end.getTime() - start.getTime()) + " total milliseconds\n");

    } catch (IOException e) {
        System.out.println("Caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.devb.search.StandardIndexer.java

License:Apache License

/**
 * Rebuilds the index under {@code <webapp root>/index/} from the files found directly in
 * {@code <webapp root>/docs/}.
 *
 * <p>Every line of every file becomes its own document with the fields {@code path}
 * (file name), {@code linenumber} (1-based line number) and {@code contents} (the line
 * text). Failures while reading a single file are printed and the remaining files are
 * still indexed. Progress and errors are reported on {@code System.out}.
 */
@Override
public void makeIndex() {
    String indexPath = servletContext.getRealPath("/") + "/index/";
    String docsPath = servletContext.getRealPath("/") + "/docs/";
    boolean create = true;

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path\n");
        return;
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...\n");

        org.apache.lucene.store.Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);

        if (create) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            String[] files = docDir.isDirectory() ? docDir.list() : null;
            if (files != null) {
                for (String fileName : files) {
                    File file = new File(docDir, fileName);
                    if (!file.isFile()) {
                        // a sub-directory cannot be opened by FileReader; skip it instead of
                        // aborting the whole run
                        continue;
                    }
                    // NOTE(review): FileReader decodes with the platform default charset;
                    // confirm that this matches the encoding of the documents
                    FileReader fr = new FileReader(file);
                    BufferedReader br = new BufferedReader(fr);
                    String line;
                    int lineNumber = 0;
                    try {
                        while ((line = br.readLine()) != null) {
                            Document doc = new Document();
                            Field pathField = new StringField("path", file.getName(), Field.Store.YES);
                            doc.add(pathField);
                            TextField nField = new TextField("linenumber",
                                    Integer.toString(++lineNumber), Store.YES);
                            doc.add(nField);
                            TextField field = new TextField("contents", line, Store.YES);
                            doc.add(field);
                            writer.addDocument(doc);
                        }
                        System.out.println("Adding " + file + "\n");
                    } catch (Exception e) {
                        // best effort per file: report and continue with the next one
                        e.printStackTrace();
                    } finally {
                        br.close();
                        fr.close();
                    }
                }
            }
        } finally {
            // close the writer even if a file could not be opened
            writer.close();
        }

        Date end = new Date();
        System.out.println((end.getTime() - start.getTime()) + " total milliseconds\n");

    } catch (IOException e) {
        System.out.println("Caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file:com.docdoku.server.IndexerBean.java

License:Open Source License

/**
 * Adds one entry to the index: the full name as a stored, non-analyzed field and the
 * content supplied through a reader.
 *
 * @param pIndexWriter   writer the new document is added to
 * @param pContentReader reader supplying the value of the {@code content} field
 * @param pFullName      value of the {@code fullName} field
 */
private void addDoc(IndexWriter pIndexWriter, Reader pContentReader, String pFullName)
        throws FileNotFoundException, CorruptIndexException, IOException {
    final Field nameField = new Field("fullName", pFullName, Field.Store.YES, Field.Index.NOT_ANALYZED);
    final Field contentField = new Field("content", pContentReader);

    final Document entry = new Document();
    entry.add(nameField);
    entry.add(contentField);
    pIndexWriter.addDocument(entry);
}