Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

This page collects example usages of the org.apache.lucene.index.IndexWriter method addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
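
Before diving into the project examples, here is a minimal, self-contained sketch of the call. It is not taken from any of the projects below and assumes a Lucene 8+ classpath, where ByteBuffersDirectory replaces the older RAMDirectory and addDocument returns a sequence number:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class AddDocumentSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory(); // in-memory; use FSDirectory.open(path) for disk
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new StringField("id", "42", Field.Store.YES));              // exact-match key
            doc.add(new TextField("contents", "hello lucene", Field.Store.NO)); // analyzed body
            long seqNo = writer.addDocument(doc); // sequence number (Lucene 6+)
            writer.commit();
            System.out.println("added document, sequence number " + seqNo);
        }
    }
}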

Usage

From source file:com.browseengine.bobo.test.TestPathMultiVal.java

License:Apache License

@Override
protected void setUp() throws Exception {
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    addMetaDataField(doc, PathHandlerName, new String[] { "/a/b/c", "/a/b/d" });
    writer.addDocument(doc);
    writer.commit();

    PathFacetHandler pathHandler = new PathFacetHandler("path", true);
    facetHandlers.add(pathHandler);
}
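
Note that this test is written against the pre-4.0 IndexWriter constructor (directory, analyzer, create flag, MaxFieldLength). On later versions the same setup goes through IndexWriterConfig; roughly, in Lucene 5+ style (a sketch, not part of the original test):

IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer())
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE); // the old "true" argument meant "create"
IndexWriter writer = new IndexWriter(directory, config);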

From source file:com.bsiag.smartfield.server.services.custom.lucene.indexwriter.IndexWriterService.java

License:Open Source License

public void createZipIndex(RemoteFile file) {
    try {
        Set<String> zipCache = new HashSet<String>();

        String path = BundleContextUtility
                .resolve("${workspace_loc}/com.bsiag.smartfield.server/resources/index");
        IOUtility.deleteDirectory(path);
        Directory index = new SimpleFSDirectory(new File(path));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34,
                new StandardAnalyzer(Version.LUCENE_34));

        IndexWriter w = new IndexWriter(index, config);

        file.setCharsetName("UTF-8");
        BufferedReader reader = new BufferedReader(file.getDecompressedReader());
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\t");
            if (tokens.length > 1) {
                String zip = tokens[0].trim();
                String city = tokens[1].trim();
                if (StringUtility.hasText(city) && StringUtility.hasText(zip)) {
                    w.addDocument(createDocoment(zip, city));
                    zipCache.add(zip + city);
                }
            }
        }

        w.close();

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.bugull.mongo.lucene.backend.IndexInsertJob.java

License:Apache License

@Override
public void doJob() {
    Class<?> clazz = obj.getClass();
    String name = MapperUtil.getEntityName(clazz);
    IndexWriterHolder holder = IndexWriterHolder.getInstance();
    IndexWriter writer = holder.get(name);
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    try {
        writer.addDocument(doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    }
}

From source file:com.bugull.mongo.lucene.backend.IndexInsertTask.java

License:Apache License

@Override
public void run() {
    Class<?> clazz = obj.getClass();
    String name = MapperUtil.getEntityName(clazz);
    IndexWriterCache cache = IndexWriterCache.getInstance();
    IndexWriter writer = cache.get(name);
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    try {
        writer.addDocument(doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    }
}
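
These two jobs are nearly identical, and both catch CorruptIndexException and IOException separately with the same log message. Since CorruptIndexException extends IOException, a single catch (IOException ex) block would cover both; the split is only worthwhile if the two cases should log differently.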

From source file:com.burkeware.search.api.internal.lucene.DefaultIndexer.java

License:Open Source License

/**
 * Write the JSON representation of a single object as a single document entry inside the Lucene index.
 *
 * @param jsonObject the JSON object to be written to the index
 * @param resource   the configuration to transform JSON to a Lucene document
 * @param writer     the Lucene index writer
 * @throws java.io.IOException when writing the document fails
 */
private void writeObject(final Object jsonObject, final Resource resource, final IndexWriter writer)
        throws IOException {

    Document document = new Document();
    document.add(new Field(DEFAULT_FIELD_JSON, jsonObject.toString(), Field.Store.YES, Field.Index.NO));
    document.add(new Field(DEFAULT_FIELD_UUID, UUID.randomUUID().toString(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    document.add(new Field(DEFAULT_FIELD_CLASS, resource.getResourceObject().getName(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    document.add(new Field(DEFAULT_FIELD_RESOURCE, resource.getName(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));

    for (SearchableField searchableField : resource.getSearchableFields()) {
        Object value = JsonPath.read(jsonObject, searchableField.getExpression());
        document.add(new Field(searchableField.getName(), String.valueOf(value), Field.Store.YES,
                Field.Index.ANALYZED_NO_NORMS));
    }

    if (getLogger().isDebugEnabled())
        getLogger().debug(this.getClass().getSimpleName(), "Writing document: " + document);

    writer.addDocument(document);
}

From source file:com.cep.darkstar.onramp.djnews.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setOmitTermFreqAndPositions(true);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case, searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}
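
The next example implements the same add-or-update pattern against the newer 4.x/5.x-era API: java.nio.file.Path instead of File, StringField/LongField/TextField instead of the raw Field constructors, and StandardCharsets.UTF_8 for the reader.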

From source file:com.chenyi.langeasy.lucene.IndexFiles.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        count++;
        if (count % 500 == 499) {
            System.out.println(count + "/" + new Date());
        }
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which is
        // often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you
        // require. For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a
        // Reader, so that the text of the file is tokenized and indexed, but
        // not stored. Note that FileReader expects the file to be in UTF-8
        // encoding. If that's not the case, searching for special characters
        // will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed), so we use updateDocument instead to replace the old
            // one matching the exact path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
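
If you are on Lucene 6 or later, note that LongField was removed there. The equivalent of the "modified" field above is a LongPoint for range queries, optionally paired with a doc-values field for sorting (a sketch, not part of this source file):

doc.add(new LongPoint("modified", lastModified));             // indexed for range queries
doc.add(new NumericDocValuesField("modified", lastModified)); // optional: enables sorting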

From source file:com.chimpler.example.FacetLuceneIndexer.java

License:Apache License

public static void main(String args[]) throws Exception {
    //      if (args.length != 3) {
    //         System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //         System.exit(1);
    //      }

    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)),
            OpenMode.APPEND);

    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);

    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();

        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();

        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);

        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory,
                authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }

    //      taxonomyWriter.close();
    //      
    //      indexWriter.commit();
    //      indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));

    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));
    System.out.println("Found:");

    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }
    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();

}
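
Two details in this example are easy to miss. First, DirectoryReader.open(indexWriter, false) opens a near-real-time reader over the writer's uncommitted state, and each call returns a distinct reader instance, which is why the indexReader == indexReader2 check prints false. Second, the prepareCommit()/commit() pair on the taxonomy writer is Lucene's two-phase commit, with rollback() as the recovery path if the second phase fails.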

From source file:com.codecrate.shard.search.ObjectIndexer.java

License:Apache License

public void save(Serializable id, Object entity) {
    removeDocuments(id);

    IndexWriter writer = null;
    try {
        writer = new IndexWriter(directory, analyzer, DO_NOT_CREATE_INDEX);
        Document document = new Document();
        document.add(Field.Keyword(HibernateObjectSearcher.FIELD_CLASS, entity.getClass().getName()));
        document.add(Field.Keyword(HibernateObjectSearcher.FIELD_ID, id.toString()));
        document.add(Field.Text(HibernateObjectSearcher.FIELD_TEXT, entity.toString()));

        LOG.debug("saving " + document);
        writer.addDocument(document);
    } catch (IOException e) {
        LOG.error("Error updating index for object " + entity, e);
    } finally {
        closeWriter(writer);
    }
}
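
Field.Keyword and Field.Text are factory methods from the earliest Lucene releases: Keyword meant stored, indexed, and not tokenized; Text(String, String) meant stored, indexed, and tokenized. A rough Lucene 4+ equivalent of the three calls above, keeping the original field names (a sketch, not part of this source):

document.add(new StringField(HibernateObjectSearcher.FIELD_CLASS, entity.getClass().getName(), Field.Store.YES));
document.add(new StringField(HibernateObjectSearcher.FIELD_ID, id.toString(), Field.Store.YES));
document.add(new TextField(HibernateObjectSearcher.FIELD_TEXT, entity.toString(), Field.Store.YES));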

From source file:com.codenvy.test.lucene.DeleteFilesWithSameName.java

License:Open Source License

private static void indexDocs(IndexWriter writer, Path file) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();

        System.out.println("file path " + file.toAbsolutePath().toString());
        Field pathField = new StringField(PATH, file.toAbsolutePath().toString(), Field.Store.YES);
        doc.add(pathField);

        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {

            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            System.out.println("updating " + file);
            // use the absolute path so the term matches the stored PATH field
            writer.updateDocument(new Term(PATH, file.toAbsolutePath().toString()), doc);
        }
    }
}
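
A pattern worth taking away from several of these examples: addDocument appends unconditionally, while updateDocument(term, doc) atomically deletes any existing documents matching the term and then adds the new one. Keying the term on a unique field (the "path" field here) is what makes re-indexing idempotent.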