List of usage examples for org.apache.lucene.index IndexWriter updateDocument
public long updateDocument(Term term, Iterable<? extends IndexableField> doc) throws IOException
Updates a document by first deleting the document(s) containing term and then adding the new document; the delete-then-add is atomic as seen by a reader on the same index. (All examples below call this public overload; the private updateDocument(DocumentsWriterDeleteQueue.Node<?>, Iterable) overload is internal IndexWriter plumbing.)
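Before the collected examples, a minimal, self-contained sketch of the canonical add-or-update pattern (Lucene 5.x-era API; the index path, field names, and analyzer are illustrative assumptions, not taken from any example below):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("example-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField is indexed but not tokenized, so "id" works as an exact-match key.
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("body", "updated text", Field.Store.NO));
            // Deletes any document(s) whose "id" term equals "42", then adds doc.
            writer.updateDocument(new Term("id", "42"), doc);
            writer.commit();
        }
    }
}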
From source file:cs571.proj1.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    if (checkForIgnoredFile(file.getFileName().toString()))
        return;
    try (InputStream stream = Files.newInputStream(file)) {
        // The input is expected to be UTF-8; if it is not, searching for
        // special characters will fail.
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        String line, docNO = null, docID = null, s;
        StringBuilder sb = new StringBuilder();
        Field docIDField = null;
        Document doc = new Document();
        boolean docFound = false;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.startsWith(docNO_start)) {
                docNO = removeTags(line);
                continue;
            }
            if (line.startsWith(docID_start)) {
                docID = removeTags(line);
                continue;
            }
            if (line.equals(docStart)) {
                docFound = true;
                continue;
            }
            if (line.equals(docEnd)) {
                docFound = false;
                // Prefer <DOCNO> as the document key; fall back to <DOCID>.
                if (docNO != null) {
                    docIDField = new StringField("docID", docNO, Field.Store.YES);
                    doc.add(docIDField);
                } else if (docID != null) {
                    docIDField = new StringField("docID", docID, Field.Store.YES);
                    doc.add(docIDField);
                } else {
                    continue;
                }
                if (tfidf || bm25) {
                    // Store term vectors (with positions and offsets) so the
                    // scoring code can re-read per-document term statistics.
                    FieldType tv = new FieldType();
                    tv.setTokenized(true);
                    tv.setStoreTermVectors(true);
                    tv.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                    s = sb.toString();
                    byte[] bytes = s.getBytes(StandardCharsets.UTF_8); // explicit charset; the original used the platform default
                    ByteArrayInputStream bstream = new ByteArrayInputStream(bytes);
                    InputStreamReader isr = new InputStreamReader(bstream, StandardCharsets.UTF_8);
                    doc.add(new Field("contents", isr, tv));
                } else {
                    // Tokenized and indexed, but not stored.
                    doc.add(new TextField("contents", sb.toString(), Field.Store.NO));
                }
                // "path" is indexed but not tokenized, so it can serve as the
                // exact-match key for updateDocument below.
                Field pathField = new StringField("path", file.toString(), Field.Store.YES);
                doc.add(pathField);
                // LongField indexes to millisecond resolution, which is often
                // finer than needed; a coarser value such as 2011021714
                // (February 17, 2011, 2-3 PM) may suffice.
                doc.add(new LongField("modified", lastModified, Field.Store.NO));
                numOfDocuments++;
                sb.setLength(0);
                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed), so we use updateDocument instead to replace the old
                    // one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.toString()), doc);
                }
                doc = new Document();
                docNO = null; // reset per-document identifiers so a later document
                docID = null; // cannot inherit a stale DOCNO (the original never reset these)
                continue;
            }
            if (docFound) {
                sb.append(line).append("\n");
            }
        }
    }
}
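A caveat on this example: every document extracted from a file shares the same "path" value, and updateDocument first deletes every document matching the supplied term. Re-indexing a multi-document file in APPEND mode therefore deletes each previously added document from that file in turn, leaving only the file's last document in the index. Keying the update by the per-document identifier would avoid this; a one-line sketch (not in the original source):

// Hypothetical fix: key the update by the per-document "docID" field rather
// than the shared "path", so each update replaces only its own document.
writer.updateDocument(new Term("docID", docNO != null ? docNO : docID), doc);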
From source file:de.berlios.jhelpdesk.utils.LuceneIndexer.java
License:Open Source License
public synchronized void updateIndexedArticle(Article article) {
    IndexWriter indexWriter = null;
    try {
        Document document = articleToDocument(article);
        indexWriter = getIndexWriter();
        // Replace any previously indexed version of this article, keyed by its id.
        indexWriter.updateDocument(new Term("id", String.valueOf(article.getArticleId())), document);
        indexWriter.commit();
    } catch (Exception ex) {
        log.error(ex.getMessage(), ex);
        throw new RuntimeException(ex);
    } finally {
        closeWriter(indexWriter);
    }
}
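The getIndexWriter() and closeWriter() helpers are not shown in this snippet. A plausible minimal version, written against the Lucene 5.x-style API for illustration (the index path and analyzer are assumptions, not taken from jHelpdesk):

// Hypothetical helpers assumed by the snippet above; not from the original source.
private IndexWriter getIndexWriter() throws IOException {
    Directory dir = FSDirectory.open(Paths.get("helpdesk-index")); // assumed location
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(dir, conf);
}

private void closeWriter(IndexWriter writer) {
    if (writer == null)
        return;
    try {
        writer.close();
    } catch (IOException ex) {
        log.error("failed to close IndexWriter", ex);
    }
}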
From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which is
        // often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you
        // require. For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // The file is expected to start with "URL:", "DataType:" and "Title:"
        // header lines, followed by the body text. Note that the reader expects
        // UTF-8; if that's not the case, searching for special characters will fail.
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        List<String> lines = new ArrayList<>();
        String l;
        while ((l = reader.readLine()) != null) { // readLine() == null is the reliable EOF test (the original looped on reader.ready())
            lines.add(l);
        }
        if (lines.size() > 0) {
            String urlLine = lines.remove(0);
            if (urlLine != null && urlLine.startsWith("URL:")) {
                urlLine = urlLine.substring(4);
                doc.add(new TextField("URL", urlLine, Field.Store.YES));
            }
        }
        if (lines.size() > 0) {
            String dataType = lines.remove(0);
            if (dataType != null && dataType.startsWith("DataType:")) {
                dataType = dataType.substring(9);
                doc.add(new TextField("DataType", dataType, Field.Store.YES));
            }
        }
        if (lines.size() > 0) {
            String title = lines.remove(0);
            if (title != null && title.startsWith("Title:")) {
                title = title.substring(6);
                doc.add(new TextField("title", title, Field.Store.YES));
            }
        }
        StringBuilder content = new StringBuilder();
        for (String s : lines) {
            content.append(s);
        }
        doc.add(new TextField("contents", content.toString(), Field.Store.NO));

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed),
            // so we use updateDocument instead to replace the old one matching the
            // exact path, if present:
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:de.jetsli.lumeo.util.LuceneHelperTest.java
License:Apache License
@Test
public void testTermMatching() throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(RawLucene.VERSION, new KeywordAnalyzer()));
    Document d = new Document();
    FieldType ft = Mapping.getLongFieldType(true, true);
    d.add(new LongField("id", 1234, ft));
    d.add(new LongField("tmp", 1111, ft));
    w.addDocument(d);

    // Replace the document whose id is 1234; only its "tmp" value changes.
    d = new Document();
    d.add(new LongField("id", 1234, ft));
    d.add(new LongField("tmp", 2222, ft));
    w.updateDocument(getTerm("id", 1234), d);

    d = new Document();
    d.add(new LongField("id", 0, ft));
    w.addDocument(d);
    w.commit();

    IndexReader reader = DirectoryReader.open(w, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Numeric fields are indexed as prefix-coded terms, so the query term
    // must be encoded the same way.
    BytesRef bytes = new BytesRef();
    NumericUtils.longToPrefixCoded(1234, 0, bytes);
    TopDocs td = searcher.search(new TermQuery(new Term("id", bytes)), 10);
    assertEquals(1, td.totalHits);
    assertEquals(1234L, searcher.doc(td.scoreDocs[0].doc).getField("id").numericValue());
    assertEquals(2222L, searcher.doc(td.scoreDocs[0].doc).getField("tmp").numericValue());
    w.close();
}
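The getTerm(...) helper isn't shown here, but for updateDocument to match anything it must produce the same prefix-coded bytes that LongField wrote to the index, just as the query at the end of the test does. A sketch of what it presumably looks like (an assumption, not the project's actual code):

// Hypothetical sketch of getTerm: build a Term whose bytes use the same
// prefix coding that the numeric field was indexed with.
static Term getTerm(String field, long value) {
    BytesRef bytes = new BytesRef();
    NumericUtils.longToPrefixCoded(value, 0, bytes);
    return new Term(field, bytes);
}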
From source file:edu.albany.ir.example.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one Lucene document per <DOCNO> entry found in
 * each input file, rather than one document per file.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead())
        return;
    if (file.isDirectory()) {
        String[] files = file.list(); // an IO error could occur
        if (files != null) {
            for (int i = 0; i < files.length; i++) {
                indexDocs(writer, new File(file, files[i]));
            }
        }
        return;
    }
    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // At least on Windows, some temporary files raise this exception with an
        // "access denied" message; checking whether the file can be read doesn't help.
        fnfe.printStackTrace();
        return;
    }
    try {
        String record;
        int a, b, stringNum = 0;
        String docName = null;
        Document doc = new Document();
        BufferedReader reader = new BufferedReader(new InputStreamReader(fis));
        while ((record = reader.readLine()) != null) {
            a = record.lastIndexOf("<DOCNO>");
            b = record.indexOf("</DOCNO>");
            if (a >= 0 && b > 0) { // this line contains the DOCNO
                stringNum++;
                docName = record.substring(a + 7, b).trim();
                if (stringNum >= 2) {
                    // flush the previous document before starting a new one
                    writer.addDocument(doc);
                }
                doc = new Document();
                // "path" holds the DOCNO: indexed, not tokenized, with no term
                // frequencies or positions.
                Field pathField = new Field("path", docName, Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setOmitTermFreqAndPositions(true);
                doc.add(pathField);
                System.out.println("adding " + docName);
                // "modified" uses a NumericField (efficiently filterable with
                // NumericRangeFilter), indexed at millisecond resolution.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);
            } else {
                // Every non-DOCNO line goes into "contents": tokenized, stored,
                // with term vectors.
                doc.add(new Field("contents", record, Field.Store.YES,
                        Field.Index.ANALYZED, Field.TermVector.YES));
            }
        }
        // Add or update the last document read from this file. Note that the
        // update term uses file.getPath() although documents are keyed by
        // DOCNO; this mismatch is kept exactly as in the original source.
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + docName);
            writer.addDocument(doc);
        } else {
            // Existing index: replace the old document matching the exact path, if present.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }
    } finally {
        fis.close();
    }
}
From source file:edu.cmu.cs.in.search.HoopLuceneIndex.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this
 * is in the benchmark module, which can create "line doc" files, one document
 * per line, using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list(); // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception
                // with an "access denied" message; checking if the file can be
                // read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a field
                // that is indexed (i.e. searchable), but don't tokenize the field
                // into separate words and don't index term frequency or
                // positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named
                // "modified". Use a LongField that is indexed (i.e. efficiently
                // filterable with NumericRangeFilter). This indexes to millisecond
                // resolution, which is often too fine. You could instead create a
                // number based on year/month/day/hour/minutes/seconds, down to the
                // resolution you require. For example the long value 2011021714
                // would mean February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".
                // Specify a Reader, so that the text of the file is tokenized and
                // indexed, but not stored. Note that the reader expects the file
                // to be in UTF-8 encoding; if that's not the case, searching for
                // special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed), so we use updateDocument instead to replace the
                    // old one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
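The OpenMode check in the add-or-update branch only behaves as described because of how the demo configures its writer. A minimal configuration sketch (Lucene 4.x-era API; the create flag mirrors the Lucene demo's "-update" command-line option and is an assumption here):

// Sketch of the writer setup this indexDocs assumes; not part of the snippet above.
Directory dir = FSDirectory.open(new File("index"));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
IndexWriter writer = new IndexWriter(dir, iwc);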
From source file:edu.cmu.lti.huiying.ir.rangedsearch.TableIndexer.java
License:Apache License
public void indexExplodedXml(IndexWriter writer, File file) throws IOException {
    if (!file.canRead())
        return;
    if (file.isDirectory()) {
        String[] files = file.list();
        if (files != null) {
            for (int i = 0; i < files.length; i++) {
                indexExplodedXml(writer, new File(file, files[i]));
            }
        }
        return;
    }
    FileInputStream fis = new FileInputStream(file);
    try {
        NumericFeatureGenerator nfg = new NumericFeatureGenerator();
        if (this.xmlreader == null) {
            this.xmlreader = new XmlStAXReader();
        }
        Article a = xmlreader.readArticleFromXml(file.getAbsolutePath());
        for (Table t : a.tables) {
            for (Group g : t.groups) {
                for (Column col : g.columns) {
                    // Index one document per column, carrying numeric summary
                    // features of the column's cells.
                    Document coldoc = new Document();
                    ArrayList<Double> cfv = nfg.getFeatureVector(col.content);
                    if (cfv.get(0) != null)
                        coldoc.add(new DoubleField("intratio", cfv.get(0), Field.Store.NO));
                    if (cfv.get(1) != null)
                        coldoc.add(new DoubleField("floatratio", cfv.get(1), Field.Store.NO));
                    if (cfv.get(3) != null)
                        coldoc.add(new DoubleField("mean", cfv.get(3), Field.Store.NO));
                    if (cfv.get(4) != null)
                        coldoc.add(new DoubleField("std", cfv.get(4), Field.Store.NO));
                    if (cfv.get(6) != null)
                        coldoc.add(new DoubleField("min", cfv.get(6), Field.Store.NO));
                    if (cfv.get(7) != null)
                        coldoc.add(new DoubleField("max", cfv.get(7), Field.Store.NO));
                    if (cfv.get(8) != null)
                        coldoc.add(new DoubleField("acc", cfv.get(8), Field.Store.NO));
                    if (cfv.get(11) != null)
                        coldoc.add(new DoubleField("colmag", cfv.get(11), Field.Store.NO));

                    StringField wholegroup = new StringField("wholegroup", g.toString(), Field.Store.YES);
                    int groupBytes = wholegroup.stringValue().getBytes().length;
                    if (groupBytes > 32760) {
                        // Stored fields are limited to 32766 bytes per term;
                        // report the original size, then truncate. (The original
                        // printed the length after truncating, which always
                        // showed the placeholder's size.)
                        System.err.println("table too large:" + groupBytes);
                        wholegroup.setStringValue("Table too large...");
                    }
                    String headers = "";
                    if (col.headers != null) {
                        for (Header hdr : col.headers) {
                            headers += hdr.text.toLowerCase() + " ";
                        }
                    }
                    coldoc.add(new TextField("headerkeywords", headers.trim(), Field.Store.NO));
                    coldoc.add(wholegroup);
                    coldoc.add(new StringField("filename", file.getAbsolutePath(), Field.Store.YES));
                    coldoc.add(new StringField("type", "column", Field.Store.YES));
                    IntField bstart = new IntField("bytestart", col.content.get(0).byteStart, Field.Store.YES);
                    IntField bend = new IntField("byteend",
                            col.content.get(col.content.size() - 1).byteEnd, Field.Store.YES);
                    String content = "";
                    for (edu.cmu.lti.huiying.domainclasses.Field f : col.content)
                        content += f.text + "|";
                    coldoc.add(new StringField("colcontent",
                            content.substring(0, content.length() - 1), Field.Store.YES));
                    coldoc.add(bstart);
                    coldoc.add(bend);
                    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                        writer.addDocument(coldoc);
                        totalDocAdded++;
                    } else {
                        writer.updateDocument(new Term("path", file.getPath()), coldoc);
                    }

                    for (edu.cmu.lti.huiying.domainclasses.Field f : col.content) {
                        // Index one document per numeric cell.
                        Document celldoc = new Document();
                        ArrayList<Double> fv = nfg.field2Features(f);
                        if (fv.get(0) == 1 || fv.get(0) == 2) {
                            try {
                                celldoc.add(new DoubleField("value", fv.get(1), Field.Store.YES));
                                celldoc.add(new StringField("text", f.text, Field.Store.YES));
                                // The original tested "fv.get(2) != Double.NaN", which is
                                // always true; Double.isNaN() is the correct check, and the
                                // null test must short-circuit (&&, not &).
                                if (fv.get(2) != null && !Double.isNaN(fv.get(2))) {
                                    celldoc.add(new DoubleField("error", fv.get(2), Field.Store.NO));
                                }
                                if (fv.get(5) != null && !Double.isNaN(fv.get(5))) {
                                    celldoc.add(new DoubleField("cellmag", fv.get(5), Field.Store.NO));
                                }
                                if (fv.get(4) != null) {
                                    celldoc.add(new DoubleField("cellpvalue", fv.get(4), Field.Store.NO));
                                }
                                celldoc.add(new StringField("filename", file.getAbsolutePath(), Field.Store.YES));
                                celldoc.add(new StringField("type", "cell", Field.Store.YES));
                                celldoc.add(new IntField("bytestart", f.byteStart, Field.Store.YES));
                                celldoc.add(new IntField("byteend", f.byteEnd, Field.Store.YES));
                            } catch (NullPointerException e) {
                                e.printStackTrace();
                                System.out.println(f.text);
                            }
                            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                                writer.addDocument(celldoc);
                                totalDocAdded++;
                            } else {
                                writer.updateDocument(new Term("path", file.getPath()), celldoc);
                            }
                        }
                    }
                }
            }
        }
    } finally {
        fis.close();
    }
}
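Worth noting: neither coldoc nor celldoc ever gets a "path" field, so in APPEND mode the updateDocument calls above match no existing term and effectively behave like addDocument. If replacement on re-indexing is intended, the field the update term is keyed on must itself be indexed; a one-line sketch (hypothetical, not in the original):

// Hypothetical: index the field that updateDocument's term is keyed on.
coldoc.add(new StringField("path", file.getPath(), Field.Store.NO));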
From source file:edu.cmu.lti.huiying.ir.rangedsearch.TableIndexer.java
License:Apache License
public void indexOffsetAnnotation(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead())
        return;
    if (file.isDirectory()) {
        String[] files = file.list(); // an IO error could occur
        if (files != null) {
            for (int i = 0; i < files.length; i++) {
                if (files[i].equals("NeuroScience.num.offset"))
                    indexOffsetAnnotation(writer, new File(file, files[i]));
            }
        }
        return;
    }
    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        return;
    }
    try {
        // make a new, empty document; blank lines in the offset annotation
        // file separate one record from the next
        Document doc = new Document();
        BufferedReader br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
        String line = null;
        String filename = null;
        while ((line = br.readLine()) != null) {
            if (line.trim().length() == 0) {
                doc.add(new StringField("filename", filename, Field.Store.YES));
                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    writer.addDocument(doc);
                } else {
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
                doc = new Document();
                filename = null;
                continue;
            }
            String[] spl = line.split("\t");
            doc.add(new DoubleField(spl[3], Double.parseDouble(spl[5]), Field.Store.YES));
            if (filename == null)
                filename = spl[0];
        }
        br.close();
    } finally {
        fis.close();
    }
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
protected void updateDocument(Document doc, long studyId) throws IOException {
    try {
        IndexWriter writer = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.updateDocument(new Term("id", Long.toString(studyId)), doc);
        // TODO: Figure out, eventually, what to do with the variable and file
        // metadata searches here. -- L.A.
        //
        // Our deleteDocument() method contains these two lines, below, in
        // addition to deleting the document by the "id" term, as above:
        //   reader.deleteDocuments(new Term("varStudyId", Long.toString(studyId)));
        //   reader.deleteDocuments(new Term("versionStudyId", Long.toString(studyId)));
        writer.commit();
        writer.close();
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
protected void updateStudyDocument(long studyId, String field, String value) throws IOException {
    IndexReader reader = IndexReader.open(dir, false);
    try {
        if (reader != null) {
            TermDocs matchingDocuments = reader.termDocs();
            if (matchingDocuments != null) {
                int c = 1;
                if (matchingDocuments.next()) {
                    // We only expect 1 document when searching by study id.
                    Document studyDocument = reader.document(matchingDocuments.doc());
                    logger.fine("processing matching document number " + c++);
                    if (studyDocument != null) {
                        logger.fine("got a non-zero doc;");
                        reader.close();
                        reader = null;
                        logger.fine("deleted the document;");
                        IndexWriter localWriter = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                                IndexWriter.MaxFieldLength.UNLIMITED);
                        localWriter.updateDocument(new Term("id", Long.toString(studyId)), studyDocument);
                        localWriter.commit();
                        localWriter.close();
                        logger.fine("wrote the updated version of the document;");
                    }
                }
            }
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
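Both Indexer methods above open a brand-new IndexWriter (via the legacy Lucene 3.x constructor with MaxFieldLength) for every single update, paying the full writer startup cost each time. A sketch of the same update against one long-lived writer (the sharedWriter field is an assumption, not in the original class):

// Hypothetical variant that reuses a single writer rather than
// opening, committing, and closing one per update.
protected void updateDocument(Document doc, long studyId) throws IOException {
    sharedWriter.updateDocument(new Term("id", Long.toString(studyId)), doc);
    sharedWriter.commit(); // or batch commits for better indexing throughput
}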