Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.document.

Prototype

public final Document document(int docID) throws IOException 

Document

Returns the stored fields of the nth Document in this index.
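
Most of the usage examples below come from older Lucene versions (3.x/4.x), where deleted documents are skipped with IndexReader.isDeleted(int). As a minimal orientation, here is a sketch of the call itself against a Lucene 5.x-style API; the index path and the stored field name "title" are placeholders, and deleted documents are skipped via the reader's live docs instead.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class PrintStoredFields {
    public static void main(String[] args) throws Exception {
        // Open a read-only reader on an existing index directory (placeholder path).
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            // The live-docs bitset is null when the index contains no deletions.
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (liveDocs != null && !liveDocs.get(docID)) {
                    continue; // skip deleted documents
                }
                // document(int) returns only the stored fields of the document;
                // indexed-but-not-stored fields are not available here.
                Document doc = reader.document(docID);
                System.out.println(docID + ": " + doc.get("title"));
            }
        }
    }
}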

Usage

From source file:org.iis.ut.artificialplagiarismcreator.tools.FileFromIndexEctractor.java

public static void main(String[] args) throws IOException {
    IndexReader ireader = IndexReader.open(new SimpleFSDirectory(new File(indexPath)));

    for (int i = 0; i < ireader.numDocs(); i++) {
        String text = ireader.document(i).get("TEXT");

        BufferedWriter writer = new BufferedWriter(
                new FileWriter("/Users/Sam/Education/MyMasterThesis/Codes/evaluations/SimorghI/src/"
                        + ireader.document(i).get("DOCID")));
        writer.write(text);
        writer.close();
    }
}

From source file:org.jab.docsearch.Index.java

License:Open Source License

/**
 * The location of a URL in an index; used in the algorithm for updating an
 * index.
 *
 * @return the location of the SpiderUrl in a web oriented DocSearcherIndex,
 *         or -1 if the URL is not in the index
 */
public int spiderIndexNum(int lastFound, String fileName, IndexReader ir) {
    int returnInt = -1;
    synchronized (this) {
        if (lastFound == -1)
            lastFound = 0;
        try {
            Document doc;
            String compareName = "";
            int numDocs = ir.maxDoc();
            for (int i = lastFound; i < numDocs; i++) {
                if (!ir.isDeleted(i)) {
                    doc = ir.document(i);
                    if (doc != null) {
                        compareName = doc.get(FIELD_URL);
                        if (compareName.equals(fileName)) {
                            returnInt = i;
                            break;
                        }
                    }
                }
            }
            if (returnInt == -1) {
                for (int i = lastFound; i > 0; i--) {
                    if (!ir.isDeleted(i)) {
                        doc = ir.document(i);
                        if (doc != null) {
                            compareName = doc.get(FIELD_URL);
                            // System.out.println("Comparing "+compareName+"
                            // to "+fileName);
                            if (compareName.equals(fileName)) {
                                // System.out.println("MATCH FOUND AT "+i);
                                returnInt = i;
                                break;
                            }
                        }
                    }
                }
            }

            if (returnInt == -1)
                ds.setStatus("File " + fileName + " not found in index!");
        } catch (Exception e) {
            logger.error("spiderIndexNum() failed", e);
            ds.setStatus("Error determining if doc is already in index!");
        }
        // finally {
        return returnInt;
        // }
    }
}

From source file:org.jab.docsearch.Index.java

License:Open Source License

/**
 * Location of a file in a DocSearcher index; used by the update algorithm to
 * update an index.
 *
 * @return location of the document in the DocSearcherIndex or -1 if it is
 *         not in there
 */
public int indexNum(int lastFound, String fileName, IndexReader ir) {
    int returnInt = -1;
    synchronized (this) {
        if (lastFound == -1)
            lastFound = 0;
        try {
            Document doc;
            String compareName = "";
            int numDocs = ir.maxDoc();
            for (int i = lastFound; i < numDocs; i++) {
                if (!ir.isDeleted(i)) {
                    doc = ir.document(i);
                    if (doc != null) {
                        compareName = doc.get(FIELD_PATH);
                        if (compareName.equals(fileName)) {
                            returnInt = i;
                            break;
                        }
                    }
                }
            }
            if (returnInt == -1) {
                for (int i = lastFound; i > 0; i--) {
                    if (!ir.isDeleted(i)) {
                        doc = ir.document(i);
                        if (doc != null) {
                            compareName = doc.get(FIELD_PATH);
                            // System.out.println("Comparing "+compareName+"
                            // to "+fileName);
                            if (compareName.equals(fileName)) {
                                // System.out.println("MATCH FOUND AT "+i);
                                returnInt = i;
                                break;
                            }
                        }
                    }
                }
            }

            if (returnInt == -1)
                ds.setStatus("File " + fileName + " not found in index!");
        } catch (Exception e) {
            logger.error("indexNum() failed", e);
            ds.setStatus("Error determining if doc is already in index!");
        }
        return returnInt;
    }
}

From source file:org.jab.docsearch.Index.java

License:Open Source License

/**
 * Updates a DocSearcherIndex
 *
 * @param di  DocSearcherIndex
 */
public void updateIndex(final DocSearcherIndex di) {
    notesBuf = new StringBuffer();
    newItsBuf = new StringBuffer();
    modItsItsBuf = new StringBuffer();
    delItsItsBuf = new StringBuffer();
    totalChanges = 0;
    long curFileSizeBytes = 0;
    int errNum = 0;
    StringBuffer noRobotsBuf = new StringBuffer();
    int numNoIndex = 0;
    // int numErrors = 0;
    StringBuffer failedBuf = new StringBuffer();
    int addedSuccessFully = 0;
    failedBuf.append("\n");
    synchronized (this) {
        if (di.isCdrom()) {
            // do nothing
        } else if (di.getIsSpider()) {
            doSpiderUpdate(di);
        } else if (!di.getPath().toLowerCase().endsWith(".zip")) { // not a zip archive
            int numUpdates = 0;
            int numRemovals = 0;
            int numNew = 0;
            try {
                IndexReader ir = IndexReader.open(di.getIndexPath());
                int numDocs = ir.maxDoc();
                ds.setStatus(
                        "There are " + numDocs + " docs in index " + di.getName() + "(" + di.getPath() + ")");
                addHeader(di.getName());
                //ArrayList<String> allDocsInIndexx = new ArrayList<String>(); // indexed files
                // ArrayList allDocsInFolder = new ArrayList(); // current files
                // ArrayList newDocsToAdd = new ArrayList(); // files to be added that are new
                ds.setIsWorking(true);
                ds.setProgressMax(numDocs);
                ds.setCurProgressMSG("Updating Modified Files...");
                setInsertMode(1); // note we are looking for modified files

                logger.info("updateIndex() updating " + numDocs + " document from index");

                for (int i = 0; i < numDocs; i++) {
                    if (!ds.getIsWorking()) {
                        break;
                    }
                    if (!ir.isDeleted(i)) {
                        ds.setCurProgress(i);
                        Document doc = ir.document(i);
                        if (doc != null) {
                            String curFiName = doc.get(FIELD_PATH);
                            String curFiModDate = doc.get(FIELD_MODDATE);
                            File testFi = new File(curFiName);

                            // check file not found
                            if (testFi.exists()) {
                                //allDocsInIndex.add(curFiName);
                                String realFileModDate = DateTimeUtils
                                        .getTimeStringForIndex(testFi.lastModified());

                                // check file is changed
                                if (!realFileModDate.equals(curFiModDate)) {
                                    logger.info("updateIndex() updating " + curFiName + " in index");

                                    numUpdates++;
                                    // remove old document
                                    ir.deleteDocument(i);
                                    ir.close();
                                    // open writer to add document once again
                                    ds.setStatus("Reindexing: " + curFiName);
                                    IndexWriter iw = new IndexWriter(di.getIndexPath(), new StandardAnalyzer(),
                                            false);
                                    // the next line should prevent "too many open files" errors
                                    // iw.setUseCompoundFile(true);
                                    addedSuccessFully = addDocToIndex(curFiName, iw, di, di.isCdrom(), null);
                                    iw.close();
                                    // reopen
                                    ir = IndexReader.open(di.getIndexPath());
                                    switch (addedSuccessFully) {
                                    case 1: // error
                                        errNum++;
                                        if (errNum < 8) {
                                            failedBuf.append("\n");
                                            failedBuf.append(curFiName);
                                        }
                                        ds.setStatus(DocSearch.dsErrIdxgFi + " " + curFiName);
                                        break;
                                    case 2: // meta robots = noindex
                                        numNoIndex++;
                                        if (numNoIndex < 8) {
                                            noRobotsBuf.append("\n");
                                            noRobotsBuf.append(curFiName);
                                        }
                                        ds.setStatus("No Indexing Meta Requirement found in : " + curFiName);
                                        break;
                                    default: // OK
                                        numUpdates++;
                                        ds.setStatus("Indexing " + curFiName + " complete.");
                                        break;
                                    } // end of switch
                                }
                            } else {
                                ds.setStatus("Deleting: " + curFiName);
                                logger.info("updateIndex() remove " + curFiName + " from index");
                                ir.deleteDocument(i);
                                addDelNote(doc);
                                numRemovals++;
                            }
                        }
                    }
                    // end if not deleted
                    // else System.out.println("Document was null or deleted:" + i);
                }
                // end for getting docs
                ds.resetProgress();

                // now add the new files
                setInsertMode(0);
                ArrayList<String> folderList = new ArrayList<String>();
                folderList.add(di.getPath());
                int startSubNum = Utils.countSlash(di.getPath());
                int maxSubNum = startSubNum + di.getDepth();
                int lastItemNo = 0;
                int curItemNo = 0;
                int lastFound = 0;
                do {
                    // create our folder file
                    if (!ds.getIsWorking()) {
                        break;
                    }
                    String curFolderString = folderList.get(curItemNo);
                    logger.debug("updateIndex() folder=" + curFolderString);

                    File curFolderFile = new File(curFolderString);
                    int curSubNum = Utils.countSlash(curFolderString);
                    // handle any subfolders --> add them to our folderlist
                    String[] foldersString = curFolderFile.list(DocSearch.ff);
                    int numFolders = foldersString.length;
                    for (int i = 0; i < numFolders; i++) {
                        // add them to our folderlist
                        String curFold = curFolderString + pathSep + foldersString[i] + pathSep;
                        curFold = Utils.replaceAll(pathSep + pathSep, curFold, pathSep);
                        folderList.add(curFold);
                        lastItemNo++;
                        // debug output
                    }
                    // end for having more than 0 folder
                    // add our files
                    String[] filesString = curFolderFile.list(DocSearch.wf);
                    int numFiles = filesString.length;
                    ds.setProgressMax(numDocs);
                    ds.setCurProgressMSG("Updating new Files...");

                    for (int i = 0; i < numFiles; i++) {
                        // add them to our folderlist
                        if (!ds.getIsWorking()) {
                            break;
                        }
                        String curFi = curFolderString + pathSep + filesString[i];
                        curFi = Utils.replaceAll(pathSep + pathSep, curFi, pathSep);
                        curFileSizeBytes = FileUtils.getFileSize(curFi);
                        if (curFileSizeBytes > ds.getMaxFileSize()) {
                            logger.debug("updateIndex() skipping " + curFi + " because is to big");
                            ds.setStatus(I18n.getString("skipping_file_too_big") + " (" + curFileSizeBytes
                                    + ") " + filesString[i]);
                        } else {
                            lastFound = indexNum(lastFound, curFi, ir);
                            if (lastFound == -1) {
                                logger.info("updateIndex() adding " + curFi + " to index");

                                ir.close();
                                // open writer to add document once again
                                IndexWriter iw = new IndexWriter(di.getIndexPath(), new StandardAnalyzer(),
                                        false);
                                addedSuccessFully = addDocToIndex(curFi, iw, di, di.isCdrom(), null);
                                switch (addedSuccessFully) {
                                case 1: // error
                                    errNum++;
                                    if (errNum < 8) {
                                        failedBuf.append("\n");
                                        failedBuf.append(curFi);
                                    }
                                    ds.setStatus(DocSearch.dsErrIdxg + " " + curFi);
                                    break;
                                case 2: // meta robots = noindex
                                    numNoIndex++;
                                    if (numNoIndex < 8) {
                                        noRobotsBuf.append("\n");
                                        noRobotsBuf.append(curFi);
                                    }
                                    ds.setStatus("Document Exlusion (robots = NOINDEX) : " + curFi);
                                    break;
                                default: // OK
                                    numNew++;
                                    ds.setStatus("New Document Added : " + curFi);
                                    break;
                                } // end of switch
                                iw.close();
                                // reopen
                                ir = IndexReader.open(di.getIndexPath());
                            } // end for lastfound not -1
                        } // end for file size not too big
                        ds.setCurProgress(i);
                        ds.resetProgress();
                    }
                    // end for having more than 0 files
                    // increment our curItem
                    folderList.set(curItemNo, null); // remove memory overhead as you go!
                    curItemNo++;
                    if (curSubNum >= maxSubNum) {
                        break;
                    }
                    if (!ds.getIsWorking()) {
                        break;
                    }
                } while (curItemNo <= lastItemNo);
                //
                ir.close(); // always close!
                StringBuffer updateMSGBuf = new StringBuffer();
                updateMSGBuf.append('\n');
                updateMSGBuf.append(numRemovals).append(" files were removed from index.\n");
                updateMSGBuf.append(numUpdates).append(" files were reindexed.\n");
                updateMSGBuf.append(numNew).append(" new files were added to the index.\n");
                //
                totalChanges = numRemovals + numUpdates + numNew;
                // all our stuff to the notesBuf
                addNote(updateMSGBuf.toString(), "", true);
                // add our new and modified files
                if (numNew > 0) {
                    addNote(I18n.getString("new_files"), "", true);
                    notesBuf.append(newItsBuf);
                }
                //
                if (numUpdates > 0) {
                    addNote(I18n.getString("updated_files"), "", true);
                    notesBuf.append(modItsItsBuf);
                }
                //
                //
                if (numRemovals > 0) {
                    addNote(I18n.getString("deleted_files"), "", true);
                    notesBuf.append(delItsItsBuf);
                }
                //

                addFooter();
                if (errNum == 0) {
                    updateMSGBuf.append("No errors were encountered during this process.");
                    if (numNoIndex > 0) {
                        updateMSGBuf.append("\n\n").append(numNoIndex).append(
                                " files were not indexed due to meta data constraints (robots = NOINDEX), including:\n");
                        updateMSGBuf.append(noRobotsBuf);
                    }
                    ds.showMessage("Update of index " + di.getName() + " Completed", updateMSGBuf.toString());
                } else {
                    updateMSGBuf.append(errNum).append(
                            " errors were encountered during this process.\nThe following files had problems being indexed or re-indexed:\n")
                            .append(failedBuf);
                    if (numNoIndex > 0) {
                        updateMSGBuf.append("\n\n").append(numNoIndex).append(
                                " files were not indexed due to meta data constraints (robots = NOINDEX), including:\n");
                        updateMSGBuf.append(noRobotsBuf);
                    }

                    ds.showMessage("Errors during Update of index " + di.getName(), updateMSGBuf.toString());
                }
            }
            // end of try
            catch (Exception e) {
                logger.error("updateIndex() error during update index " + di.getName(), e);
                ds.showMessage("Error updating index " + di.getName(), e.toString());
            }

            addFooter();
            di.setLastIndexed(DateTimeUtils.getToday());
            ds.setStatus("Update of index " + di.getName() + " completed.");
            ds.setIsWorking(false);
        } else {
            ds.doZipArchiveUpdate(di);
        }
    }
}

From source file:org.jab.docsearch.utils.MetaReport.java

License:Open Source License

/**
 * doMetaDataReport
 *
 * @param di
 * @param listAll
 * @param pathRequired
 * @param pathText
 * @param authRequired
 * @param authText
 * @param reportFile
 * @param maxDocs
 * @param useDaysOld
 * @param maxDays
 */
private void doMetaDataReport(DocSearcherIndex di, boolean listAll, boolean pathRequired, String pathText,
        boolean authRequired, String authText, String reportFile, int maxDocs, boolean useDaysOld,
        int maxDays) {
    try {
        // initialize our metrics
        int numBadDocs = 0;
        int totalDocs = 0;
        int numGoodDocs = 0;
        String lineSep = Utils.LINE_SEPARATOR;
        StringBuffer documentBuffer = new StringBuffer();
        StringBuffer metaDataReport = new StringBuffer();

        // initialize the reader
        IndexReader ir = IndexReader.open(di.getIndexPath());
        int numDocs = ir.maxDoc();
        ds.setStatus(numDocs + " " + Messages.getString("DocSearch.numDox") + " " + di.getName());

        // write the start of the table
        documentBuffer.append("<table style=\"empty-cells:show\" border=\"1\">").append(lineSep);
        documentBuffer.append("<tr>").append(lineSep);
        int numHdrs = allFields.length;
        for (int z = 0; z < numHdrs; z++) {
            documentBuffer.append("<th valign=\"top\">");
            documentBuffer.append(allFields[z]);
            documentBuffer.append("</th>").append(lineSep);
        }
        documentBuffer.append("</tr>").append(lineSep);
        for (int i = 0; i < numDocs; i++) {
            if (!ir.isDeleted(i)) {
                Document doc = ir.document(i);
                if (doc != null) {
                    boolean curSkip = false;

                    // put in the docs values
                    String path;
                    if (di.getIsWeb()) {
                        path = doc.get(Index.FIELD_URL);
                    } else {
                        path = doc.get(Index.FIELD_PATH);
                    }

                    ds.setStatus("Examining document: " + path);
                    String type = doc.get(Index.FIELD_TYPE);
                    String author = doc.get(Index.FIELD_AUTHOR);
                    String summary = doc.get(Index.FIELD_SUMMARY);
                    String title = doc.get(Index.FIELD_TITLE);
                    String size = doc.get(Index.FIELD_SIZE);
                    String keywords = doc.get(Index.FIELD_KEYWORDS);
                    String date = DateTimeUtils.getDateParsedFromIndex(doc.get(Index.FIELD_MODDATE));

                    // determine if we even need to examine it
                    if (pathRequired) {
                        if (path.indexOf(pathText) == -1) {
                            curSkip = true;
                        }
                    }

                    if (authRequired) {
                        if (author.indexOf(authText) == -1) {
                            curSkip = true;
                        }
                    }

                    // determine if it's bad or good
                    if (!curSkip) {
                        totalDocs++;
                        boolean isGood = goodMetaData(title, summary, author, date, keywords, type, useDaysOld,
                                maxDays);

                        // write to our file
                        if (!isGood || listAll) {
                            documentBuffer.append("<tr>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\">"); // path
                            documentBuffer.append(path);
                            documentBuffer.append("</td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\"><small>");
                            documentBuffer.append(Utils.convertTextToHTML(title));
                            documentBuffer.append("</small></td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\">");
                            documentBuffer.append(author);
                            documentBuffer.append("</td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\">");
                            documentBuffer.append(date);
                            documentBuffer.append("</td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\"><small>");
                            documentBuffer.append(Utils.convertTextToHTML(summary));
                            documentBuffer.append("</small></td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\"><small>");
                            documentBuffer.append(keywords);
                            documentBuffer.append("</small></td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\">");
                            documentBuffer.append(size);
                            documentBuffer.append("</td>").append(lineSep);
                            documentBuffer.append("<td valign=\"top\">");
                            documentBuffer.append(type);
                            documentBuffer.append("</td>").append(lineSep);
                            documentBuffer.append("</tr>").append(lineSep);
                        }

                        if (isGood) {
                            ds.setStatus(path + " " + dsNotMsgMeta);
                            numGoodDocs++;
                        } else {
                            ds.setStatus(path + " " + dsMsgMeta);
                            numBadDocs++;
                        }
                    } else {
                        ds.setStatus(dsSkip + " " + path);
                    }
                }
            }

            if (i > maxDocs) {
                break;
            }
        }
        documentBuffer.append("</table>").append(lineSep);

        int percentGood = 0;
        if (totalDocs > 0) {
            percentGood = (numGoodDocs * 100) / totalDocs;
        }

        ds.setStatus("%  " + dsGood + ": " + percentGood + " (" + numGoodDocs + " / " + totalDocs + ", "
                + numBadDocs + " " + dsBad + ").");

        // write complete report with summary
        metaDataReport.append("<html>").append(lineSep);
        metaDataReport.append("<head>").append(lineSep);
        metaDataReport.append("<title>").append(dsMetaRpt).append(' ').append(di.getName()).append("</title>")
                .append(lineSep);
        metaDataReport.append(
                "<meta name=\"description\" content=\"lists documents with poorly searchable meta data\">")
                .append(lineSep);
        metaDataReport.append("<meta name=\"author\" content=\"DocSearcher\">").append(lineSep);
        metaDataReport.append("</head>").append(lineSep);
        metaDataReport.append("<body>").append(lineSep);
        metaDataReport.append("<h1>").append(dsMetaRpt).append(' ').append(di.getName()).append("</h1>")
                .append(lineSep);
        metaDataReport.append("<p align=\"left\"><b>");
        metaDataReport.append(numBadDocs);
        metaDataReport.append("</b> ");
        metaDataReport.append(dsPoorMeta);
        metaDataReport.append(" <br> &amp; <b>");
        metaDataReport.append(numGoodDocs);
        metaDataReport.append("</b> ");
        metaDataReport.append(dsGoodMetaNum);
        metaDataReport.append(".</p>").append(lineSep);
        metaDataReport.append("<p align=\"left\">");
        metaDataReport.append(dsMetaOO);
        metaDataReport.append(" <b>");
        metaDataReport.append(percentGood + "</b> % . </p>");
        metaDataReport.append("<p align=\"left\">");
        metaDataReport.append(dsTblDsc);
        metaDataReport.append(".</p>").append(lineSep);

        // add document buffer
        metaDataReport.append(documentBuffer);

        metaDataReport.append("</body>").append(lineSep);
        metaDataReport.append("</html>").append(lineSep);

        ds.curPage = Messages.getString("DocSearch.report");

        boolean fileSaved = FileUtils.saveFile(reportFile, metaDataReport);
        if (fileSaved) {
            ds.doExternal(reportFile);
        }
    } catch (IOException ioe) {
        logger.fatal("doMetaDataReport() create meta data report failed", ioe);
        ds.setStatus(Messages.getString("DocSearch.statusMetaDataError") + di.getName() + ":" + ioe.toString());
    }
}

From source file:org.jetbrains.idea.maven.server.embedder.Maven2ServerIndexerImpl.java

License:Apache License

@Override
public void processArtifacts(int indexId, MavenServerIndicesProcessor processor)
        throws MavenServerIndexerException {
    try {
        final int CHUNK_SIZE = 10000;

        IndexReader r = getIndex(indexId).getIndexReader();
        int total = r.numDocs();

        List<IndexedMavenId> result = new ArrayList<IndexedMavenId>(Math.min(CHUNK_SIZE, total));
        for (int i = 0; i < total; i++) {
            if (r.isDeleted(i)) {
                continue;
            }

            Document doc = r.document(i);
            String uinfo = doc.get(ArtifactInfo.UINFO);
            if (uinfo == null) {
                continue;
            }
            String[] uInfoParts = uinfo.split("\\|");
            if (uInfoParts.length < 3) {
                continue;
            }
            String groupId = uInfoParts[0];
            String artifactId = uInfoParts[1];
            String version = uInfoParts[2];

            String packaging = doc.get(ArtifactInfo.PACKAGING);
            String description = doc.get(ArtifactInfo.DESCRIPTION);

            result.add(new IndexedMavenId(groupId, artifactId, version, packaging, description));

            if (result.size() == CHUNK_SIZE) {
                processor.processArtifacts(result);
                result.clear();
            }
        }

        if (!result.isEmpty()) {
            processor.processArtifacts(result);
        }
    } catch (Exception e) {
        throw new MavenServerIndexerException(wrapException(e));
    }
}

From source file:org.jetbrains.idea.maven.server.Maven3ServerIndexerImpl.java

License:Apache License

@Override
public void processArtifacts(int indexId, MavenServerIndicesProcessor processor)
        throws RemoteException, MavenServerIndexerException {
    try {
        final int CHUNK_SIZE = 10000;

        IndexReader r = getIndex(indexId).getIndexReader();
        int total = r.numDocs();

        List<IndexedMavenId> result = new ArrayList<IndexedMavenId>(Math.min(CHUNK_SIZE, total));
        for (int i = 0; i < total; i++) {
            if (r.isDeleted(i)) {
                continue;
            }

            Document doc = r.document(i);
            String uinfo = doc.get(SEARCH_TERM_COORDINATES);
            if (uinfo == null) {
                continue;
            }

            String[] uInfoParts = uinfo.split("\\|");
            if (uInfoParts.length < 3) {
                continue;
            }

            String groupId = uInfoParts[0];
            String artifactId = uInfoParts[1];
            String version = uInfoParts[2];

            if (groupId == null || artifactId == null || version == null) {
                continue;
            }

            String packaging = doc.get(ArtifactInfo.PACKAGING);
            String description = doc.get(ArtifactInfo.DESCRIPTION);

            result.add(new IndexedMavenId(groupId, artifactId, version, packaging, description));

            if (result.size() == CHUNK_SIZE) {
                processor.processArtifacts(result);
                result.clear();
            }
        }

        if (!result.isEmpty()) {
            processor.processArtifacts(result);
        }
    } catch (Exception e) {
        throw new MavenServerIndexerException(wrapException(e));
    }
}

From source file:org.jhlabs.scany.engine.search.SearchModel.java

License:Open Source License

/**
 * Populates a record list with the stored field (column) values of the matched Documents.
 *
 * @param reader the reader
 * @param docs the docs
 * @param start the start
 * @param end the end
 * @return List
 * @throws RecordKeyException the record key exception
 * @throws CorruptIndexException the corrupt index exception
 * @throws IOException Signals that an I/O exception has occurred.
 */
public RecordList populateRecordList(IndexReader reader, ScoreDoc[] docs, int start, int end)
        throws RecordKeyException, CorruptIndexException, IOException {
    RecordList recordList = new RecordList(end - start + 1);

    for (int i = start; i <= end; i++) {
        if (i >= docs.length)
            break;

        Document document = reader.document(docs[i].doc);
        Record record = createRecord(document);
        recordList.add(record);

        if (queryKeywords != null && summarizerMap != null && summarizerMap.size() > 0) {
            for (Map.Entry<String, Summarizer> entry : summarizerMap.entrySet()) {
                String key = entry.getKey();
                String value = record.getValue(key);

                if (value != null && value.length() > 0) {
                    Summarizer summarizer = entry.getValue();
                    value = summarizer.summarize(queryKeywords, value);
                    record.setValue(key, value);

                }
            }
        }
    }

    return recordList;
}

From source file:org.kimios.kernel.index.IndexManager.java

License:Open Source License

public void updateAcls(long docUid, List<DMEntityACL> acls) throws IndexException {
    IndexReader reader = null;
    try {
        reader = IndexReader.open(this.indexDirectory);
        log.trace("Updating ACL for document #" + docUid);
        Query q = new DocumentUidClause(docUid).getLuceneQuery();
        List<Integer> list = this.executeQuery(q);
        if (list.size() > 0) {
            org.apache.lucene.document.Document d = reader.document(list.get(0));
            this.indexModifier.deleteDocuments(q);
            d.removeFields("DocumentACL");

            for (int j = 0; j < acls.size(); j++) {
                d.add(IndexHelper.getUnanalyzedField("DocumentACL", acls.get(j).getRuleHash()));
            }

            this.indexModifier.addDocument(d);
        }
    } catch (Exception e) {
        throw new IndexException(e, e.getMessage());
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            throw new IndexException(e, e.getMessage());
        }
    }
}

From source file:org.kimios.kernel.index.IndexManager.java

License:Open Source License

public void updatePath(String oldPath, String newPath) throws IndexException {
    IndexReader reader = null;
    try {
        if (oldPath.endsWith("/")) {
            oldPath = oldPath.substring(0, oldPath.lastIndexOf("/"));
        }
        if (!newPath.endsWith("/")) {
            newPath += "/";
        }
        reader = IndexReader.open(this.indexDirectory);
        Query q = new DocumentParentClause(oldPath).getLuceneQuery();
        List<Integer> list = this.executeQuery(q);
        Vector<org.apache.lucene.document.Document> docs = new Vector<org.apache.lucene.document.Document>();
        for (int i = 0; i < list.size(); i++) {
            docs.add(reader.document(list.get(i)));
        }
        this.indexModifier.deleteDocuments(q);
        for (int i = 0; i < docs.size(); i++) {
            String path = docs.get(i).get("DocumentParent");
            path = newPath + path.substring(oldPath.length() + 1);
            docs.get(i).removeField("DocumentParent");
            docs.get(i).add(IndexHelper.getUnanalyzedField("DocumentParent", path));
            this.indexModifier.addDocument(docs.get(i));
        }
        reader.close();
        commit();
    } catch (Exception ex) {
        throw new IndexException(ex, ex.getMessage());
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            throw new IndexException(e, "Error while closing reader");
        }
    }
}