Example usage for org.apache.commons.net.ftp FTPCommand RETR

List of usage examples for org.apache.commons.net.ftp FTPCommand RETR

Introduction

On this page you can find example usages of org.apache.commons.net.ftp FTPCommand RETR.

Prototype

int RETR

To view the source code for org.apache.commons.net.ftp FTPCommand RETR, click Source Link.

Usage

From source file:com.cyberway.issue.crawler.fetcher.FetchFTP.java

/**
 * Retrieves one FTP resource and hands its data stream to the recorder.
 *
 * <p>The URI's path is first probed with a CD command. When the CD
 * succeeds the resource is treated as a directory and a name listing
 * (NLST) of it is requested; otherwise the path is requested as a file
 * (RETR).
 *
 * @param curi      the URI of the document to fetch
 * @param client    the FTPClient to use for the fetch
 * @param recorder  the recorder to preserve the document in
 * @throws IOException  if a network or protocol error occurs
 * @throws InterruptedException  if the thread is interrupted
 */
private void fetch(CrawlURI curi, ClientFTP client, HttpRecorder recorder)
        throws IOException, InterruptedException {
    final UURI target = curi.getUURI();

    // Open the control connection; a port of -1 means the URI named none,
    // so the standard FTP port is used instead.
    final int declaredPort = target.getPort();
    final int port = (declaredPort == -1) ? 21 : declaredPort;
    client.connectStrict(target.getHost(), port);

    // Log in with the credentials resolved for this URI.
    final String[] credentials = getAuth(curi);
    client.loginStrict(credentials[0], credentials[1]);

    // A successful CD to the path tells us the resource is a directory.
    final boolean listing = client.changeWorkingDirectory(target.getPath());
    if (listing) {
        curi.setContentType("text/plain");
    }

    // TODO: A future version of this class could use the system string to
    // set up custom directory parsing if the FTP server doesn't support
    // the nlist command.
    if (logger.isLoggable(Level.FINE)) {
        logger.fine(client.getSystemName());
    }

    // Directories are listed relative to the new working directory;
    // files are retrieved by their full path.
    final int command;
    final String path;
    if (listing) {
        command = FTPCommand.NLST;
        path = ".";
    } else {
        command = FTPCommand.RETR;
        path = target.getPath();
    }

    client.enterLocalPassiveMode();
    client.setBinary();
    final Socket dataSocket = client.openDataConnection(command, path);
    curi.setFetchStatus(client.getReplyCode());

    // Downstream processors expect to find the streams in the CURI;
    // the recorder and data socket are released whatever happens.
    try {
        saveToRecorder(curi, dataSocket, recorder);
    } finally {
        recorder.close();
        close(dataSocket);
    }

    curi.setFetchStatus(200);
    if (listing) {
        extract(curi, recorder);
    }
    addParent(curi);
}

From source file:org.apache.nutch.protocol.ftp.Client.java

/**
 * retrieve file for path/*  w w  w .java  2  s.c  o  m*/
 * 
 * @param path
 * @param os
 * @param limit
 * @throws IOException
 * @throws FtpExceptionCanNotHaveDataConnection
 * @throws FtpExceptionUnknownForcedDataClose
 * @throws FtpExceptionControlClosedByForcedDataClose
 */
public void retrieveFile(String path, OutputStream os, int limit)
        throws IOException, FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
        FtpExceptionControlClosedByForcedDataClose {

    Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);

    if (socket == null)
        throw new FtpExceptionCanNotHaveDataConnection("RETR " + ((path == null) ? "" : path));

    InputStream input = socket.getInputStream();

    // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
    // do we ever need ASCII_FILE_TYPE?
    // if (__fileType == ASCII_FILE_TYPE)
    // input = new FromNetASCIIInputStream(input);

    // fixme, should we instruct server here for binary file type?

    // force-close data channel socket
    // boolean mandatory_close = false;

    int len;
    int count = 0;
    byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
    while ((len = input.read(buf, 0, buf.length)) != -1) {
        count += len;
        // impose download limit if limit >= 0, otherwise no limit
        // here, cut off is exactly of limit bytes
        if (limit >= 0 && count > limit) {
            os.write(buf, 0, len - (count - limit));
            // mandatory_close = true;
            break;
        }
        os.write(buf, 0, len);
        os.flush();
    }

    // if (mandatory_close)
    // you always close here, no matter mandatory_close or not.
    // however different ftp servers respond differently, see below.
    socket.close();

    // scenarios:
    // (1) mandatory_close is false, download limit not reached
    // no special care here
    // (2) mandatory_close is true, download limit is reached
    // different servers have different reply codes:

    // do not need this
    // sendCommand("ABOR");

    try {
        int reply = getReply();
        if (!_notBadReply(reply))
            throw new FtpExceptionUnknownForcedDataClose(getReplyString());
    } catch (FTPConnectionClosedException e) {
        // some ftp servers will close control channel if data channel socket
        // is closed by our end before all data has been read out. Check:
        // tux414.q-tam.hp.com FTP server (hp.com version whp02)
        // so must catch FTPConnectionClosedException thrown by getReply() above
        // disconnect();
        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
    }

}

From source file:org.archive.crawler.fetcher.FetchFTP.java

/**
 * Retrieves one FTP resource and hands its data stream to the recorder.
 *
 * <p>The URI's path is first probed with a CD command. When the CD
 * succeeds the resource is treated as a directory and an ASCII name
 * listing (NLST) is requested; otherwise the path is requested as a
 * binary file (RETR). A failure to open the data connection is recorded
 * on the URI as {@code S_CONNECT_LOST} rather than thrown.
 *
 * @param curi      the URI of the document to fetch
 * @param client    the FTPClient to use for the fetch
 * @param recorder  the recorder to preserve the document in
 * @throws IOException  if a network or protocol error occurs
 * @throws InterruptedException  if the thread is interrupted
 */
private void fetch(CrawlURI curi, ClientFTP client, HttpRecorder recorder)
        throws IOException, InterruptedException {
    final UURI target = curi.getUURI();

    // Open the control connection; a port of -1 means the URI named none,
    // so the standard FTP port is used instead.
    final int declaredPort = target.getPort();
    final int port = (declaredPort == -1) ? 21 : declaredPort;
    client.connect(target.getHost(), port);

    // Log in with the credentials resolved for this URI.
    final String[] credentials = getAuth(curi);
    client.login(credentials[0], credentials[1]);

    // A successful CD to the path tells us the resource is a directory.
    final boolean listing = client.changeWorkingDirectory(target.getPath());

    // Directories get an ASCII NLST of the working directory;
    // files get a binary RETR of their full path.
    if (listing) {
        curi.addAnnotation("ftpDirectoryList");
    }
    client.setFileType(listing ? FTP.ASCII_FILE_TYPE : FTP.BINARY_FILE_TYPE);
    final int command = listing ? FTPCommand.NLST : FTPCommand.RETR;
    final String path = listing ? "." : target.getPath();

    client.enterLocalPassiveMode();

    Socket dataSocket = null;
    try {
        dataSocket = client.openDataConnection(command, path);

        // if "227 Entering Passive Mode" these will get reset later
        curi.setFetchStatus(client.getReplyCode());
        curi.putString(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]);
    } catch (IOException e) {
        // try it again, see AbstractFrontier.needsRetrying()
        curi.setFetchStatus(S_CONNECT_LOST);
    }

    // Without a data socket there is nothing to record.
    if (dataSocket == null) {
        addParent(curi);
        return;
    }

    // Shall we get a digest on the content downloaded?
    final Boolean digestSetting = (Boolean) getUncheckedAttribute(curi, FetchHTTP.ATTR_DIGEST_CONTENT);
    final boolean wantDigest = digestSetting.booleanValue();
    final String digestAlgorithm = wantDigest
            ? (String) getUncheckedAttribute(curi, FetchHTTP.ATTR_DIGEST_ALGORITHM)
            : null;
    if (!wantDigest) {
        // clear
        recorder.getRecordedInput().setDigest((MessageDigest) null);
    } else {
        recorder.getRecordedInput().setDigest(digestAlgorithm);
        recorder.getRecordedInput().startDigest();
    }

    // Save the streams in the CURI, where downstream processors
    // expect to find them.
    try {
        saveToRecorder(curi, dataSocket, recorder);
    } finally {
        recorder.close();
        client.closeDataConnection(); // does socket.close()
        curi.setContentSize(recorder.getRecordedInput().getSize());

        // "226 Transfer complete."
        client.getReply();
        curi.setFetchStatus(client.getReplyCode());
        curi.putString(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]);

        curi.setContentType(listing ? "text/plain" : "application/octet-stream");

        if (logger.isLoggable(Level.INFO)) {
            logger.info("read " + recorder.getRecordedInput().getSize() + " bytes from ftp data socket");
        }

        if (wantDigest) {
            curi.setContentDigest(digestAlgorithm, recorder.getRecordedInput().getDigestValue());
        }
    }

    if (listing) {
        extract(curi, recorder);
    }
    addParent(curi);
}

From source file:org.archive.modules.fetcher.FetchFTP.java

/**
 * Fetches a document from an FTP server.
 * /*ww w.  j  av a  2s. c  o  m*/
 * @param curi      the URI of the document to fetch
 * @param client    the FTPClient to use for the fetch
 * @param recorder  the recorder to preserve the document in
 * @throws IOException  if a network or protocol error occurs
 * @throws InterruptedException  if the thread is interrupted
 */
private void fetch(CrawlURI curi, ClientFTP client, Recorder recorder)
        throws IOException, InterruptedException {
    // Connect to the FTP server.
    UURI uuri = curi.getUURI();
    int port = uuri.getPort();
    if (port == -1) {
        port = 21;
    }

    if (socketFactory == null) {
        socketFactory = new SocketFactoryWithTimeout();
    }
    socketFactory.setConnectTimeoutMs(getSoTimeoutMs());
    client.setSocketFactory(socketFactory);
    client.setConnectTimeout(getSoTimeoutMs());
    client.setDefaultTimeout(getSoTimeoutMs());
    client.setDataTimeout(getSoTimeoutMs());

    client.connect(uuri.getHost(), port);

    client.setSoTimeout(getSoTimeoutMs()); // must be after connect()

    // Authenticate.
    String[] auth = getAuth(curi);
    client.login(auth[0], auth[1]);

    // The given resource may or may not be a directory.
    // To figure out which is which, execute a CD command to
    // the UURI's path.  If CD works, it's a directory.
    boolean isDirectory = client.changeWorkingDirectory(uuri.getPath());

    // Get a data socket.  This will either be the result of a NLST
    // command for a directory, or a RETR command for a file.
    int command;
    String path;
    if (isDirectory) {
        curi.getAnnotations().add("ftpDirectoryList");
        command = FTPCommand.NLST;
        client.setFileType(FTP.ASCII_FILE_TYPE);
        path = ".";
    } else {
        command = FTPCommand.RETR;
        client.setFileType(FTP.BINARY_FILE_TYPE);
        path = uuri.getPath();
    }

    client.enterLocalPassiveMode();
    Socket socket = null;

    try {
        socket = client.openDataConnection(command, path);

        // if "227 Entering Passive Mode" these will get reset later
        curi.setFetchStatus(client.getReplyCode());
        curi.getData().put(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]);
    } catch (IOException e) {
        // try it again, see AbstractFrontier.needsRetrying()
        curi.setFetchStatus(FetchStatusCodes.S_CONNECT_LOST);
    }

    // Save the streams in the CURI, where downstream processors
    // expect to find them.
    if (socket != null) {
        if (socket.getSoTimeout() != getSoTimeoutMs()) {
            logger.warning("data socket timeout " + socket.getSoTimeout() + "ms is not expected value "
                    + getSoTimeoutMs() + "ms");
        }
        // Shall we get a digest on the content downloaded?
        boolean digestContent = getDigestContent();
        String algorithm = null;
        if (digestContent) {
            algorithm = getDigestAlgorithm();
            recorder.getRecordedInput().setDigest(algorithm);
            recorder.getRecordedInput().startDigest();
        } else {
            // clear
            recorder.getRecordedInput().setDigest((MessageDigest) null);
        }

        try {
            saveToRecorder(curi, socket, recorder);
        } finally {
            recorder.close();
            client.closeDataConnection(); // does socket.close()
            curi.setContentSize(recorder.getRecordedInput().getSize());

            // "226 Transfer complete."
            client.getReply();
            curi.setFetchStatus(client.getReplyCode());
            curi.getData().put(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]);

            if (isDirectory) {
                curi.setContentType("text/plain");
            } else {
                curi.setContentType("application/octet-stream");
            }

            if (logger.isLoggable(Level.FINE)) {
                logger.fine("read " + recorder.getRecordedInput().getSize() + " bytes from ftp data socket");
            }

            if (digestContent) {
                curi.setContentDigest(algorithm, recorder.getRecordedInput().getDigestValue());
            }
        }

        if (isDirectory) {
            extract(curi, recorder);
        }
    } else {
        // no data - without this, content size is -1
        curi.setContentSize(0);
    }

    addParent(curi);
}