List of usage examples for org.apache.commons.net.ftp FTPCommand RETR
int RETR
To view the source code for org.apache.commons.net.ftp FTPCommand RETR, click the Source Link.
From source file:com.cyberway.issue.crawler.fetcher.FetchFTP.java
/** * Fetches a document from an FTP server. * // w w w. j a v a 2 s . co m * @param curi the URI of the document to fetch * @param client the FTPClient to use for the fetch * @param recorder the recorder to preserve the document in * @throws IOException if a network or protocol error occurs * @throws InterruptedException if the thread is interrupted */ private void fetch(CrawlURI curi, ClientFTP client, HttpRecorder recorder) throws IOException, InterruptedException { // Connect to the FTP server. UURI uuri = curi.getUURI(); int port = uuri.getPort(); if (port == -1) { port = 21; } client.connectStrict(uuri.getHost(), port); // Authenticate. String[] auth = getAuth(curi); client.loginStrict(auth[0], auth[1]); // The given resource may or may not be a directory. // To figure out which is which, execute a CD command to // the UURI's path. If CD works, it's a directory. boolean dir = client.changeWorkingDirectory(uuri.getPath()); if (dir) { curi.setContentType("text/plain"); } // TODO: A future version of this class could use the system string to // set up custom directory parsing if the FTP server doesn't support // the nlist command. if (logger.isLoggable(Level.FINE)) { String system = client.getSystemName(); logger.fine(system); } // Get a data socket. This will either be the result of a NLIST // command for a directory, or a RETR command for a file. int command = dir ? FTPCommand.NLST : FTPCommand.RETR; String path = dir ? "." : uuri.getPath(); client.enterLocalPassiveMode(); client.setBinary(); Socket socket = client.openDataConnection(command, path); curi.setFetchStatus(client.getReplyCode()); // Save the streams in the CURI, where downstream processors // expect to find them. try { saveToRecorder(curi, socket, recorder); } finally { recorder.close(); close(socket); } curi.setFetchStatus(200); if (dir) { extract(curi, recorder); } addParent(curi); }
From source file:org.apache.nutch.protocol.ftp.Client.java
/** * retrieve file for path/* w w w .java 2 s.c o m*/ * * @param path * @param os * @param limit * @throws IOException * @throws FtpExceptionCanNotHaveDataConnection * @throws FtpExceptionUnknownForcedDataClose * @throws FtpExceptionControlClosedByForcedDataClose */ public void retrieveFile(String path, OutputStream os, int limit) throws IOException, FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose, FtpExceptionControlClosedByForcedDataClose { Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path); if (socket == null) throw new FtpExceptionCanNotHaveDataConnection("RETR " + ((path == null) ? "" : path)); InputStream input = socket.getInputStream(); // 20040318, xing, treat everything as BINARY_FILE_TYPE for now // do we ever need ASCII_FILE_TYPE? // if (__fileType == ASCII_FILE_TYPE) // input = new FromNetASCIIInputStream(input); // fixme, should we instruct server here for binary file type? // force-close data channel socket // boolean mandatory_close = false; int len; int count = 0; byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE]; while ((len = input.read(buf, 0, buf.length)) != -1) { count += len; // impose download limit if limit >= 0, otherwise no limit // here, cut off is exactly of limit bytes if (limit >= 0 && count > limit) { os.write(buf, 0, len - (count - limit)); // mandatory_close = true; break; } os.write(buf, 0, len); os.flush(); } // if (mandatory_close) // you always close here, no matter mandatory_close or not. // however different ftp servers respond differently, see below. 
socket.close(); // scenarios: // (1) mandatory_close is false, download limit not reached // no special care here // (2) mandatory_close is true, download limit is reached // different servers have different reply codes: // do not need this // sendCommand("ABOR"); try { int reply = getReply(); if (!_notBadReply(reply)) throw new FtpExceptionUnknownForcedDataClose(getReplyString()); } catch (FTPConnectionClosedException e) { // some ftp servers will close control channel if data channel socket // is closed by our end before all data has been read out. Check: // tux414.q-tam.hp.com FTP server (hp.com version whp02) // so must catch FTPConnectionClosedException thrown by getReply() above // disconnect(); throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage()); } }
From source file:org.archive.crawler.fetcher.FetchFTP.java
/** * Fetches a document from an FTP server. * // w w w. ja va 2 s. c om * @param curi the URI of the document to fetch * @param client the FTPClient to use for the fetch * @param recorder the recorder to preserve the document in * @throws IOException if a network or protocol error occurs * @throws InterruptedException if the thread is interrupted */ private void fetch(CrawlURI curi, ClientFTP client, HttpRecorder recorder) throws IOException, InterruptedException { // Connect to the FTP server. UURI uuri = curi.getUURI(); int port = uuri.getPort(); if (port == -1) { port = 21; } client.connect(uuri.getHost(), port); // Authenticate. String[] auth = getAuth(curi); client.login(auth[0], auth[1]); // The given resource may or may not be a directory. // To figure out which is which, execute a CD command to // the UURI's path. If CD works, it's a directory. boolean isDirectory = client.changeWorkingDirectory(uuri.getPath()); // Get a data socket. This will either be the result of a NLST // command for a directory, or a RETR command for a file. int command; String path; if (isDirectory) { curi.addAnnotation("ftpDirectoryList"); command = FTPCommand.NLST; client.setFileType(FTP.ASCII_FILE_TYPE); path = "."; } else { command = FTPCommand.RETR; client.setFileType(FTP.BINARY_FILE_TYPE); path = uuri.getPath(); } client.enterLocalPassiveMode(); Socket socket = null; try { socket = client.openDataConnection(command, path); // if "227 Entering Passive Mode" these will get reset later curi.setFetchStatus(client.getReplyCode()); curi.putString(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]); } catch (IOException e) { // try it again, see AbstractFrontier.needsRetrying() curi.setFetchStatus(S_CONNECT_LOST); } // Save the streams in the CURI, where downstream processors // expect to find them. if (socket != null) { // Shall we get a digest on the content downloaded? 
boolean digestContent = ((Boolean) getUncheckedAttribute(curi, FetchHTTP.ATTR_DIGEST_CONTENT)) .booleanValue(); String algorithm = null; if (digestContent) { algorithm = ((String) getUncheckedAttribute(curi, FetchHTTP.ATTR_DIGEST_ALGORITHM)); recorder.getRecordedInput().setDigest(algorithm); recorder.getRecordedInput().startDigest(); } else { // clear recorder.getRecordedInput().setDigest((MessageDigest) null); } try { saveToRecorder(curi, socket, recorder); } finally { recorder.close(); client.closeDataConnection(); // does socket.close() curi.setContentSize(recorder.getRecordedInput().getSize()); // "226 Transfer complete." client.getReply(); curi.setFetchStatus(client.getReplyCode()); curi.putString(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]); if (isDirectory) { curi.setContentType("text/plain"); } else { curi.setContentType("application/octet-stream"); } if (logger.isLoggable(Level.INFO)) { logger.info("read " + recorder.getRecordedInput().getSize() + " bytes from ftp data socket"); } if (digestContent) { curi.setContentDigest(algorithm, recorder.getRecordedInput().getDigestValue()); } } if (isDirectory) { extract(curi, recorder); } } addParent(curi); }
From source file:org.archive.modules.fetcher.FetchFTP.java
/** * Fetches a document from an FTP server. * /*ww w. j av a 2s. c o m*/ * @param curi the URI of the document to fetch * @param client the FTPClient to use for the fetch * @param recorder the recorder to preserve the document in * @throws IOException if a network or protocol error occurs * @throws InterruptedException if the thread is interrupted */ private void fetch(CrawlURI curi, ClientFTP client, Recorder recorder) throws IOException, InterruptedException { // Connect to the FTP server. UURI uuri = curi.getUURI(); int port = uuri.getPort(); if (port == -1) { port = 21; } if (socketFactory == null) { socketFactory = new SocketFactoryWithTimeout(); } socketFactory.setConnectTimeoutMs(getSoTimeoutMs()); client.setSocketFactory(socketFactory); client.setConnectTimeout(getSoTimeoutMs()); client.setDefaultTimeout(getSoTimeoutMs()); client.setDataTimeout(getSoTimeoutMs()); client.connect(uuri.getHost(), port); client.setSoTimeout(getSoTimeoutMs()); // must be after connect() // Authenticate. String[] auth = getAuth(curi); client.login(auth[0], auth[1]); // The given resource may or may not be a directory. // To figure out which is which, execute a CD command to // the UURI's path. If CD works, it's a directory. boolean isDirectory = client.changeWorkingDirectory(uuri.getPath()); // Get a data socket. This will either be the result of a NLST // command for a directory, or a RETR command for a file. 
int command; String path; if (isDirectory) { curi.getAnnotations().add("ftpDirectoryList"); command = FTPCommand.NLST; client.setFileType(FTP.ASCII_FILE_TYPE); path = "."; } else { command = FTPCommand.RETR; client.setFileType(FTP.BINARY_FILE_TYPE); path = uuri.getPath(); } client.enterLocalPassiveMode(); Socket socket = null; try { socket = client.openDataConnection(command, path); // if "227 Entering Passive Mode" these will get reset later curi.setFetchStatus(client.getReplyCode()); curi.getData().put(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]); } catch (IOException e) { // try it again, see AbstractFrontier.needsRetrying() curi.setFetchStatus(FetchStatusCodes.S_CONNECT_LOST); } // Save the streams in the CURI, where downstream processors // expect to find them. if (socket != null) { if (socket.getSoTimeout() != getSoTimeoutMs()) { logger.warning("data socket timeout " + socket.getSoTimeout() + "ms is not expected value " + getSoTimeoutMs() + "ms"); } // Shall we get a digest on the content downloaded? boolean digestContent = getDigestContent(); String algorithm = null; if (digestContent) { algorithm = getDigestAlgorithm(); recorder.getRecordedInput().setDigest(algorithm); recorder.getRecordedInput().startDigest(); } else { // clear recorder.getRecordedInput().setDigest((MessageDigest) null); } try { saveToRecorder(curi, socket, recorder); } finally { recorder.close(); client.closeDataConnection(); // does socket.close() curi.setContentSize(recorder.getRecordedInput().getSize()); // "226 Transfer complete." 
client.getReply(); curi.setFetchStatus(client.getReplyCode()); curi.getData().put(A_FTP_FETCH_STATUS, client.getReplyStrings()[0]); if (isDirectory) { curi.setContentType("text/plain"); } else { curi.setContentType("application/octet-stream"); } if (logger.isLoggable(Level.FINE)) { logger.fine("read " + recorder.getRecordedInput().getSize() + " bytes from ftp data socket"); } if (digestContent) { curi.setContentDigest(algorithm, recorder.getRecordedInput().getDigestValue()); } } if (isDirectory) { extract(curi, recorder); } } else { // no data - without this, content size is -1 curi.setContentSize(0); } addParent(curi); }