ccn_uri.c

Go to the documentation of this file.
00001 /**
00002  * @file ccn_uri.c
00003  * @brief Support for ccnx:/URI/...
00004  * 
00005  * Part of the CCNx C Library.
00006  *
00007  * Copyright (C) 2008, 2009, 2010 Palo Alto Research Center, Inc.
00008  *
00009  * This library is free software; you can redistribute it and/or modify it
00010  * under the terms of the GNU Lesser General Public License version 2.1
00011  * as published by the Free Software Foundation.
00012  * This library is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00015  * Lesser General Public License for more details. You should have received
00016  * a copy of the GNU Lesser General Public License along with this library;
00017  * if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
00018  * Fifth Floor, Boston, MA 02110-1301 USA.
00019  */
00020 #include <string.h>
00021 #include <ccn/ccn.h>
00022 #include <ccn/charbuf.h>
00023 #include <ccn/coding.h>
00024 #include <ccn/uri.h>
00025 
00026 /*********
00027 RFC 3986                   URI Generic Syntax               January 2005
00028 
00029 
00030       reserved    = gen-delims / sub-delims
00031 
00032       gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
00033 
00034       sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
00035                   / "*" / "+" / "," / ";" / "="
00036 ...
00037       unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
00038 
00039 *********/
00040 
/**
 * Append a percent-escaped rendering of one name component to c.
 *
 * Only the generic URI unreserved characters of RFC 3986 (ALPHA, DIGIT,
 * '-', '.', '_' and '~') are copied through unchanged; every other byte is
 * written as %XX using two uppercase hex digits.
 * A component made up of nothing but dots (the empty component included)
 * gets three extra dots in front, so that the result can never be confused
 * with ".", "..", or an absent component. (cf. ccn_uri_append)
 *
 * @param c     buffer that receives the escaped text.
 * @param data  raw bytes of the component.
 * @param size  number of bytes at data.
 */
void
ccn_uri_append_percentescaped(struct ccn_charbuf *c,
                              const unsigned char *data, size_t size)
{
    size_t ndots = 0;
    size_t pos;

    /* Measure the leading run of dots; it spans everything iff all dots. */
    while (ndots < size && data[ndots] == '.')
        ndots++;
    if (ndots == size)
        ccn_charbuf_append(c, "...", 3);

    for (pos = 0; pos < size; pos++) {
        unsigned char byte = data[pos];
        /* The range tests below assume an ASCII execution character set. */
        int is_lower = (byte >= 'a' && byte <= 'z');
        int is_upper = (byte >= 'A' && byte <= 'Z');
        int is_digit = (byte >= '0' && byte <= '9');
        int is_mark = (byte == '-' || byte == '.' ||
                       byte == '_' || byte == '~');

        if (is_lower || is_upper || is_digit || is_mark)
            ccn_charbuf_append(c, data + pos, 1);
        else
            ccn_charbuf_putf(c, "%%%02X", (unsigned)byte);
    }
}
00076 
00077 /**
00078  * This appends to c a URI representation of the ccnb-encoded Name element
00079  * passed in.  For convenience, it will also look inside of a ContentObject
00080  * or Interest object to find the Name.
00081  * Components that consist solely of zero or more dots are converted
00082  * by adding 3 more dots so there are no ambiguities with . or .. or whether
00083  * a component is empty or absent.
00084  * Will prepend "ccnx:" unless includescheme is 0
00085  */
00086 
00087 int
00088 ccn_uri_append(struct ccn_charbuf *c,
00089                const unsigned char *ccnb,
00090                size_t size,
00091                int includescheme)
00092 {
00093     int ncomp = 0;
00094     const unsigned char *comp = NULL;
00095     size_t compsize = 0;
00096     struct ccn_buf_decoder decoder;
00097     struct ccn_buf_decoder *d = ccn_buf_decoder_start(&decoder, ccnb, size);
00098     if (ccn_buf_match_dtag(d, CCN_DTAG_Interest)    ||
00099         ccn_buf_match_dtag(d, CCN_DTAG_ContentObject)) {
00100         ccn_buf_advance(d);
00101         if (ccn_buf_match_dtag(d, CCN_DTAG_Signature))
00102             ccn_buf_advance_past_element(d);
00103     }
00104     if (!ccn_buf_match_dtag(d, CCN_DTAG_Name))
00105         return(-1);
00106     if (includescheme)
00107         ccn_charbuf_append_string(c, "ccnx:");
00108     ccn_buf_advance(d);
00109     while (ccn_buf_match_dtag(d, CCN_DTAG_Component)) {
00110         ccn_buf_advance(d);
00111         compsize = 0;
00112         if (ccn_buf_match_blob(d, &comp, &compsize))
00113             ccn_buf_advance(d);
00114         ccn_buf_check_close(d);
00115         if (d->decoder.state < 0)
00116             return(d->decoder.state);
00117         ncomp += 1;
00118         ccn_charbuf_append(c, "/", 1);
00119         ccn_uri_append_percentescaped(c, comp, compsize);
00120     }
00121     ccn_buf_check_close(d);
00122     if (d->decoder.state < 0)
00123         return (d->decoder.state);
00124     if (ncomp == 0)
00125         ccn_charbuf_append(c, "/", 1);
00126     return(ncomp);
00127 }
00128 
/*
 * Convert a single hexadecimal digit character to its numeric value.
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', or -1 for anything else.
 */
static int
hexit(int c)
{
    int value = -1;

    if (c >= '0' && c <= '9')
        value = c - '0';
    else if (c >= 'A' && c <= 'F')
        value = 10 + (c - 'A');
    else if (c >= 'a' && c <= 'f')
        value = 10 + (c - 'a');
    return(value);
}
00140 
00141 /*
00142  * ccn_append_uri_component:
00143  * This takes as input the escaped URI component at s and appends it
00144  * to c.  This does not do any ccnb-related stuff.
00145  * Processing stops at an error or if an unescaped nul, '/', '?', or '#' is found.
00146  * A component that consists solely of dots gets special treatment to reverse
00147  * the addition of ... by ccn_uri_append_percentescaped.  Since '.' is an unreserved
00148  * character, percent-encoding is not supposed to change meaning and hence
00149  * the dot processing happens after percent-encoding is removed.
00150  * A positive return value indicates there were unescaped reserved or
00151  * non-printable characters found.  This might warrant some extra checking
00152  * by the caller.
00153  * A return value of -1 indicates the component was "..", so the caller
00154  * will need to do something extra to handle this as appropriate.
00155  * A return value of -2 indicates the component was empty or ".", so the caller
00156  * should do nothing with it.
00157  * A return value of -3 indicates a bad %-escaped sequence.
00158  * If cont is not NULL, *cont is set to the number of input characters processed.
00159  */
00160 static int
00161 ccn_append_uri_component(struct ccn_charbuf *c, const char *s, size_t limit, size_t *cont)
00162 {
00163     size_t start = c->length;
00164     size_t i;
00165     int err = 0;
00166     int d1, d2;
00167     unsigned char ch;
00168     for (i = 0; i < limit; i++) {
00169         ch = s[i];
00170         switch (ch) {
00171             case 0:
00172             case '/':
00173             case '?':
00174             case '#':
00175                 limit = i;
00176                 break;
00177             case '%':
00178                 if (i + 3 > limit || (d1 = hexit(s[i+1])) < 0 ||
00179                                      (d2 = hexit(s[i+2])) < 0   ) {
00180                     return(-3);
00181                 }
00182                 ch = d1 * 16 + d2;
00183                 i += 2;
00184                 ccn_charbuf_append(c, &ch, 1);
00185                 break;
00186             case ':': case '[': case ']': case '@':
00187             case '!': case '$': case '&': case '\'': case '(': case ')':
00188             case '*': case '+': case ',': case ';': case '=':
00189                 err++;
00190                 /* FALLTHROUGH */
00191             default:
00192                 if (ch <= ' ' || ch > '~')
00193                     err++;
00194                 ccn_charbuf_append(c, &ch, 1);
00195                 break;
00196         }
00197     }
00198     for (i = start; i < c->length && c->buf[i] == '.'; i++)
00199         continue;
00200     if (i == c->length) {
00201         /* all dots */
00202         i -= start;
00203         if (i <= 1) {
00204             c->length = start;
00205             err = -2;
00206         }
00207         else if (i == 2) {
00208             c->length = start;
00209             err = -1;
00210         }
00211         else
00212             c->length -= 3;
00213     }
00214     if (cont != NULL)
00215         *cont = limit;
00216     return(err);
00217 }
00218 
00219 static int
00220 ccn_name_last_component_offset(const unsigned char *ccnb, size_t size)
00221 {
00222     struct ccn_buf_decoder decoder;
00223     struct ccn_buf_decoder *d = ccn_buf_decoder_start(&decoder, ccnb, size);
00224     int res = -1;
00225     if (ccn_buf_match_dtag(d, CCN_DTAG_Name)) {
00226         ccn_buf_advance(d);
00227         res = d->decoder.token_index; /* in case of 0 components */
00228         while (ccn_buf_match_dtag(d, CCN_DTAG_Component)) {
00229             res = d->decoder.token_index;
00230             ccn_buf_advance(d);
00231             if (ccn_buf_match_blob(d, NULL, NULL))
00232                 ccn_buf_advance(d);
00233             ccn_buf_check_close(d);
00234         }
00235         ccn_buf_check_close(d);
00236     }
00237     return ((d->decoder.state >= 0) ? res : -1);
00238 }
00239 
00240 /**
00241  * Convert a ccnx-scheme URI to a ccnb-encoded Name.
00242  * The converted result is placed in c.
00243  * On input, c may contain a base name, in which case relative URIs are allowed.
00244  * Otherwise c should start out empty, and the URI must be absolute.
00245  * @returns -1 if an error is found, otherwise returns the number of characters
00246  *          that were processed.
00247  */
00248 int
00249 ccn_name_from_uri(struct ccn_charbuf *c, const char *uri)
00250 {
00251     int res = 0;
00252     struct ccn_charbuf *compbuf = NULL;
00253     const char *stop = uri + strlen(uri);
00254     const char *s = uri;
00255     size_t cont = 0;
00256     
00257     compbuf = ccn_charbuf_create();
00258     if (compbuf == NULL) return(-1);
00259     if (s[0] != '/') {
00260         res = ccn_append_uri_component(compbuf, s, stop - s, &cont);
00261         if (res < -2)
00262             goto Done;
00263         ccn_charbuf_reserve(compbuf, 1)[0] = 0;
00264         if ((0 == strcasecmp((const char *)(compbuf->buf), "ccnx:") ||
00265              0 == strcasecmp((const char *)(compbuf->buf), "ccn:")) &&
00266             s[cont-1] == ':') {
00267             s += cont;
00268             cont = 0;
00269         }
00270         /// @bug XXX - need to error out on other uri schemes
00271     }
00272     if (s[0] == '/') {
00273         ccn_name_init(c);
00274         if (s[1] == '/') {
00275             /* Skip over hostname part - not used in ccnx scheme */
00276             s += 2;
00277             compbuf->length = 0;
00278             res = ccn_append_uri_component(compbuf, s, stop - s, &cont);
00279             if (res < 0 && res != -2)
00280                 goto Done;
00281             s += cont; cont = 0;
00282         }
00283     }
00284     while (s[0] != 0 && s[0] != '?' && s[0] != '#') {
00285         if (s[0] == '/')
00286             s++;
00287         compbuf->length = 0;
00288         res = ccn_append_uri_component(compbuf, s, stop - s, &cont);
00289         s += cont; cont = 0;
00290         if (res < -2)
00291             goto Done;
00292         if (res == -2) {
00293             res = 0; /* process . or equiv in URI */
00294             continue;
00295         }
00296         if (res == -1) {
00297             /* process .. in URI - discard last name component */
00298             res = ccn_name_last_component_offset(c->buf, c->length);
00299             if (res < 0)
00300                 goto Done;
00301             c->length = res;
00302             ccn_charbuf_append_closer(c);
00303             continue;
00304         }
00305         res = ccn_name_append(c, compbuf->buf, compbuf->length);
00306         if (res < 0)
00307             goto Done;
00308     }
00309 Done:
00310     ccn_charbuf_destroy(&compbuf);
00311     if (res < 0)
00312         return(-1);
00313     if (c->length < 2 || c->buf[c->length-1] != CCN_CLOSE)
00314         return(-1);
00315     return(s - uri);
00316 }
Generated on Fri May 13 16:27:03 2011 for Content-Centric Networking in C by  doxygen 1.6.3