pastebin - collaborative debugging tool
nrubsig.kpaste.net RSS


urlparser1.c - simple URL parser
Posted by Anonymous on Wed 24th Jan 2024 10:56
raw | new post
view followups (newest first): urlparser1.c - simple URL parser by Anonymous
modification of post by Anonymous (view diff)

  1. /*
  2.  * MIT License
  3.  *
  4.  * Copyright (c) 2024 Roland Mainz <roland.mainz@nrubsig.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7.  * of this software and associated documentation files (the "Software"), to deal
  8.  * in the Software without restriction, including without limitation the rights
  9.  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10.  * copies of the Software, and to permit persons to whom the Software is
  11.  * furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included in all
  14.  * copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19.  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22.  * SOFTWARE.
  23.  */
  24.  
  25. /* urlparser1.c - simple URL parser */
  26.  
  27. #include <stdlib.h>
  28. #include <stdbool.h>
  29. #include <string.h>
  30. #include <stdio.h>
  31.  
  32. /*
  33.  * Original extended regular expression:
  34.  *
  35.  * "^"
  36.  * "(.+?)"                       // scheme
  37.  * "://"                         // '://'
  38.  * "("                           // login
  39.  *       "(?:"
  40.  *               "(.+?)"         // user (optional)
  41.  *               "(?::(.+))?"    // password (optional)
  42.  *               "@"
  43.  *       ")?"
  44.  *       "("                     // hostport
  45.  *               "(.+?)"         // host
  46.  *               "(?::([[:digit:]]+))?" // port (optional)
  47.  *       ")"
  48.  * ")"
  49.  * "(?:/(.*?))?"                 // path (optional)
  50.  * "$"
  51.  */
  52.  
  53. #define DBGNULLSTR(s) (((s)!=NULL)?(s):"<NULL>")
  54.  
  55.  
  56. typedef
  57. struct _url_parser_context {
  58.         char *in_url;
  59.  
  60.         char *scheme;
  61.         struct {
  62.                 char *username;
  63.                 char *passwd;
  64.         } login;
  65.         struct {
  66.                 char *hostname;
  67.                 signed int port;
  68.         } hostport;
  69.         char *path;
  70. } url_parser_context;
  71.  
  72. url_parser_context *url_parser_create_context(const char *in_url, unsigned int flags)
  73. {
  74.         url_parser_context *uctx;
  75.         char *s;
  76.         size_t in_url_len;
  77.         size_t context_len;
  78.  
  79.         if (!in_url)
  80.                 return NULL;
  81.  
  82.         in_url_len = strlen(in_url);   
  83.        
  84.         context_len = sizeof(url_parser_context) +
  85.                 ((in_url_len+1)*5);
  86.         uctx = malloc(context_len);
  87.         if (!uctx)
  88.                 return NULL;
  89.                
  90.         s = (void *)(uctx+1);
  91.         uctx->in_url = s;               s+= in_url_len+1;
  92.         (void)strcpy(uctx->in_url, in_url);
  93.         uctx->scheme = s;               s+= in_url_len+1;
  94.         uctx->login.username = s;       s+= in_url_len+1;
  95.         uctx->hostport.hostname = s;    s+= in_url_len+1;
  96.         uctx->path = s;                 s+= in_url_len+1;
  97.         uctx->hostport.port = -1;
  98.  
  99.         return uctx;
  100. }
  101.  
  102. int url_parser_parse(url_parser_context *uctx)
  103. {
  104.         (void)fprintf(stderr, "## parser in_url='%s'\n", uctx->in_url);
  105.  
  106.         char *s;
  107.         const char *urlstr = uctx->in_url;
  108.         size_t slen;
  109.  
  110.         s = strstr(urlstr, "://");
  111.         if (!s) {
  112.                 (void)fprintf(stderr, "url_parser: Not an URL\n");
  113.                 return -1;
  114.         }
  115.  
  116.         slen = s-urlstr;
  117.         (void)memcpy(uctx->scheme, urlstr, slen);
  118.         uctx->scheme[slen] = '\0';
  119.         urlstr += slen + 3;
  120.  
  121.         (void)fprintf(stdout, "scheme='%s', rest='%s'\n", uctx->scheme, urlstr);
  122.  
  123.         s = strstr(urlstr, "@");
  124.         if (s) {
  125.                 /* URL has user/password */
  126.                 slen = s-urlstr;
  127.                 (void)memcpy(uctx->login.username, urlstr, slen);
  128.                 uctx->login.username[slen] = '\0';
  129.                 urlstr += slen + 1;
  130.  
  131.                 s = strstr(uctx->login.username, ":");
  132.                 if (s) {
  133.                         /* found passwd */
  134.                         uctx->login.passwd = s+1;
  135.                         *s = '\0';
  136.                 }
  137.                 else
  138.                 {
  139.                         uctx->login.passwd = NULL;
  140.                 }
  141.                
  142.                 /* catch password-only URLs */
  143.                 if (uctx->login.username[0] == '\0')
  144.                         uctx->login.username = NULL;
  145.         }
  146.         else
  147.         {
  148.                 uctx->login.username = NULL;
  149.         }
  150.  
  151.         (void)fprintf(stdout, "login='%s', passwd='%s', rest='%s'\n",
  152.                 DBGNULLSTR(uctx->login.username),
  153.                 DBGNULLSTR(uctx->login.passwd),
  154.                 DBGNULLSTR(urlstr));
  155.  
  156.         s = strstr(urlstr, "/");
  157.         if (s) {
  158.                 /* URL has hostport */
  159.                 slen = s-urlstr;
  160.                 (void)memcpy(uctx->hostport.hostname, urlstr, slen);
  161.                 uctx->hostport.hostname[slen] = '\0';
  162.                 urlstr += slen + 1;
  163.  
  164.                 s = strstr(uctx->hostport.hostname, ":");
  165.                 if (s) {
  166.                         /* found port number */
  167.                         uctx->hostport.port = atoi(s+1);
  168.                         *s = '\0';
  169.                 }
  170.         }
  171.         else
  172.         {
  173.                 (void)strcpy(uctx->hostport.hostname, urlstr);
  174.                 uctx->path = NULL;
  175.                 urlstr = NULL;
  176.         }
  177.  
  178.         (void)fprintf(stdout, "hostport='%s', port=%d, rest='%s'\n",
  179.                 DBGNULLSTR(uctx->hostport.hostname),
  180.                 uctx->hostport.port,
  181.                 DBGNULLSTR(urlstr));
  182.  
  183.         if (!urlstr) {
  184.                 return 0;
  185.         }
  186.  
  187.         (void)strcpy(uctx->path, urlstr);
  188.         (void)fprintf(stdout, "path='%s'\n", uctx->path);
  189.  
  190.         return 0;
  191. }
  192.  
  193. void url_parser_free_context(url_parser_context *c)
  194. {
  195.         free(c);
  196. }
  197.  
  198. void test_url_parser(const char *instr)
  199. {
  200.         url_parser_context *c;
  201.  
  202.         c = url_parser_create_context(instr, 0);
  203.  
  204.         (void)url_parser_parse(c);
  205.        
  206.         (void)fputc('\n', stdout);
  207.        
  208.         url_parser_free_context(c);
  209. }
  210.  
  211. int main(int ac, char *av[])
  212. {
  213.         (void)puts("#start");
  214.        
  215.         (void)setvbuf(stdout, NULL, _IONBF, 0);
  216.         (void)setvbuf(stderr, NULL, _IONBF, 0);
  217.  
  218.         (void)test_url_parser("foo://hostbar/baz");
  219.         (void)test_url_parser("foo://myuser@hostbar/baz");
  220.         (void)test_url_parser("foo://myuser:mypasswd@hostbar/baz");
  221.         (void)test_url_parser("foo://myuser:mypasswd@hostbar:666/baz");
  222.         (void)test_url_parser("foo://:mypasswd2@hostbar2:667/baf");
  223.         (void)test_url_parser("foo://hostbar");
  224.         (void)test_url_parser("foo://hostbar:93");
  225.         (void)test_url_parser("foo://");
  226.         (void)test_url_parser("typo:/hostbar");
  227.         (void)test_url_parser("wrong");
  228.  
  229.         (void)puts("#done");
  230.  
  231.         return EXIT_SUCCESS;
  232. }

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at