pastebin - collaborative debugging tool
nrubsig.kpaste.net RSS


urlparser1.c - simple URL parser
Posted by Anonymous on Wed 24th Jan 2024 10:29
raw | new post
view followups (newest first): urlparser1.c - simple URL parser by Anonymous

  1. /*
  2.  * MIT License
  3.  *
  4.  * Copyright (c) 2024 Roland Mainz <roland.mainz@nrubsig.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7.  * of this software and associated documentation files (the "Software"), to deal
  8.  * in the Software without restriction, including without limitation the rights
  9.  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10.  * copies of the Software, and to permit persons to whom the Software is
  11.  * furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included in all
  14.  * copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19.  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22.  * SOFTWARE.
  23.  */
  24.  
  25. #include <stdlib.h>
  26. #include <stdbool.h>
  27. #include <string.h>
  28. #include <stdio.h>
  29.  
  30. /*
  31.  * Original extended regular expression:
  32.  *
  33.  * "^"
  34.  * "(.+?)"                       // scheme
  35.  * "://"                         // '://'
  36.  * "("                           // login
  37.  *       "(?:"
  38.  *               "(.+?)"         // user (optional)
  39.  *               "(?::(.+))?"    // password (optional)
  40.  *               "@"
  41.  *       ")?"
  42.  *       "("                     // hostport
  43.  *               "(.+?)"         // host
  44.  *               "(?::([[:digit:]]+))?" // port (optional)
  45.  *       ")"
  46.  * ")"
  47.  * "(?:/(.*?))?"                 // path (optional)
  48.  * "$"
  49.  */
  50.  
  51. typedef
  52. struct _url_parser_context {
  53.         char *in_url;
  54.  
  55.         char *scheme;
  56.         struct {
  57.                 char *login;
  58.         } login;
  59.         struct {
  60.                 char *hostname;
  61.                 signed int port;
  62.         } hostport;
  63.         char *path;
  64. } url_parser_context;
  65.  
  66. url_parser_context *url_parser_create_context(const char *in_url)
  67. {
  68.         url_parser_context *uctx;
  69.         char *s;
  70.         size_t in_url_len;
  71.         size_t context_len;
  72.  
  73.         if (!in_url)
  74.                 return NULL;
  75.  
  76.         in_url_len = strlen(in_url);   
  77.        
  78.         context_len = sizeof(url_parser_context) +
  79.                 ((in_url_len+1)*5);
  80.         uctx = malloc(context_len);
  81.         if (!uctx)
  82.                 return NULL;
  83.                
  84.         s = (void *)(uctx+1);
  85.         uctx->in_url = s;       s+= in_url_len+1;
  86.         (void)strcpy(uctx->in_url, in_url);
  87.         uctx->scheme = s;       s+= in_url_len+1;
  88.         uctx->login.login = s;  s+= in_url_len+1;
  89.         uctx->hostport.hostname = s;    s+= in_url_len+1;
  90.         uctx->path = s; s+= in_url_len+1;
  91.  
  92.         return uctx;
  93. }
  94.  
  95. int url_parser(url_parser_context *uctx)
  96. {
  97.         (void)fprintf(stderr, "## parser in_url='%s'\n", uctx->in_url);
  98.  
  99.         char *s;
  100.         const char *urlstr = uctx->in_url;
  101.         size_t slen;
  102.  
  103.         s = strstr(urlstr, "://");
  104.         if (!s) {
  105.                 (void)fprintf(stderr, "url_parser: Not an URL\n");
  106.                 return -1;
  107.         }
  108.  
  109.         slen = s-urlstr;
  110.         (void)memcpy(uctx->scheme, urlstr, slen);
  111.         uctx->scheme[slen] = '\0';
  112.         urlstr += slen + 3;
  113.  
  114.         (void)fprintf(stdout, "scheme='%s', rest='%s'\n", uctx->scheme, urlstr);
  115.  
  116.         s = strstr(urlstr, "@");
  117.         if (s) {
  118.                 /* URL has user/password */
  119.                 slen = s-urlstr;
  120.                 (void)memcpy(uctx->login.login, urlstr, slen);
  121.                 uctx->login.login[slen] = '\0';
  122.                 urlstr += slen + 1;
  123.  
  124.                 (void)fprintf(stdout, "login='%s', rest='%s'\n", uctx->login.login, urlstr);
  125.         }
  126.         else
  127.         {
  128.                 uctx->login.login = NULL;
  129.         }
  130.  
  131.         s = strstr(urlstr, "/");
  132.         if (s) {
  133.                 /* URL has hostport */
  134.                 slen = s-urlstr;
  135.                 (void)memcpy(uctx->hostport.hostname, urlstr, slen);
  136.                 uctx->hostport.hostname[slen] = '\0';
  137.                 urlstr += slen + 1;
  138.  
  139.                 (void)fprintf(stdout, "hostport='%s', rest='%s'\n", uctx->hostport.hostname, urlstr);
  140.                
  141.                 s = strstr(uctx->hostport.hostname, ":");
  142.                 if (s) {
  143.                         /* found port number */
  144.                         uctx->hostport.port = atoi(s+1);
  145.                         *s = '\0';
  146.                 }
  147.                 else
  148.                 {
  149.                         uctx->hostport.port = -1;
  150.                 }
  151.  
  152.  
  153.                 (void)fprintf(stdout, "hostport='%s', port=%d, rest='%s'\n",
  154.                         uctx->hostport.hostname,
  155.                         uctx->hostport.port,
  156.                         urlstr);
  157.         }
  158.         else
  159.         {
  160.                 (void)strcpy(uctx->hostport.hostname, urlstr);
  161.                 uctx->path = NULL;
  162.                 urlstr = NULL;
  163.         }
  164.  
  165.         if (!urlstr) {
  166.                 return 0;
  167.         }
  168.  
  169.         (void)strcpy(uctx->path, urlstr);
  170.         (void)fprintf(stdout, "path='%s'\n", uctx->path);
  171.  
  172.         return 0;
  173. }
  174.  
  175. void test_url_parser(const char *instr)
  176. {
  177.         url_parser_context *c;
  178.  
  179.         c = url_parser_create_context(instr);
  180.  
  181.         url_parser(c);
  182. }
  183.  
  184. int main(int ac, char *av[])
  185. {
  186.         (void)puts("#start");
  187.        
  188.         (void)setvbuf(stdout, NULL, _IONBF, 0);
  189.         (void)setvbuf(stderr, NULL, _IONBF, 0);
  190.  
  191.         (void)test_url_parser("foo://hostbar/baz");
  192.         (void)test_url_parser("foo://myuser@hostbar/baz");
  193.         (void)test_url_parser("foo://myuser:passwd@hostbar/baz");
  194.         (void)test_url_parser("foo://myuser:passwd@hostbar:666/baz");
  195.         (void)test_url_parser("foo://hostbar");
  196.         (void)test_url_parser("foo://hostbar:93");
  197.         (void)test_url_parser("typo:/hostbar");
  198.         (void)test_url_parser("wrong");
  199.  
  200.         (void)puts("#done");
  201.  
  202.         return EXIT_SUCCESS;
  203. }

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at