pastebin - collaborative debugging tool
nrubsig.kpaste.net RSS


urlparser1.c - simple URL parser
Posted by Anonymous on Wed 24th Jan 2024 16:57
raw | new post
modification of post by Anonymous (view diff)

  1. /* urlparser1.c */
  2. /*
  3.  * MIT License
  4.  *
  5.  * Copyright (c) 2024 Roland Mainz <roland.mainz@nrubsig.org>
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8.  * of this software and associated documentation files (the "Software"), to deal
  9.  * in the Software without restriction, including without limitation the rights
  10.  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11.  * copies of the Software, and to permit persons to whom the Software is
  12.  * furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice shall be included in all
  15.  * copies or substantial portions of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20.  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  */
  25.  
  26. /* urlparser1.c - simple URL parser */
  27.  
  28. #include <stdlib.h>
  29. #include <stdbool.h>
  30. #include <string.h>
  31. #include <stdio.h>
  32.  
  33. #include "urlparser1.h"
  34.  
  35. /*
  36.  * Original extended regular expression:
  37.  *
  38.  * "^"
  39.  * "(.+?)"                       // scheme
  40.  * "://"                         // '://'
  41.  * "("                           // login
  42.  *       "(?:"
  43.  *               "(.+?)"         // user (optional)
  44.  *               "(?::(.+))?"    // password (optional)
  45.  *               "@"
  46.  *       ")?"
  47.  *       "("                     // hostport
  48.  *               "(.+?)"         // host
  49.  *               "(?::([[:digit:]]+))?" // port (optional)
  50.  *       ")"
  51.  * ")"
  52.  * "(?:/(.*?))?"                 // path (optional)
  53.  * "$"
  54.  */
  55.  
  56. #define DBGNULLSTR(s) (((s)!=NULL)?(s):"<NULL>")
  57. #if 1
  58. #define D(x) x
  59. #else
  60. #define D(x)
  61. #endif
  62.  
  63. url_parser_context *url_parser_create_context(const char *in_url, unsigned int flags)
  64. {
  65.         url_parser_context *uctx;
  66.         char *s;
  67.         size_t in_url_len;
  68.         size_t context_len;
  69.  
  70.         if (!in_url)
  71.                 return NULL;
  72.  
  73.         in_url_len = strlen(in_url);   
  74.        
  75.         context_len = sizeof(url_parser_context) +
  76.                 ((in_url_len+1)*5);
  77.         uctx = malloc(context_len);
  78.         if (!uctx)
  79.                 return NULL;
  80.                
  81.         s = (void *)(uctx+1);
  82.         uctx->in_url = s;               s+= in_url_len+1;
  83.         (void)strcpy(uctx->in_url, in_url);
  84.         uctx->scheme = s;               s+= in_url_len+1;
  85.         uctx->login.username = s;       s+= in_url_len+1;
  86.         uctx->hostport.hostname = s;    s+= in_url_len+1;
  87.         uctx->path = s;                 s+= in_url_len+1;
  88.         uctx->hostport.port = -1;
  89.  
  90.         return uctx;
  91. }
  92.  
  93. int url_parser_parse(url_parser_context *uctx)
  94. {
  95.         D((void)fprintf(stderr, "## parser in_url='%s'\n", uctx->in_url));
  96.  
  97.         char *s;
  98.         const char *urlstr = uctx->in_url;
  99.         size_t slen;
  100.  
  101.         s = strstr(urlstr, "://");
  102.         if (!s) {
  103.                 D((void)fprintf(stderr, "url_parser: Not an URL\n"));
  104.                 return -1;
  105.         }
  106.  
  107.         slen = s-urlstr;
  108.         (void)memcpy(uctx->scheme, urlstr, slen*sizeof(char));
  109.         uctx->scheme[slen] = '\0';
  110.         urlstr += slen + 3;
  111.  
  112.         D((void)fprintf(stdout, "scheme='%s', rest='%s'\n", uctx->scheme, urlstr));
  113.  
  114.         s = strstr(urlstr, "@");
  115.         if (s) {
  116.                 /* URL has user/password */
  117.                 slen = s-urlstr;
  118.                 (void)memcpy(uctx->login.username, urlstr, slen*sizeof(char));
  119.                 uctx->login.username[slen] = '\0';
  120.                 urlstr += slen + 1;
  121.  
  122.                 s = strstr(uctx->login.username, ":");
  123.                 if (s) {
  124.                         /* found passwd */
  125.                         uctx->login.passwd = s+1;
  126.                         *s = '\0';
  127.                 }
  128.                 else
  129.                 {
  130.                         uctx->login.passwd = NULL;
  131.                 }
  132.                
  133.                 /* catch password-only URLs */
  134.                 if (uctx->login.username[0] == '\0')
  135.                         uctx->login.username = NULL;
  136.         }
  137.         else
  138.         {
  139.                 uctx->login.username = NULL;
  140.                 uctx->login.passwd = NULL;
  141.         }
  142.  
  143.         D((void)fprintf(stdout, "login='%s', passwd='%s', rest='%s'\n",
  144.                 DBGNULLSTR(uctx->login.username),
  145.                 DBGNULLSTR(uctx->login.passwd),
  146.                 DBGNULLSTR(urlstr)));
  147.  
  148.         s = strstr(urlstr, "/");
  149.         if (s) {
  150.                 /* URL has hostport */
  151.                 slen = s-urlstr;
  152.                 (void)memcpy(uctx->hostport.hostname, urlstr, slen*sizeof(char));
  153.                 uctx->hostport.hostname[slen] = '\0';
  154.                 urlstr += slen;
  155.  
  156.                 /*
  157.                  * check for addresses within '[' and ']', like
  158.                  * IPv6 addresses
  159.                  */
  160.                 s = uctx->hostport.hostname;
  161.                 if (s[0] == '[')
  162.                         s = strstr(s, "]");
  163.  
  164.                 if (s == NULL) {
  165.                         D((void)fprintf(stderr, "url_parser: Unmatched '[' in hostname\n"));
  166.                         return -1;
  167.                 }
  168.  
  169.                 s = strstr(s, ":");
  170.                 if (s) {
  171.                         /* found port number */
  172.                         uctx->hostport.port = atoi(s+1);
  173.                         *s = '\0';
  174.                 }
  175.         }
  176.         else
  177.         {
  178.                 (void)strcpy(uctx->hostport.hostname, urlstr);
  179.                 uctx->path = NULL;
  180.                 urlstr = NULL;
  181.         }
  182.  
  183.         D((void)fprintf(stdout, "hostport='%s', port=%d, rest='%s'\n",
  184.                 DBGNULLSTR(uctx->hostport.hostname),
  185.                 uctx->hostport.port,
  186.                 DBGNULLSTR(urlstr)));
  187.  
  188.         if (!urlstr) {
  189.                 return 0;
  190.         }
  191.  
  192.         (void)strcpy(uctx->path, urlstr);
  193.         D((void)fprintf(stdout, "path='%s'\n", uctx->path));
  194.  
  195.         return 0;
  196. }
  197.  
  198. void url_parser_free_context(url_parser_context *c)
  199. {
  200.         free(c);
  201. }
  202.  
  203. void test_url_parser(const char *instr)
  204. {
  205.         url_parser_context *c;
  206.  
  207.         c = url_parser_create_context(instr, 0);
  208.  
  209.         (void)url_parser_parse(c);
  210.        
  211.         (void)fputc('\n', stdout);
  212.        
  213.         url_parser_free_context(c);
  214. }
  215.  
  216. int main(int ac, char *av[])
  217. {
  218.         (void)puts("#start");
  219.  
  220.         (void)setvbuf(stdout, NULL, _IONBF, 0);
  221.         (void)setvbuf(stderr, NULL, _IONBF, 0);
  222.  
  223.         (void)test_url_parser("foo://hostbar/baz");
  224.         (void)test_url_parser("foo://myuser@hostbar/baz");
  225.         (void)test_url_parser("foo://myuser:mypasswd@hostbar/baz");
  226.         (void)test_url_parser("foo://myuser:mypasswd@hostbar:666/baz");
  227.         (void)test_url_parser("foo://myuser:mypasswd@[fe80::21b:1bff:fec3:7713]:666/baz");
  228.         (void)test_url_parser("foo://:mypasswd2@hostbar2:667/baf");
  229.         (void)test_url_parser("foo://hostbar");
  230.         (void)test_url_parser("foo://hostbar:93");
  231.         (void)test_url_parser("foo://");
  232.         (void)test_url_parser("typo:/hostbar");
  233.         (void)test_url_parser("wrong");
  234.  
  235.         (void)puts("#done");
  236.  
  237.         return EXIT_SUCCESS;
  238. }
  239. /* urlparser1.h */
  240. /*
  241.  * MIT License
  242.  *
  243.  * Copyright (c) 2024 Roland Mainz <roland.mainz@nrubsig.org>
  244.  *
  245.  * Permission is hereby granted, free of charge, to any person obtaining a copy
  246.  * of this software and associated documentation files (the "Software"), to deal
  247.  * in the Software without restriction, including without limitation the rights
  248.  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  249.  * copies of the Software, and to permit persons to whom the Software is
  250.  * furnished to do so, subject to the following conditions:
  251.  *
  252.  * The above copyright notice and this permission notice shall be included in all
  253.  * copies or substantial portions of the Software.
  254.  *
  255.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  256.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  257.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  258.  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  259.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  260.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  261.  * SOFTWARE.
  262.  */
  263.  
  264. /* urlparser1.h - header for simple URL parser */
  265.  
  266. #ifndef __URLPARSER1_H__
  267. #define __URLPARSER1_H__
  268.  
  269. #include <stdlib.h>
  270.  
  271. typedef
  272. struct _url_parser_context {
  273.         char *in_url;
  274.  
  275.         char *scheme;
  276.         struct {
  277.                 char *username;
  278.                 char *passwd;
  279.         } login;
  280.         struct {
  281.                 char *hostname;
  282.                 signed int port;
  283.         } hostport;
  284.         char *path;
  285. } url_parser_context;
  286.  
  287. /* Prototypes */
  288. url_parser_context *url_parser_create_context(const char *in_url, unsigned int flags);
  289. int url_parser_parse(url_parser_context *uctx);
  290. void url_parser_free_context(url_parser_context *c);
  291.  
  292. #endif /* !__URLPARSER1_H__ */

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at