pastebin - collaborative debugging tool
nrubsig.kpaste.net RSS


urlparser1.c - simple URL parser
Posted by Anonymous on Wed 24th Jan 2024 15:58
raw | new post
view followups (newest first): urlparser1.c - simple URL parser by Anonymous
modification of post by Anonymous (view diff)

  1. /* urlparser1.c */
  2. /*
  3.  * MIT License
  4.  *
  5.  * Copyright (c) 2024 Roland Mainz <roland.mainz@nrubsig.org>
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8.  * of this software and associated documentation files (the "Software"), to deal
  9.  * in the Software without restriction, including without limitation the rights
  10.  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11.  * copies of the Software, and to permit persons to whom the Software is
  12.  * furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice shall be included in all
  15.  * copies or substantial portions of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20.  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  */
  25.  
  26. /* urlparser1.c - simple URL parser */
  27.  
  28. #include <stdlib.h>
  29. #include <stdbool.h>
  30. #include <string.h>
  31. #include <stdio.h>
  32.  
  33. #include "urlparser1.h"
  34.  
  35. /*
  36.  * Original extended regular expression:
  37.  *
  38.  * "^"
  39.  * "(.+?)"                       // scheme
  40.  * "://"                         // '://'
  41.  * "("                           // login
  42.  *       "(?:"
  43.  *               "(.+?)"         // user (optional)
  44.  *               "(?::(.+))?"    // password (optional)
  45.  *               "@"
  46.  *       ")?"
  47.  *       "("                     // hostport
  48.  *               "(.+?)"         // host
  49.  *               "(?::([[:digit:]]+))?" // port (optional)
  50.  *       ")"
  51.  * ")"
  52.  * "(?:/(.*?))?"                 // path (optional)
  53.  * "$"
  54.  */
  55.  
/*
 * DBGNULLSTR(s) - yields |s| unless it is NULL, in which case the literal
 * "<NULL>" is substituted. Used below so that possibly-NULL strings can be
 * handed to a "%s" conversion in debug output.
 */
#define DBGNULLSTR(s) (((s)!=NULL)?(s):"<NULL>")
/*
 * D(x) - debug wrapper. With the "#if 1" branch active the argument is
 * emitted verbatim; changing the condition to "#if 0" makes D(x) expand to
 * nothing, which compiles the wrapped statements out.
 */
#if 1
#define D(x) x
#else
#define D(x)
#endif
  62.  
  63. url_parser_context *url_parser_create_context(const char *in_url, unsigned int flags)
  64. {
  65.         url_parser_context *uctx;
  66.         char *s;
  67.         size_t in_url_len;
  68.         size_t context_len;
  69.  
  70.         if (!in_url)
  71.                 return NULL;
  72.  
  73.         in_url_len = strlen(in_url);   
  74.        
  75.         context_len = sizeof(url_parser_context) +
  76.                 ((in_url_len+1)*5);
  77.         uctx = malloc(context_len);
  78.         if (!uctx)
  79.                 return NULL;
  80.                
  81.         s = (void *)(uctx+1);
  82.         uctx->in_url = s;               s+= in_url_len+1;
  83.         (void)strcpy(uctx->in_url, in_url);
  84.         uctx->scheme = s;               s+= in_url_len+1;
  85.         uctx->login.username = s;       s+= in_url_len+1;
  86.         uctx->hostport.hostname = s;    s+= in_url_len+1;
  87.         uctx->path = s;                 s+= in_url_len+1;
  88.         uctx->hostport.port = -1;
  89.  
  90.         return uctx;
  91. }
  92.  
  93. int url_parser_parse(url_parser_context *uctx)
  94. {
  95.         D((void)fprintf(stderr, "## parser in_url='%s'\n", uctx->in_url));
  96.  
  97.         char *s;
  98.         const char *urlstr = uctx->in_url;
  99.         size_t slen;
  100.  
  101.         s = strstr(urlstr, "://");
  102.         if (!s) {
  103.                 D((void)fprintf(stderr, "url_parser: Not an URL\n"));
  104.                 return -1;
  105.         }
  106.  
  107.         slen = s-urlstr;
  108.         (void)memcpy(uctx->scheme, urlstr, slen*sizeof(char));
  109.         uctx->scheme[slen] = '\0';
  110.         urlstr += slen + 3;
  111.  
  112.         D((void)fprintf(stdout, "scheme='%s', rest='%s'\n", uctx->scheme, urlstr));
  113.  
  114.         s = strstr(urlstr, "@");
  115.         if (s) {
  116.                 /* URL has user/password */
  117.                 slen = s-urlstr;
  118.                 (void)memcpy(uctx->login.username, urlstr, slen*sizeof(char));
  119.                 uctx->login.username[slen] = '\0';
  120.                 urlstr += slen + 1;
  121.  
  122.                 s = strstr(uctx->login.username, ":");
  123.                 if (s) {
  124.                         /* found passwd */
  125.                         uctx->login.passwd = s+1;
  126.                         *s = '\0';
  127.                 }
  128.                 else
  129.                 {
  130.                         uctx->login.passwd = NULL;
  131.                 }
  132.                
  133.                 /* catch password-only URLs */
  134.                 if (uctx->login.username[0] == '\0')
  135.                         uctx->login.username = NULL;
  136.         }
  137.         else
  138.         {
  139.                 uctx->login.username = NULL;
  140.                 uctx->login.passwd = NULL;
  141.         }
  142.  
  143.         D((void)fprintf(stdout, "login='%s', passwd='%s', rest='%s'\n",
  144.                 DBGNULLSTR(uctx->login.username),
  145.                 DBGNULLSTR(uctx->login.passwd),
  146.                 DBGNULLSTR(urlstr)));
  147.  
  148.         s = strstr(urlstr, "/");
  149.         if (s) {
  150.                 /* URL has hostport */
  151.                 slen = s-urlstr;
  152.                 (void)memcpy(uctx->hostport.hostname, urlstr, slen*sizeof(char));
  153.                 uctx->hostport.hostname[slen] = '\0';
  154.                 urlstr += slen;
  155.  
  156.                 s = strstr(uctx->hostport.hostname, ":");
  157.                 if (s) {
  158.                         /* found port number */
  159.                         uctx->hostport.port = atoi(s+1);
  160.                         *s = '\0';
  161.                 }
  162.         }
  163.         else
  164.         {
  165.                 (void)strcpy(uctx->hostport.hostname, urlstr);
  166.                 uctx->path = NULL;
  167.                 urlstr = NULL;
  168.         }
  169.  
  170.         D((void)fprintf(stdout, "hostport='%s', port=%d, rest='%s'\n",
  171.                 DBGNULLSTR(uctx->hostport.hostname),
  172.                 uctx->hostport.port,
  173.                 DBGNULLSTR(urlstr)));
  174.  
  175.         if (!urlstr) {
  176.                 return 0;
  177.         }
  178.  
  179.         (void)strcpy(uctx->path, urlstr);
  180.         D((void)fprintf(stdout, "path='%s'\n", uctx->path));
  181.  
  182.         return 0;
  183. }
  184.  
  185. void url_parser_free_context(url_parser_context *c)
  186. {
  187.         free(c);
  188. }
  189.  
  190. void test_url_parser(const char *instr)
  191. {
  192.         url_parser_context *c;
  193.  
  194.         c = url_parser_create_context(instr, 0);
  195.  
  196.         (void)url_parser_parse(c);
  197.        
  198.         (void)fputc('\n', stdout);
  199.        
  200.         url_parser_free_context(c);
  201. }
  202.  
  203. int main(int ac, char *av[])
  204. {
  205.         (void)puts("#start");
  206.  
  207.         (void)setvbuf(stdout, NULL, _IONBF, 0);
  208.         (void)setvbuf(stderr, NULL, _IONBF, 0);
  209.  
  210.         (void)test_url_parser("foo://hostbar/baz");
  211.         (void)test_url_parser("foo://myuser@hostbar/baz");
  212.         (void)test_url_parser("foo://myuser:mypasswd@hostbar/baz");
  213.         (void)test_url_parser("foo://myuser:mypasswd@hostbar:666/baz");
  214.         (void)test_url_parser("foo://:mypasswd2@hostbar2:667/baf");
  215.         (void)test_url_parser("foo://hostbar");
  216.         (void)test_url_parser("foo://hostbar:93");
  217.         (void)test_url_parser("foo://");
  218.         (void)test_url_parser("typo:/hostbar");
  219.         (void)test_url_parser("wrong");
  220.  
  221.         (void)puts("#done");
  222.  
  223.         return EXIT_SUCCESS;
  224. }
  225. /* urlparser1.h */
  226. #ifndef __URLPARSER1_H__
  227. #define __URLPARSER1_H__
  228.  
  229. #include <stdlib.h>
  230.  
  231. typedef
  232. struct _url_parser_context {
  233.         char *in_url;
  234.  
  235.         char *scheme;
  236.         struct {
  237.                 char *username;
  238.                 char *passwd;
  239.         } login;
  240.         struct {
  241.                 char *hostname;
  242.                 signed int port;
  243.         } hostport;
  244.         char *path;
  245. } url_parser_context;
  246.  
/* Prototypes */

/*
 * Allocate a parser context holding a private copy of |in_url|.
 * Returns NULL if |in_url| is NULL or the allocation fails. Release the
 * result with url_parser_free_context().
 */
url_parser_context *url_parser_create_context(const char *in_url, unsigned int flags);

/*
 * Split the URL stored in |uctx| into scheme, login, host/port and path.
 * Returns 0 on success, -1 if the URL cannot be parsed (for example when
 * it contains no "://").
 */
int url_parser_parse(url_parser_context *uctx);

/* Free a context obtained from url_parser_create_context(). */
void url_parser_free_context(url_parser_context *c);
  251.  
  252. #endif /* !__URLPARSER1_H__ */

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at