/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* url-util.c : utility functions to parse URLs */


/* 
 * Author : 
 *  Bertrand Guiheneuf <bertrand@helixcode.com>
 *
 * Copyright 1999, 2000 HelixCode (http://www.helixcode.com)
 *
 * This program is free software; you can redistribute it and/or 
 * modify it under the terms of the GNU General Public License as 
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */



/* 
   Here we deal with URLs following the general scheme:
   protocol://user:password@host:port/name
   where name is a path-like string (i.e. dir1/dir2/....)
   See RFC 1738 for the complete description of 
   Uniform Resource Locators 
   
     Bertrand. */
/*
  XXX TODO: recover the words between #'s or ?'s after the path */

#include <config.h>
#include "url-util.h"

/* General item finder. */
/* It begins the search at position @position in @url.
   @item is first reset to NULL; when the item is found, a newly
   allocated copy of it is stored in @item, @position is set after
   the item and TRUE is returned.  When the item is not found FALSE is
   returned; some finders still advance @position by one in that case
   to step over a separator character.
   @error is an output flag: among the finders of this file only
   _find_protocol writes it (it sets it to FALSE). */
typedef gboolean find_item_func(const gchar *url, gchar **item, guint *position, gboolean *error);

/* Describes one parsing step, used to find one item (protocol, then
   user, ...).  An array of these steps is walked in order, each step
   calling its finder with the position left by the previous one. */
typedef struct {
	char *item_name;           /* item name : for debug only */
	gchar **item_value;      /* where the item value will go */
	find_item_func *find_func; /* item finder */
} FindStepStruct;

/* Item finders, declared in the order in which g_url_new applies them:
   each one resumes the scan at the position left by the previous one. */
static gboolean _find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_user (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_passwd (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_host (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_port (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_path (const gchar *url, gchar **item, guint *position, gboolean *error);



/**
 * g_url_new: create an Gurl object from a string
 *
 * @url_string: The string containing the URL to scan
 * 
 * This routine takes a gchar and parses it as an
 * URL of the form:
 * protocol://user:password@host:port/path
 * there is no test on the values. For example,
 * "port" can be a string, not only a number !
 * The Gurl structure fields ar filled with
 * the scan results. When a member of the 
 * general URL can not be found, the corresponding
 * Gurl member is NULL  
 * Fields filled in the Gurl structure are allocated
 * and url_string is not modified. 
 * 
 * Return value: a Gurl structure containing the URL items.
 **/
Gurl *g_url_new (const gchar* url_string)
{
	Gurl *g_url;
	
	gchar *protocol;
	gchar *user;
	gchar *passwd;
	gchar *host;
	gchar *port;
	gchar *path;
	
	guint position = 0;
	gboolean error;
	gboolean found;
	guint i;
	
	g_url = g_new (Gurl,1);
	
#define NB_STEP_URL  6
	{
		FindStepStruct step[NB_STEP_URL] = {
			{ "protocol", &(g_url->protocol), _find_protocol},
			{ "user", &(g_url->user), _find_user},
			{ "password", &(g_url->passwd), _find_passwd},
			{ "host", &(g_url->host), _find_host},
			{ "port", &(g_url->port), _find_port},
			{ "path", &(g_url->path), _find_path}
		};
		
		for (i = 0; i < NB_STEP_URL; i++) {
			found = step[i].find_func (url_string, 
						   step[i].item_value, 
						   &position, 
						   &error);
		}
	}
	
	return g_url;
}



/**
 * g_url_free: free a Gurl object
 *
 * @url: the Gurl structure to free
 *
 * Frees the protocol, user, passwd, host, port and path strings of
 * @url, then the structure itself. Fields that are NULL are simply
 * ignored by g_free(). If @url is NULL a warning is logged and
 * nothing is freed.
 **/
void
g_url_free (Gurl *url)
{
	/* precondition check of a public entry point: warn and return
	   rather than abort the whole process */
	g_return_if_fail (url != NULL);

	g_free (url->protocol);
	g_free (url->user);
	g_free (url->passwd);
	g_free (url->host);
	g_free (url->port);
	g_free (url->path);

	g_free (url);
}

/**** PARSING FUNCTIONS ****/

/* So, yes, I must admit there would have been more elegant
    ways to do this, but it works, and quite well :)  */


/* Find the protocol, i.e. the text between *position and the first
   ':' when that ':' is immediately followed by "//".

   On success a newly allocated copy of the protocol is stored in
   *item, *position is set just after the "://" and TRUE is returned.
   Otherwise *item is NULL, *position is left untouched and FALSE is
   returned.  *error, when error is not NULL, is always set to FALSE. */
static gboolean 
_find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint colon;
	guint len_url;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = strlen (url);

	*item = NULL;
	if (error)
		*error = FALSE;
	start = *position;

	/* find a ':' */
	colon = start;
	while ((colon < len_url) && (url[colon] != ':'))
		colon++;

	if (colon >= len_url)
		return FALSE;

	/* check if it is followed by a "//"; reading up to two characters
	   past the ':' is safe because the string is NUL terminated and
	   the second test only runs when the first character was a '/' */
	if ((url[colon + 1] != '/') || (url[colon + 2] != '/'))
		return FALSE;

	/* the protocol starts at the scan position, not necessarily at
	   the beginning of the string */
	*item = g_strndup (url + start, colon - start);
	*position = colon + 3;

	return TRUE;
}




/* Find the user name, i.e. the text between *position and the first
   ':' or '@', provided that an '@' exists before the first '/'.

   The '@' is only looked for up to the first '/': in the user and
   password fields a '/' has to be encoded (RFC 1738), so an '@' found
   after a '/' belongs to the path and does not introduce a user.

   Returns TRUE and stores a newly allocated copy in *item when a non
   empty user name is found; *position then points after the ':' that
   ended the name, or on the '@' when there is no ':'.
   Returns FALSE with *item set to NULL otherwise; when the user name
   is merely empty, *position is advanced past the ':' or '@' found at
   the scan position.  error is not used. */
static gboolean
_find_user (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint at_pos;
	guint end;
	guint len_url;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = strlen (url);
	*item = NULL;
	start = *position;

	/* find a '@', giving up at the first '/' */
	at_pos = start;
	while ((at_pos < len_url) && (url[at_pos] != '@') && (url[at_pos] != '/'))
		at_pos++;

	if ((at_pos >= len_url) || (url[at_pos] != '@'))
		return FALSE;

	/* find a ':' before the '@' */
	end = start;
	while ((end < at_pos) && (url[end] != ':'))
		end++;

	/* now if end has not moved at all, there is no user */
	if (end == start) {
		*position = start + 1;
		return FALSE;
	}

	*item = g_strndup (url + start, end - start);
	if (end < at_pos)
		*position = end + 1; /* there was a ':', skip it */
	else
		*position = end;

	return TRUE;
}

/* Find the password, i.e. the text between *position and the next
   '@', provided that this '@' comes before the first '/'.

   The '@' is only looked for up to the first '/': in the user and
   password fields a '/' has to be encoded (RFC 1738), so an '@' found
   after a '/' belongs to the path.

   Returns TRUE and stores a newly allocated copy in *item when a non
   empty password is found.  Returns FALSE with *item set to NULL
   otherwise.  Whenever the '@' is found, *position is set just after
   it, even if the password is empty; when it is not found *position
   is left untouched.  error is not used. */
static gboolean
_find_passwd (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint at_pos;
	guint len_url;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = strlen (url);
	*item = NULL;
	start = *position;

	/* find a '@', giving up at the first '/' */
	at_pos = start;
	while ((at_pos < len_url) && (url[at_pos] != '@') && (url[at_pos] != '/'))
		at_pos++;

	if ((at_pos >= len_url) || (url[at_pos] != '@'))
		return FALSE;

	/* skip the '@' in every case */
	*position = at_pos + 1;

	/* at_pos has not moved at all, there is no passwd */
	if (at_pos == start)
		return FALSE;

	*item = g_strndup (url + start, at_pos - start);

	return TRUE;
}



/* Find the host name: the text running from *position up to the
   first ':' or '/', or up to the end of the string.

   Returns TRUE and stores a newly allocated copy in *item when the
   host is not empty; *position then points after the ':' that ended
   it, or on the '/' (or end of string) when there is no ':'.
   Returns FALSE with *item set to NULL when the host is empty; a ':'
   sitting at the scan position is stepped over, anything else leaves
   *position untouched.  error is not used. */
static gboolean
_find_host (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint begin;
	guint cursor;
	guint total;
	gboolean hit_colon;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	total = strlen (url);
	begin = *position;
	*item = NULL;

	/* stop on whichever of ':' and '/' comes first */
	for (cursor = begin; cursor < total; cursor++) {
		if ((url[cursor] == '/') || (url[cursor] == ':'))
			break;
	}
	hit_colon = (cursor < total) && (url[cursor] == ':');

	if (cursor != begin) {
		*item = g_strndup (url + begin, cursor - begin);
		/* a terminating ':' is consumed, a '/' is left in place */
		*position = hit_colon ? cursor + 1 : cursor;
		return TRUE;
	}

	/* empty host: only a leading ':' gets skipped */
	if (hit_colon)
		*position = begin + 1;

	return FALSE;
}


/* Find the port: the text running from *position up to the first
   ':' or '/', or up to the end of the string.  No check is made that
   the text is a number.

   Returns TRUE and stores a newly allocated copy in *item when the
   port is not empty; *position is then set on the character that
   ended it.  Returns FALSE with *item set to NULL and *position
   untouched otherwise.  error is not used. */
static gboolean
_find_port (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint first;
	guint last;
	guint url_size;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	url_size = strlen (url);
	first = *position;
	*item = NULL;

	/* advance to the first ':' or '/', or to the end of the string */
	for (last = first; last < url_size; last++) {
		if ((url[last] == '/') || (url[last] == ':'))
			break;
	}

	/* nothing between the start and the delimiter: there is no port */
	if (last == first)
		return FALSE;

	*item = g_strndup (url + first, last - first);
	*position = last;

	return TRUE;
}


/* Find the path: the text running from *position up to the first
   '#' or '?', or up to the end of the string.

   Returns TRUE and stores a newly allocated copy in *item when the
   path is not empty; *position is then set on the character that
   ended it.  Returns FALSE with *item set to NULL and *position
   untouched otherwise.  error is not used. */
static gboolean
_find_path (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint from;
	guint upto;
	guint url_size;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	url_size = strlen (url);
	from = *position;
	*item = NULL;

	/* the path stops at a '#' or a '?' */
	for (upto = from; upto < url_size; upto++) {
		if ((url[upto] == '#') || (url[upto] == '?'))
			break;
	}

	if (upto != from) {
		*item = g_strndup (url + from, upto - from);
		*position = upto;
		return TRUE;
	}

	/* the scan did not move at all, there is no path */
	return FALSE;
}





/**** TEST ROUTINE - NOT COMPILED BY DEFAULT ****/

/* to test this file :
   gcc -o test_url_util `glib-config --cflags`  -I.. -DTEST_URL_UTIL url-util.c `glib-config --libs`
   ./test_url_util URL
*/
#ifdef TEST_URL_UTIL



int 
main (int argc, char **argv)
{

	gchar *url;
	gchar *protocol;
	gchar *user;
	gchar *passwd;
	gchar *host;
	gchar *port;
	gchar *path;
	guint position=0;
	gboolean error;
	gboolean found;
	guint i;
	guint i_pos;

#define NB_STEP_TEST  6
	FindStepStruct test_step[NB_STEP_TEST] = {
		{ "protocol", &protocol, _find_protocol},
		{ "user", &user, _find_user},
		{ "password", &passwd, _find_passwd},
		{ "host", &host, _find_host},
		{ "port", &port, _find_port},
		{ "path", &path, _find_path}
	};
	url = argv[1];
	printf("URL to test : %s\n\n", url);
	for (i=0; i<NB_STEP_TEST; i++) {
		found = test_step[i].find_func (url, 
						test_step[i].item_value, 
						&position, 
						&error);
		if (found) {
			printf ("\t\t\t\t** %s found : %s\n",
				test_step[i].item_name,
				*(test_step[i].item_value));
		} else printf ("** %s not found in URL\n", test_step[i].item_name);
		printf ("next item position:\n");
		printf ("%s\n", url);
		for (i_pos = 0; i_pos < position; i_pos++) printf (" ");
		printf ("^\n");
		
	}
	 
}

#endif /* TEST_URL_UTIL */