(* $Id: neturl.ml,v 2.1 2001/09/14 14:22:34 stolpmann Exp $
 * ----------------------------------------------------------------------
 *
 *)

exception Malformed_URL

type url_syntax_option =
    Url_part_not_recognized
  | Url_part_allowed
  | Url_part_required


type url_syntax =
    { url_enable_scheme    : url_syntax_option;
      url_enable_user      : url_syntax_option;
      url_enable_password  : url_syntax_option;
      url_enable_host      : url_syntax_option;
      url_enable_port      : url_syntax_option;
      url_enable_path      : url_syntax_option;
      url_enable_param     : url_syntax_option;
      url_enable_query     : url_syntax_option;
      url_enable_fragment  : url_syntax_option;
      url_enable_other     : url_syntax_option;
      url_accepts_8bits    : bool;
      url_is_valid         : url -> bool;
    }

and url =
    { 
      url_syntax   : url_syntax;
      mutable url_validity : bool;
      url_scheme   : string option;
      url_user     : string option;
      url_password : string option;
      url_host     : string option;
      url_port     : int option;
      url_path     : string list;
      url_param    : string list;
      url_query    : string option;
      url_fragment : string option;
      url_other    : string option;
    }
;;


type char_category =
    Accepted
  | Rejected
  | Separator



let scan_url_part s k_from k_to cats accept_8bits =
  (* Scans the longest word of accepted characters from position 'k_from'
   * in 's' until at most position 'k_to'. The character following the
   * word (if any) must be a separator character.
   * On success, the function returns the position of the last character
   * of the word + 1.
   * If there is any rejected character before the separator or the end
   * of the string (i.e. position 'k_to') is reached, the exception
   * Malformed_URL is raised.
   * Furthermore, if the character '%' is accepted it is checked whether
   * two hexadecimal digits follow (which must be accepted, too). If this
   * is not true, the exception Malformed_URL is raised, too.
   * 'cats': contains for every character code (0 to 255) the category
   * of the character.
   *)
  let check_hex c =
    if cats.( Char.code c ) <> Accepted then raise Malformed_URL;
    match c with
	('0'..'9'|'A'..'F'|'a'..'f') -> ()
      | _ -> raise Malformed_URL
  in

  let rec scan k =
    if k >= k_to then
      k
    else begin
      let c = s.[k] in
      let cat = cats.(Char.code c) in
      match cat with
	  Accepted -> 
	    if c = '%' then begin
	      if k+2 >= k_to then raise Malformed_URL;
	      let c1 = s.[k+1] in
	      let c2 = s.[k+2] in
	      check_hex c1;
	      check_hex c2;
	      scan (k+3)
	    end
	    else
	      scan (k+1)
	| Separator -> k
	| Rejected -> 
	    if accept_8bits && c >= '\128' 
	    then scan (k+1)
	    else raise Malformed_URL
    end
  in

  assert (Array.length cats = 256);
  assert (k_from >= 0);
  assert (k_from <= k_to);
  assert (k_to <= String.length s);
  
  scan k_from
;;

  
(* Create a categorization: *)

let lalpha = [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
	       'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z' ]

let ualpha = [ 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
	       'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z' ]

let digit = [ '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9' ]

let safe = [ '$'; '-'; '_'; '.'; '+' ]

let extra = [ '!'; '*'; '\''; '('; ')'; ',' ]

let make_cats accepted separators =
  (* create a categorization:
   * - All characters listed in 'separators' are separators.
   * - All characters listed in 'accepted' and which do not occur in
   *   'separators' are accepted characters.
   * - All other characters are rejected.
   *)
  let cats = Array.make 256 Rejected in
  List.iter
    (fun c ->
       cats.(Char.code c) <- Accepted
    )
    accepted;

  List.iter
    (fun c ->
       cats.(Char.code c) <- Separator
    )
    separators;
  cats
;;


let scheme_cats =
  make_cats (lalpha @ ualpha @ ['+'; '-'; '.']) [':'] ;;

    (* scheme_cats: character categorization to _extract_ the URL scheme *)


let login_cats =
  make_cats 
    (lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%'])  
    [':'; '@'; '/'; '#' ]
;;

    (* login_cats: character categorization to _extract_ user name, password,
     * host name, and port.
     *)

let host_cats =
  make_cats
    (lalpha @ ualpha @ digit @ ['.'; '-'])
    []
;;

    (* host_cats: character categorization to _check_ whether the host name
     * is formed only by legal characters.
     * Especially '%' is not allowed here!
     *)

let port_cats =
  make_cats
    digit
    []
;;

    (* port_cats: character categorization to _check_ whether the port number
     * is formed only by legal characters.
     * Especially '%' is not allowed here!
     *)

let path_cats separators =
  make_cats
    (lalpha @ ualpha @ digit @ safe @ extra @ 
              ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'])
    separators
;;


let separators_from_syntax syn =
  let include_if syn_option clist =
    if syn_option <> Url_part_not_recognized then
      clist
    else
      []
  in
  (include_if syn.url_enable_param [';']) @
  (include_if syn.url_enable_query ['?']) @
  (include_if syn.url_enable_fragment ['#'])
;;


let path_cats_from_syntax syn extraseps =
  let separators = separators_from_syntax syn in
  path_cats (separators @ extraseps)
;;

(* path_cats_from_syntax:
 * Computes a character categorization to extract the path from an URL.
 * This depends on the syntax because the list of possible separators
 * contains the characters that may begin the next URL clause.
 *
 * Notes:
 * - The '#' is rejected unless fragments are enabled. 
 * - The '~' is accepted although this violates RFC 1738.
 *)


let other_cats_from_syntax syn =
  let include_if syn_option clist =
    if syn_option <> Url_part_not_recognized then
      clist
    else
      []
  in
  let separators =
    (include_if syn.url_enable_param [';']) @
    (include_if syn.url_enable_query ['?']) @
    (include_if syn.url_enable_fragment ['#'])
  in

  make_cats
    (lalpha @ ualpha @ digit @ safe @ extra @ 
              (separators @ ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/']))
    []
;;

    (* other_cats: character categorization to extract or check the
     * "other" part of the URL.
     *)



let extract_url_scheme s = 
  let l = String.length s in
  let k = scan_url_part s 0 l scheme_cats false in
          (* or raise Malformed_URL *)
  if k = l then raise Malformed_URL;
  assert (s.[k] = ':');
  String.lowercase(String.sub s 0 k)
;;


let ( => ) a b = not a or b;;   (* implication *)

let ( <=> ) (a:bool) b = ( a = b );;  (* equivalence *)

let url_syntax_is_valid syn =
  let recognized x = x <> Url_part_not_recognized in
  let not_recognized x = x = Url_part_not_recognized in
  (recognized syn.url_enable_password => recognized syn.url_enable_user) &
  (recognized syn.url_enable_port     => recognized syn.url_enable_host) &
  (recognized syn.url_enable_user     => recognized syn.url_enable_host) &
  not ( (recognized syn.url_enable_user ||
	 recognized syn.url_enable_password ||
	 recognized syn.url_enable_host ||
	 recognized syn.url_enable_port ||
	 recognized syn.url_enable_path) &&
	(recognized syn.url_enable_other))
;;


let partial_url_syntax syn =
  let weaken =
    function
	Url_part_not_recognized -> Url_part_not_recognized
      | Url_part_allowed        -> Url_part_allowed
      | Url_part_required       -> Url_part_allowed
  in
  { url_enable_scheme    = weaken syn.url_enable_scheme;
    url_enable_user      = weaken syn.url_enable_user;
    url_enable_password  = weaken syn.url_enable_password;
    url_enable_host      = weaken syn.url_enable_host;
    url_enable_port      = weaken syn.url_enable_port;
    url_enable_path      = weaken syn.url_enable_path;
    url_enable_param     = weaken syn.url_enable_param;
    url_enable_query     = weaken syn.url_enable_query;
    url_enable_fragment  = weaken syn.url_enable_fragment;
    url_enable_other     = weaken syn.url_enable_other;
    url_accepts_8bits    = syn.url_accepts_8bits;
    url_is_valid         = syn.url_is_valid;
  }
;;



let file_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_allowed;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_required;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
  }
;;


let ftp_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_allowed;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_allowed;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
  }
;;


let http_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_allowed;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_required;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
  }
;;


let mailto_url_syntax =
  { url_enable_scheme    = Url_part_required;
    url_enable_user      = Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_not_recognized;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_not_recognized;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_required;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
  }
;;


let null_url_syntax =
  { url_enable_scheme    = Url_part_not_recognized;
    url_enable_user      = Url_part_not_recognized;
    url_enable_password  = Url_part_not_recognized;
    url_enable_host      = Url_part_not_recognized;
    url_enable_port      = Url_part_not_recognized;
    url_enable_path      = Url_part_not_recognized;
    url_enable_param     = Url_part_not_recognized;
    url_enable_query     = Url_part_not_recognized;
    url_enable_fragment  = Url_part_not_recognized;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
  }
;;


let ip_url_syntax =
  { url_enable_scheme    = Url_part_allowed;
    url_enable_user      = Url_part_allowed;
    url_enable_password  = Url_part_allowed;
    url_enable_host      = Url_part_allowed;
    url_enable_port      = Url_part_allowed;
    url_enable_path      = Url_part_allowed;
    url_enable_param     = Url_part_allowed;
    url_enable_query     = Url_part_allowed;
    url_enable_fragment  = Url_part_allowed;
    url_enable_other     = Url_part_not_recognized;
    url_accepts_8bits    = false;
    url_is_valid         = (fun _ -> true);
  }
;;


let common_url_syntax =
  let h = Hashtbl.create 10 in
  Hashtbl.add h "file"   file_url_syntax;
  Hashtbl.add h "ftp"    ftp_url_syntax;
  Hashtbl.add h "http"   http_url_syntax;
  Hashtbl.add h "mailto" mailto_url_syntax;
  h
;;


let url_conforms_to_syntax url =
  let recognized x = x <> Url_part_not_recognized in
  let required x = x = Url_part_required in
  let present x    = x <> None in
  let syn = url.url_syntax in
  (present url.url_scheme   => recognized syn.url_enable_scheme)   &
  (present url.url_user     => recognized syn.url_enable_user)     &
  (present url.url_password => recognized syn.url_enable_password) &
  (present url.url_host     => recognized syn.url_enable_host)     &
  (present url.url_port     => recognized syn.url_enable_port)     &
  ((url.url_path <> [])     => recognized syn.url_enable_path)     &
  ((url.url_param <> [])    => recognized syn.url_enable_param)    &
  (present url.url_query    => recognized syn.url_enable_query)    &
  (present url.url_fragment => recognized syn.url_enable_fragment) &
  (present url.url_other    => recognized syn.url_enable_other)    &
  (required syn.url_enable_scheme   => present url.url_scheme)     &
  (required syn.url_enable_user     => present url.url_user)       &
  (required syn.url_enable_password => present url.url_password)   &
  (required syn.url_enable_host     => present url.url_host)       &
  (required syn.url_enable_port     => present url.url_port)       &
  (required syn.url_enable_path     => (url.url_path <> []))       &
  (required syn.url_enable_param    => (url.url_param <> []))      &
  (required syn.url_enable_query    => present url.url_query)      &
  (required syn.url_enable_fragment => present url.url_fragment)   &
  (required syn.url_enable_other    => present url.url_other)      &
  (url.url_validity or syn.url_is_valid url)
;;


let url_syntax_of_url url = url.url_syntax
;;


let modify_url
      ?syntax
      ?(encoded = false)
      ?scheme
      ?user
      ?password
      ?host
      ?port
      ?path
      ?param
      ?query
      ?fragment
      ?other
      url 
  =

  let encode = Netencoding.Url.encode ~plus:true in
  let enc x =
    if encoded then
      x
    else
      match x with
	  None -> None
	| Some x' -> Some (encode x')
  in
  let enc_list l = 
    if encoded then
      l
    else
      List.map encode l 
  in

  let new_syntax =
    match syntax with
	None -> url.url_syntax
      | Some syn -> syn
  in

  let check_string s_opt cats =
    match s_opt with
	None   -> ()
      | Some s ->
	  let l = String.length s in
	  let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
	          (* or raise Malformed_URL *)
	  if k <> l then raise Malformed_URL
  in

  let check_string_list p cats sep =
    List.iter
      (fun p_component ->
	 let l = String.length p_component in
	 let k = 
	   scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
	   (* or raise Malformed_URL *)
	 if k <> l then raise Malformed_URL;
	 if String.contains p_component sep then raise Malformed_URL;
      )
      p
  in

  (* Create the modified record: *)
  let url' =
    { 
      url_syntax   = new_syntax;
      url_validity = false;
      url_scheme   = if scheme   = None then url.url_scheme   else scheme;
      url_user     = if user     = None then url.url_user     else enc user;
      url_password = if password = None then url.url_password else enc password;
      url_host     = if host     = None then url.url_host     else host;
      url_port     = if port     = None then url.url_port     else port;
      url_path     = (match path with
			  None -> url.url_path
			| Some p -> enc_list p);
      url_param    = (match param with
			  None -> url.url_param
			| Some p -> enc_list p);
      url_query    = if query    = None then url.url_query    else enc query;
      url_fragment = if fragment = None then url.url_fragment else enc fragment;
      url_other    = if other    = None then url.url_other    else enc other;
    }
  in
  (* Check whether the URL conforms to the syntax:
   *)
  if not (url_conforms_to_syntax url') then raise Malformed_URL;
  if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
  if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
  if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
  (* Check every part: *)
  check_string url'.url_scheme   scheme_cats;
  check_string url'.url_user     login_cats;
  check_string url'.url_password login_cats;
  check_string url'.url_host     host_cats;
  (match url'.url_port with 
       None -> ()
     | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
  );
  let path_cats  = path_cats_from_syntax  new_syntax [] in
  let other_cats = other_cats_from_syntax new_syntax in
  check_string url'.url_query    path_cats;
  check_string url'.url_fragment path_cats;
  check_string url'.url_other    other_cats;
  (* Check the lists: *)
  check_string_list url'.url_param path_cats ';';
  check_string_list url'.url_path  path_cats '/';
  (* Further path checks: *)
  begin match url'.url_path with
      [] ->
	(* The path is empty: There must not be a 'param' or 'query' *)
	if url'.url_host <> None then begin
	  if url'.url_param <> [] then raise Malformed_URL;
	  if url'.url_query <> None then raise Malformed_URL;
	end
    | ["";""] ->
	(* This is illegal. *)
	raise Malformed_URL;
    | "" :: p' ->
	(* The path is absolute: always ok *)
	()
    | _ ->
	(* The path is relative: there must not be a host *)
	if url'.url_host <> None then raise Malformed_URL;
  end;
  begin match url'.url_path with
      _ :: rest ->              (* "//" ambiguity *)
	begin match List.rev rest with
	    _ :: rest' -> 
	      if List.exists (fun p -> p = "") rest' then
		raise Malformed_URL;
	  | [] ->
	      ()
	end
    | [] ->
	()
  end;
  (* Cache that the URL is valid: *)
  url'.url_validity <- true;

  url'
;;


let null_url =
  { 
    url_syntax   = null_url_syntax;
    url_validity = true;
    url_scheme   = None;
    url_user     = None;
    url_password = None;
    url_host     = None;
    url_port     = None;
    url_path     = [];
    url_param    = [];
    url_query    = None;
    url_fragment = None;
    url_other    = None;
  }
;;


let make_url
      ?(encoded = false)
      ?scheme
      ?user
      ?password
      ?host
      ?port
      ?path
      ?param
      ?query
      ?fragment
      ?other
      url_syntax
  =

  if not (url_syntax_is_valid url_syntax) then
    invalid_arg "Neturl.make_url";

  modify_url
    ~encoded:encoded
    ~syntax:url_syntax
    ?scheme:scheme
    ?user:user
    ?password:password
    ?host:host
    ?port:port
    ?path:path
    ?param:param
    ?query:query
    ?fragment:fragment
    ?other:other
    null_url
;;


let remove_from_url
      ?(scheme = false)
      ?(user = false)
      ?(password = false)
      ?(host = false)
      ?(port = false)
      ?(path = false)
      ?(param = false)
      ?(query = false)
      ?(fragment = false)
      ?(other = false)
      url
  =

  make_url
    ~encoded:  true
    ?scheme:   (if scheme   then None else url.url_scheme)
    ?user:     (if user     then None else url.url_user)
    ?password: (if password then None else url.url_password)
    ?host:     (if host     then None else url.url_host)
    ?port:     (if port     then None else url.url_port)
    ?path:     (if path     then None else Some url.url_path)
    ?param:    (if param    then None else Some url.url_param)
    ?query:    (if query    then None else url.url_query)
    ?fragment: (if fragment then None else url.url_fragment)
    ?other:    (if other    then None else url.url_other)
    url.url_syntax
;;


let default_url
      ?(encoded = false)
      ?scheme
      ?user
      ?password
      ?host
      ?port
      ?(path = [])
      ?(param = [])
      ?query
      ?fragment
      ?other
      url
  =

  let encode = Netencoding.Url.encode ~plus:true in

  let enc x =
    if encoded then
      x
    else
      match x with
	  None -> None
	| Some x' -> Some (encode x')
  in

  let enc_list l = 
    if encoded then
      l
    else
      List.map encode l 
  in

  let pass_if_missing current arg =
    match current with
	None -> arg
      | _    -> current
  in

  make_url
    ~encoded:  true
    ?scheme:   (pass_if_missing url.url_scheme   scheme)
    ?user:     (pass_if_missing url.url_user     (enc user))
    ?password: (pass_if_missing url.url_password (enc password))
    ?host:     (pass_if_missing url.url_host     host)
    ?port:     (pass_if_missing url.url_port     port)
    ~path:     (if url.url_path  = [] then enc_list path  else url.url_path)
    ~param:    (if url.url_param = [] then enc_list param else url.url_param)
    ?query:    (pass_if_missing url.url_query    (enc query))
    ?fragment: (pass_if_missing url.url_fragment (enc fragment))
    ?other:    (pass_if_missing url.url_other    (enc other))
    url.url_syntax
;;


let undefault_url
      ?scheme
      ?user
      ?password
      ?host
      ?port
      ?path
      ?param
      ?query
      ?fragment
      ?other
      url
  =

  let remove_if_matching current arg =
    match current with
	None -> None
      | Some x -> 
	  (match arg with
	       None -> current
	     | Some x' ->
		 if x=x' then
		   None
		 else
		   current)
  in

  make_url
    ~encoded:  true
    ?scheme:   (remove_if_matching url.url_scheme   scheme)
    ?user:     (remove_if_matching url.url_user     user)
    ?password: (remove_if_matching url.url_password password)
    ?host:     (remove_if_matching url.url_host     host)
    ?port:     (remove_if_matching url.url_port     port)
    ~path:     (match path with
		     None -> url.url_path
		   | Some x ->
		       if x = url.url_path then
			 []
		       else
			 url.url_path)
    ~param:    (match param with
		     None -> url.url_param
		   | Some x ->
		       if x = url.url_param then
			 []
		       else
			 url.url_param)
    ?query:    (remove_if_matching url.url_query    query)
    ?fragment: (remove_if_matching url.url_fragment fragment)
    ?other:    (remove_if_matching url.url_other    other)
    url.url_syntax
;;


let url_provides 
      ?(scheme = false)
      ?(user = false)
      ?(password = false)
      ?(host = false)
      ?(port = false)
      ?(path = false)
      ?(param = false)
      ?(query = false)
      ?(fragment = false)
      ?(other = false)
      url
  =
  
  (scheme   => (url.url_scheme   <> None)) &
  (user     => (url.url_user     <> None)) &
  (password => (url.url_password <> None)) &
  (host     => (url.url_host     <> None)) &
  (port     => (url.url_port     <> None)) &
  (path     => (url.url_path     <> []))   &
  (param    => (url.url_param    <> [])) &
  (query    => (url.url_query    <> None)) &
  (fragment => (url.url_fragment <> None)) &
  (other    => (url.url_other    <> None))
;;
  

let return_if value =
  match value with
      None -> raise Not_found
    | Some x -> x
;;


let decode_if want_encoded value =
  let value' = return_if value in
  if want_encoded then
    value'
  else
    Netencoding.Url.decode value'     (* WARNING: not thread-safe! *)
;;


let decode_path_if want_encoded value =
  if want_encoded then
    value
  else
    List.map (Netencoding.Url.decode ~plus:true) value 
        (* WARNING: not thread-safe! *)
;;


let url_scheme                    url = return_if url.url_scheme;;
let url_user     ?(encoded=false) url = decode_if encoded url.url_user;;
let url_password ?(encoded=false) url = decode_if encoded url.url_password;;
let url_host                      url = return_if url.url_host;;
let url_port                      url = return_if url.url_port;;
let url_path     ?(encoded=false) url = decode_path_if encoded url.url_path;;
let url_param    ?(encoded=false) url = decode_path_if encoded url.url_param;;
let url_query    ?(encoded=false) url = decode_if encoded url.url_query;;
let url_fragment ?(encoded=false) url = decode_if encoded url.url_fragment;;
let url_other    ?(encoded=false) url = decode_if encoded url.url_other;;


let string_of_url url =
  if not (url.url_validity) then
    failwith "Neturl.string_of_url: URL not flagged as valid";
  (match url.url_scheme with
       None -> ""
     | Some s -> s ^ ":") ^ 
  (match url.url_host with
       None -> ""
     | Some host ->
	 "//" ^ 
	 (match url.url_user with
	      None -> "" 
	    | Some user -> 
		user ^ 
		(match url.url_password with
		     None -> ""
		   | Some password ->
		       ":" ^ password 
		) ^ 
		"@") ^ 
	 host ^ 
	 (match url.url_port with
	      None -> ""
	    | Some port ->
		":" ^ string_of_int port)) ^ 
  (match url.url_path with
     | [""] ->
	 "/"
     | x :: p  when  url.url_scheme = None &&
                     url.url_host = None &&
	             String.contains x ':' 
	->
	  (* Really a special case: The colon contained in 'x' may cause
	   * that a prefix of 'x' is interpreted as URL scheme. In this
	   * case, "./" is prepended (as recommended in RFC 1808, 5.3).
	   *)
	  "./"
     | _ ->
	 ""
  ) ^
  String.concat "/" url.url_path ^ 
  (match url.url_other with
       None -> ""
     | Some other ->
	 other) ^ 
  String.concat ""  (List.map (fun s -> ";" ^ s) url.url_param) ^ 
  (match url.url_query with
       None -> ""
     | Some query ->
	 "?" ^ query) ^ 
  (match url.url_fragment with
       None -> ""
     | Some fragment ->
	 "#" ^ fragment)
;;


let url_of_string url_syntax s =
  let l = String.length s in
  let recognized x = x <> Url_part_not_recognized in

  let rec collect_words terminators eof_char cats k =
    (* Collect words as recognized by 'cats', starting at position 'k' in
     * 's'. Collection stops if one the characters listed in 'terminators'
     * is found. If the end of the string is reached, it is treated as
     * 'eof_char'.
     *)
    let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in  
             (* or raise Malformed_URL *)
    let word, sep =
      String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
    if List.mem sep terminators then
      [word, sep], k'
    else
      let word_sep_list', k'' = 
	collect_words terminators eof_char cats (k'+1) in
      ((word, sep) :: word_sep_list'), k''
  in

  (* Try to extract the scheme name: *)
  let scheme, k1 =
    if recognized url_syntax.url_enable_scheme then
      try
	let k = scan_url_part s 0 l scheme_cats false in
        (* or raise Malformed_URL *)
	if k = l then raise Malformed_URL;
	assert (s.[k] = ':');
	Some (String.sub s 0 k), (k+1)
      with
	  Malformed_URL -> None, 0
    else
      None, 0
  in

  (* If there is a "//", a host will follow: *)
  let host, port, user, password, k2 =
    if recognized url_syntax.url_enable_host  &&
       k1 + 2 <= l  &&  s.[k1]='/'  && s.[k1+1]='/' then begin

      let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2) 
      in
          (* or raise Malformed_URL *)

      let int x =
	try int_of_string x with _ -> raise Malformed_URL in

      match word_sep_list with
	  [ host, ('/'|'#') ] ->
	    Some host, None, None, None, k'
	| [ host, ':'; port, ('/'|'#') ] ->
	    Some host, Some (int port), None, None, k'
	| [ user, '@'; host, ('/'|'#') ] ->
	    Some host, None, Some user, None, k'
	| [ user, '@'; host, ':'; port, ('/'|'#') ] ->
	    Some host, Some (int port), Some user, None, k'
	| [ user, ':'; password, '@'; host, ('/'|'#') ] ->
	    Some host, None, Some user, Some password, k'
	| [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
	    Some host, Some (int port), Some user, Some password, k'
	| _ ->
	    raise Malformed_URL
    end
    else
      None, None, None, None, k1
  in

  let path, k3 =
    if recognized url_syntax.url_enable_path  &&
       k2 < l  (*  &&  s.[k2]='/'  *)
    then begin
      let cats = path_cats_from_syntax url_syntax [ '/' ] in
      let seps = separators_from_syntax url_syntax in

      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
          (* or raise Malformed_URL *)
      match word_sep_list with
	  [ "", '/'; "", _ ] ->
	    [ "" ], k'
	| [ "", _ ] ->
	    [], k'
	| _ ->
	    List.map fst word_sep_list, k'
    end
    else begin
      (* If there is a single '/': skip it *)
      if not (recognized url_syntax.url_enable_other) &&
	 k2 < l  &&  s.[k2]='/'
      then
	[], (k2+1)
      else
	[], k2
    end
  in

  let other, k4 =
    if recognized url_syntax.url_enable_other  &&
       k3 < l 
    then begin
      
      let cats = other_cats_from_syntax url_syntax in

      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
          (* or raise Malformed_URL *)

      match word_sep_list with
	  [ other, _ ] -> Some other, k'
	| _ -> assert false
    end
    else
      None, k3
  in

  let param, k5 =
    if recognized url_syntax.url_enable_param  &&
       k4 < l  &&  s.[k4]=';' 
    then begin
      let cats  = path_cats_from_syntax url_syntax [] in
      let seps  = separators_from_syntax url_syntax in
      let seps' = List.filter (fun c -> c <> ';') seps in

      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
          (* or raise Malformed_URL *)
      
      List.map fst word_sep_list, k'
    end
    else
      [], k4
  in

  let query, k6 =
    if recognized url_syntax.url_enable_query  &&
       k5 < l  &&  s.[k5]='?'
    then begin
      let cats  = path_cats_from_syntax url_syntax [] in
      let seps  = separators_from_syntax url_syntax in
      
      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
          (* or raise Malformed_URL *)

      match word_sep_list with
	  [ query, _ ] -> Some query, k'
	| _ -> assert false
    end
    else
      None, k5
  in

  let fragment, k7 =
    if recognized url_syntax.url_enable_fragment  &&
       k6 < l  &&  s.[k6]='#'
    then begin
      let cats  = path_cats_from_syntax url_syntax [] in
      let seps  = separators_from_syntax url_syntax in
      
      (* Note: '>' is not allowed within URLs; because of this we can use
       * it as end-of-string character.
       *)

      let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
          (* or raise Malformed_URL *)

      match word_sep_list with
	  [ fragment, _ ] -> Some fragment, k'
	| _ -> assert false
    end
    else
      None, k6
  in

  if k7 <> l then raise Malformed_URL;

  make_url
    ~encoded:true
    ?scheme:scheme
    ?user:user
    ?password:password
    ?host:host
    ?port:port
    ~path:path
    ~param:param
    ?query:query
    ?fragment:fragment
    ?other:other
    url_syntax
;;


let split_path s =
  let l = String.length s in
  let rec collect_words k =
    let k' = 
      try
	String.index_from s k '/'
      with
	  Not_found -> l
    in
    let word = String.sub s k (k'-k) in
    if k' >= l then
      [word]
    else
      word :: collect_words (k'+1)
  in
  match collect_words 0 with
      [ "" ] -> []
    | [ "";"" ] -> [ "" ]
    | other -> other
;;


let join_path l = 
  match l with
      [ "" ] -> "/"
    | _      -> String.concat "/" l;;


let norm_path l = 

  let rec remove_slash_slash l first =
    match l with
      | [ "" ] ->
	  [ "" ]
      | [ ""; "" ] when first ->
	  [ "" ]
      | "" :: l' when not first ->
	  remove_slash_slash l' false
      | x :: l' ->
	  x :: remove_slash_slash l' false
      | [] ->
	  []
  in

  let rec remove_dot l first =
    match l with
      | ([ "." ] | ["."; ""]) ->
	  if first then [] else [ "" ]
      |	"." :: x :: l' ->
	  remove_dot (x :: l') false
      | x :: l' ->
	  x :: remove_dot l' false
      | [] ->
	  []
  in

  let rec remove_dot_dot_once l first =
    match l with
	x :: ".." :: [] when x <> "" && x <> ".." && not first ->
	  [ "" ]
      |	x :: ".." :: l' when x <> "" && x <> ".." ->
	  l'
      | x :: l' ->
	  x :: remove_dot_dot_once l' false
      | [] ->
	  raise Not_found
  in

  let rec remove_dot_dot l =
    try
      let l' = remove_dot_dot_once l true in
      remove_dot_dot l'
    with
	Not_found -> l
  in

  let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
  match l' with
      [".."] -> [".."; ""]
    | ["";""] -> [ "" ]
    | _      -> l'
;;


let apply_relative_url baseurl relurl =
  if not (baseurl.url_validity) or not (relurl.url_validity) then
    failwith "Neturl.apply_relative_url: URL not flagged as valid";

  if relurl.url_scheme <> None then
    modify_url 
      ~syntax:baseurl.url_syntax           (* inherit syntax *)
      relurl
  else
    if relurl.url_host <> None then
      modify_url 
	~syntax:baseurl.url_syntax         (* inherit syntax and scheme *)
	?scheme:baseurl.url_scheme
	relurl
    else
      match relurl.url_path with
	  "" :: other ->
	    (* An absolute path *)
	    modify_url 
	      ~syntax:baseurl.url_syntax   (* inherit syntax, scheme, and *)
	      ~encoded:true
	      ?scheme:baseurl.url_scheme   (* login info *)
	      ?host:baseurl.url_host
	      ?port:baseurl.url_port
	      ?user:baseurl.url_user
	      ?password:baseurl.url_password
	      relurl
	| [] ->
	    (* Empty: Inherit also path, params, query, and fragment *)
	    let new_params, new_query, new_fragment =
	      match relurl.url_param, relurl.url_query, relurl.url_fragment
	      with
		  [], None, None ->
		    (* Inherit all three *)
		    baseurl.url_param, baseurl.url_query, baseurl.url_fragment
		| [], None, f ->
		    (* Inherit params and query *)
		    baseurl.url_param, baseurl.url_query, f
		| [], q, f ->
		    (* Inherit params *)
		    baseurl.url_param, q, f
		| p, q, f ->
		    (* Inherit none of them *)
		    p, q, f
	    in
	    modify_url 
	      ~syntax:baseurl.url_syntax
	      ~encoded:true
	      ?scheme:baseurl.url_scheme
	      ?host:baseurl.url_host
	      ?port:baseurl.url_port
	      ?user:baseurl.url_user
	      ?password:baseurl.url_password
	      ~path:baseurl.url_path
	      ~param:new_params
	      ?query:new_query
	      ?fragment:new_fragment
	      relurl
	| relpath ->
	    (* A relative path *)
	    let rec change_path basepath =
	      match basepath with
		| [] ->
		    relpath
		| [ "" ] ->
		    "" :: relpath
		| [ x ] ->
		    relpath
		| x :: basepath' ->
		    x :: change_path basepath'
	    in
	    let new_path = norm_path (change_path baseurl.url_path) in
	    modify_url 
	      ~syntax:baseurl.url_syntax   (* inherit syntax, scheme, and *)
	      ~encoded:true
	      ?scheme:baseurl.url_scheme   (* login info *)
	      ?host:baseurl.url_host
	      ?port:baseurl.url_port
	      ?user:baseurl.url_user
	      ?password:baseurl.url_password
	      ~path:new_path               (* and change path *)
	      relurl

;;


let print_url url =
  Format.print_string ("<URL:" ^ string_of_url url ^ ">")
;;


(* ======================================================================
 * History:
 * 
 * $Log: neturl.ml,v $
 * Revision 2.1  2001/09/14 14:22:34  stolpmann
 * 	Initial revision (sourceforge)
 *
 *
 * ======================================================================
 * Revision 1.6  2001/08/30 19:46:16  gerd
 * 	Follow-up.
 *
 * Revision 1.5  2001/08/21 21:33:16  gerd
 * 	Fix: apply_relative_url works now for the case that the
 * baseurl is absolute with path = "/", and relurl is relative, e.g.
 * baseurl = file://host/, relurl = file.
 *
 * Revision 1.4  2000/07/04 21:50:51  gerd
 * 	Fixed typo.
 *
 * Revision 1.3  2000/06/26 22:57:49  gerd
 * 	Change: The record 'url_syntax' has an additional component
 * 'url_accepts_8bits'. Setting this option to 'true' causes that
 * the bytes >= 0x80 are no longer rejected.
 *
 * Revision 1.2  2000/06/25 19:39:48  gerd
 * 	Lots of Bugfixes.
 *
 * Revision 1.1  2000/06/24 20:19:59  gerd
 * 	Initial revision.
 *
 * 
 *)
