########################################################################
#
# File Name:            Uri.py
#
# Documentation:        http://docs.ftsuite.com/4Suite/Uri.py.html
#
"""
WWW: http://4suite.org/4Suite         e-mail: support@4suite.org

Copyright (c) 2000-2001 Fourthought Inc., USA.   All Rights Reserved.
See  http://4suite.org/COPYRIGHT  for license and copyright information
"""

import string, urlparse, urllib, os
from Ft.Lib import Uuid

class BaseUriResolver:
    """
    Default URI resolver: absolutizes URI references against a base,
    opens the resource a URI identifies, and generates new URIs.
    """

    def normalize(self, uri, base=''):
        """
        Return uri resolved against base.

        The join is only attempted when uri has no scheme or one of the
        http, ftp, file or gopher schemes; a URI with any other scheme
        (for example urn:) is returned unchanged.
        """
        #urlparse yields (scheme, netloc, path, params, query, fragment)
        scheme = urlparse.urlparse(uri)[0]
        if scheme in ('', 'http', 'ftp', 'file', 'gopher'):
            uri = urllib.basejoin(base, uri)
        return uri

    def resolve(self, uri, base=''):
        """
        Normalize uri against base and return an open, readable stream
        for the resource it identifies.

        If the normalized URI names an existing local path it is opened
        directly; otherwise it is handed to urllib.urlopen.  Errors from
        open() or urllib.urlopen() are not caught here and propagate to
        the caller, who is also responsible for closing the stream.
        """
        uri = self.normalize(uri, base)
        if os.access(uri, os.F_OK):
            #Hack because urllib breaks on Windows paths.
            #Binary mode keeps this branch consistent with urlopen, which
            #hands back the raw bytes: text mode would translate newlines
            #(and stop at Ctrl-Z) on Windows.
            stream = open(uri, 'rb')
        else:
            stream = urllib.urlopen(uri)
        return stream

    def generate(self, hint=None):
        """
        The hint is an object that helps decide what to generate
        Default action is to generate a UUID URN

        This implementation ignores the hint and returns 'urn:uuid:'
        followed by the string form of a newly generated UUID.
        """
        return 'urn:uuid:'+Uuid.UuidAsString(Uuid.GenerateUuid())


#Module-level instance of BaseUriResolver, created once at import time
BASIC_RESOLVER = BaseUriResolver()


import os, urlparse, urllib

def FileNameToUrl(fname):
    """
    Return a file: URL for fname, resolved against the current working
    directory.

    fname is joined to the base as a URI reference and is not itself
    escaped; a relative name is appended to the working directory, while
    an absolute path replaces the directory part of the base.
    """
    #pathname2url escapes characters such as ' ', '#' and '?' and converts
    #platform-specific path syntax; left raw, a '#' or '?' in the directory
    #name would be parsed as a fragment/query and truncate the base
    cwd = urllib.pathname2url(os.getcwd())
    #Exactly one trailing slash marks the base as a directory, so a relative
    #fname is appended instead of replacing the last path segment
    urlbase = urlparse.urlunparse(("file", '', cwd.rstrip('/') + '/', '', '', ''))
    return urllib.basejoin(urlbase, fname)


##Background on file: URL handling, quoted from an xml-sig mailing-list post:
##From: "Mark D. Anderson" <mda@discerning.com>
##To: <xml-sig@python.org>
##Subject: [XML-SIG] file urls in urllib
##Date: Wed, 7 Mar 2001 10:25:56 -0800 (11:25 MST)

##(was: "saxlib, xml, _xmlplus, etc.")

##Martin v. Loewis says:
##> Likely, there should be, yes - but there appears to be no expert that
##> can say for sure what the "right way" is. 

##true enough. a lot has happened since rfc1738.

##>In any case, you'll need to
##> pass URLs to urllib, and as system identifiers to XML libraries. On
##> Unix, passing file names should "work" in most cases; on Windows,
##> things are a bit more complicated.

##and unfortunately often the effort to make the unix case "work" makes the
##windows case work less often. i've had the same difficulty with various java tools.
##they check for a leading slash or a "^\w:" match to determine whether the 
##string which is passed in is a uri or a host path.

##> If you can give a consistent story of how things *should* work, I'll
##> start a FAQ list (since your message is the third instance of this
##> question during this year - which makes it frequent :-). Out of
##> curiosity: how do you interpret RFC 1738 with regard to drive letters?
##> I.e. what is the URL referring to C:\autoexec.bat?

##it really is a morass. here are some notes which mostly just serve to clarify how awful it is....

##rfc 1738 states:

##   A file URL takes the form:
##       file://<host>/<path>
##   where <host> is the fully qualified domain name of the system on
##   which the <path> is accessible, and <path> is a hierarchical
##   directory path of the form <directory>/<directory>/.../<name>.
##   [...]
##   As a special case, <host> can be the string "localhost" or the empty
##   string; this is interpreted as `the machine from which the URL is
##   being interpreted'.


##So this would mean that if localhost is implied, all file urls should have (at least) three slashes.
##Assuming that the rfc means that the "/" is purely syntactic, what you should expect to work is:
##   file:////etc/passwd        (4 slashes, because of the leading "/")
##   file:///c:\autoexec.bat
##   file:///\\drv\autoexec.bat
##   file://///drv/autoexec.bat       (5 slashes, since forward slashes work on win32 too)

##but:
##- there is sometimes the convention (not rfc that i know of) of allowing "|" for ":"
##- there is sometimes the convention (not rfc that i know of) of allowing file:<path> without the 3 slashes
##- most software gives unhelpful errors if someone attempts to specify a host in the file url
##- relative urls (i.e. without a scheme; see rfcs 1808 and 2396) complicate matters; in particular
##  they indicate that absolute urls are signaled with a leading slash, suggesting "/c:/autoexec.bat",
##  which rarely works in any software.
##- existing software usually treats the url "/" before the path to be part of the path, using 3 slashes, not 4
##  and in particular most url libraries return the leading slash in their path() function, and CGI variables
##  like SCRIPT_PATH usually do too. It seems clear though that the original intent was for the path
##  to not include the url syntactic separator, and in fact the rfc for NFS urls (rfc2224) makes this explicit

##personally, what i'd suggest is:
##1. RFC-compliant urls must be handled. 
##2. Any code which attempts to accept a string which may be either a url or a local path,
##should be as flexible on win32 as unix. That is, if the code accepts "/etc/passwd", it should
##also accept "c:/autoexec.bat", even though "c:" might be mistaken as a url scheme.
##there is zero chance of a single-letter url scheme being standardized, and anyway it actually
##isn't ambiguous because win32 paths are never of the form "c://", so the double slash can
##distinguish things.
##3. When not introducing conflicts with current standards or other platforms, software should
##match the defacto behavior of internet explorer when parsing file: urls.
##4. URL libraries must at least document what they choose to return as the path for the strings
##   file:, http://localhost, file:/, http://localhost/

##Today, python urllib is not doing any of these, rejecting file:///c:\autoexec.bat and c:/autoexec.bat

