# A store abstraction: basically we care about
# the classes 'story' and 'category', and the
# relation 'contains' (category->category|story)

from sets import Set

from resources import *

# A store of RDF triples.

class AbstractStore:
    # Interface of a triple store.  Every method here is a placeholder
    # that raises NotImplementedError; concrete stores override them.
    #
    # A real exception class is raised rather than a bare string: string
    # exceptions cannot be caught by type and later interpreters reject
    # them outright with a TypeError.

    # Return a set of objects for which there is an asserted triple
    # (subject, predicate, object)
    # in the store.
    def queryForObjects(self, subject, predicate):
        raise NotImplementedError("AbstractStore: not implemented")

    # Return a set of subjects for which there is an asserted triple
    # (subject, predicate, object)
    # in the store.
    def queryForSubjects(self, predicate, object):
        raise NotImplementedError("AbstractStore: Not implemented")

    # Return an iterable over the subjects this store knows about.
    def subjects(self):
        raise NotImplementedError("AbstractStore: Not implemented")

    # Assert a relationship between three er, things
    # Technically the subject and predicate must be resources,
    # but we trust people =)
    def relate(self, subject, predicate, object):
        raise NotImplementedError("AbstractStore: Not implemented")
    

# An in-memory triple store, indexed both by subject and by object.
# Subclasses may override subjects(), _loadSchema() and the _load*Map()
# hooks to fill the indexes lazily; the rest is generic enough.
class BaseStore(AbstractStore):

    # One index entry.  _map holds predicate -> Set of related things;
    # _loaded is a 0/1 flag that lazily-loading subclasses set once they
    # have populated the entry (BaseStore itself never reads it).
    class Map:
        def __init__(self, map):
            self._map = map
            self._loaded = 0

    def __init__(self):
        self._subjects = {}  # subject -> Map(predicate -> Set of objects)
        self._objects = {}   # object  -> Map(predicate -> Set of subjects)
        self._loadSchema()

    # All of the things we know about our types, schema-wise.
    # Called once from __init__; a no-op here.
    def _loadSchema(self):
        pass

    # Return the Map registered for subject, registering an empty one on
    # first use.  (setdefault would construct a throwaway Map on every
    # call, even when the entry already exists.)
    def _subjectMap(self, subject):
        entry = self._subjects.get(subject)
        if entry is None:
            entry = self._subjects[subject] = BaseStore.Map({})
        return entry

    # Return the Map registered for object, registering an empty one on
    # first use.
    def _objectMap(self, object):
        entry = self._objects.get(object)
        if entry is None:
            entry = self._objects[object] = BaseStore.Map({})
        return entry

    # Return a Map for this subject with an internal map of:
    # predicate -> object set
    # Hook for subclasses that load a subject's triples on demand.
    def _loadSubjectMap(self, subject):
        return self._subjectMap(subject)

    # Return a Map for this object with an internal map of:
    # predicate -> subject set
    # Hook for subclasses that load an object's triples on demand.
    def _loadObjectMap(self, object):
        return self._objectMap(object)

    # Return a set of objects for which there is an asserted triple
    # (subject, predicate, object)
    # in the store.  On a hit this is the store's own set, not a copy,
    # so callers should treat it as read-only; on a miss it is a fresh
    # empty set.
    def queryForObjects(self, subject, predicate):
        by_predicate = self._loadSubjectMap(subject)._map
        return by_predicate.get(predicate, Set([]))

    # Return a set of subjects for which there is an asserted triple
    # (subject, predicate, object)
    # in the store.  The same read-only caveat as queryForObjects applies.
    def queryForSubjects(self, predicate, object):
        by_predicate = self._loadObjectMap(object)._map
        return by_predicate.get(predicate, Set([]))

    # Return an iterator over the subjects that have at least one
    # asserted triple.
    def subjects(self):
        # Merely querying for an unknown subject registers an empty Map
        # for it (see _subjectMap), so entries without any predicate are
        # skipped rather than reported as subjects.  The list is built up
        # front so callers may query or relate while iterating.
        index = self._subjects
        return iter([s for s in index if index[s]._map])

    # Assert a relationship between three er, things
    # Technically the subject and predicate must be resources,
    # but we trust people =)
    # The triple is recorded in both indexes so it can be found from
    # either end.
    def relate(self, subject, predicate, object):
        forward = self._subjectMap(subject)._map
        if predicate in forward:
            forward[predicate].add(object)
        else:
            forward[predicate] = Set([object])

        backward = self._objectMap(object)._map
        if predicate in backward:
            backward[predicate].add(subject)
        else:
            backward[predicate] = Set([subject])


# A store that combines its own triples with the answers of any number
# of other stores.

class AggregatedStore (BaseStore):

    # stores: zero or more stores to consult in addition to this one.
    def __init__(self, *stores):
        BaseStore.__init__(self)
        self._stores = list(stores)

    # Add one more store to consult.
    def addStore(self, store):
        self._stores.append(store)

    # Union of this store's own answer and every member store's answer.
    # A new set is built at each step (|, not |=) so no store's own set
    # is modified.
    def queryForObjects(self, subject, predicate):
        merged = BaseStore.queryForObjects(self, subject, predicate)
        for member in self._stores:
            merged = merged | member.queryForObjects(subject, predicate)
        return merged

    # Union of this store's own answer and every member store's answer.
    def queryForSubjects(self, predicate, object):
        merged = BaseStore.queryForSubjects(self, predicate, object)
        for member in self._stores:
            merged = merged | member.queryForSubjects(predicate, object)
        return merged

    # Generate this store's own subjects first, then each member store's
    # subjects in order, never yielding the same subject twice.
    def subjects(self):
        seen = {}
        for subject in BaseStore.subjects(self):
            seen[subject] = 1
            yield subject
        for member in self._stores:
            for subject in member.subjects():
                if subject in seen:
                    continue
                seen[subject] = 1
                yield subject


import os
from urllib import pathname2url, url2pathname
from rfc822 import Message # So we don't conflict with our prefix below

# Property prefixes used by loadRFC822File below.
# (Presumably attribute access on a Prefix yields the property for that
# name under the prefix -- confirm in the resources module.)
#
# For MIME headers: one property per header name, plus 'body'.
# NOTE: this binds the module-level name 'rfc822', which is why only
# Message is imported from the rfc822 module above.
rfc822 = Prefix("MIME:")
# For filesystem facts about a file ('mtime' and 'name').
# NOTE: this is a Prefix, not the standard library 'stat' module.
stat = Prefix('stat:')

# Directory filter: accept a name unless it is hidden by the leading-'.'
# naming convention, or is the bookkeeping directory of CVS ('CVS') or
# GNU Arch ('{arch}').
def isNotDottedCVSorArch(dirname):
    if dirname.startswith('.'):
        return False
    return dirname not in ('CVS', '{arch}')

# Return the extension of the final component of path: the text after
# that component's last '.', or '' if it has no dot or ends with one.
def extensionOf(path):
    # Look only at the basename; a dot in a directory name
    # ("pkg.d/README") must not be mistaken for an extension separator.
    filename = os.path.basename(path)
    dot = filename.rfind('.')
    if dot < 0:
        return ''
    return filename[dot + 1:]

# %%% TODO Make this take a header -> property map
#
# Load the RFC 822 style file at path into store as properties of
# subject: stat.mtime (whole seconds), stat.name (the filename without
# its extension), one rfc822.<header> property per header (header name
# lower-cased), and rfc822.body holding the rest of the file.
# Does nothing if the file cannot be stat'ed.
def loadRFC822File(subject, path, store):
    try:
        info = os.stat(path)
    except OSError:
        # Missing or inaccessible file: assert nothing about it.
        return

    # 'stat' here is the module-level Prefix, not the stdlib module.
    store.relate(subject, stat.mtime, int(info.st_mtime))

    # Strip the extension only when there is a dot.  Slicing by the
    # extension length unconditionally chopped the last character off
    # extension-less names ("README" became "READM").
    filename = os.path.basename(path)
    dot = filename.rfind('.')
    if dot > -1:
        name = filename[:dot]
    else:
        name = filename
    store.relate(subject, stat.name, name)

    f = open(path)
    try:
        # Message(f) consumes the header block, so whatever f.read()
        # returns afterwards is the body.
        headers = Message(f)
        for (key, val) in headers.items():
            store.relate(subject, getattr(rfc822, key.lower()), val)
        store.relate(subject, rfc822.body, f.read())
    finally:
        # Close the file even if parsing or relate() raises.
        f.close()

# Default extension map for FileStoryFactory:
# file extension -> (rdf:type, loader procedure).
TEXT_EXTENSION_MAP = {
    'txt': (fuschia.story, loadRFC822File),
}

# Knows how to interpret files as stories: maps a file extension to an
# rdf:type and to the procedure that loads that kind of file.
class FileStoryFactory:

    # extensionMap is a dictionary of
    # file extension -> (rdf:type, factory procedure)
    # It is split into two per-extension lookup tables.
    def __init__(self, extensionMap = TEXT_EXTENSION_MAP):
        types_by_ext = {}
        loaders_by_ext = {}
        for ext, (rdftype, loader) in extensionMap.items():
            types_by_ext[ext] = rdftype
            loaders_by_ext[ext] = loader
        self._typemap = types_by_ext
        self._factorymap = loaders_by_ext

    # True if extension has an entry in the extension map.
    def isKnownExtension(self, extension):
        known = self.knownExtensions()
        return extension in known

    # The extensions this factory can interpret.
    def knownExtensions(self):
        return self._typemap.keys()

    # The rdf:types this factory can produce.
    def knownTypes(self):
        return self._typemap.values()

    # True if type_resource is one of the types this factory produces.
    def isKnownType(self, type_resource):
        known = self.knownTypes()
        return type_resource in known

    # Run the loader registered for path's extension, passing it
    # (subject, path, store).  Unknown extensions are silently ignored.
    def loadFileProperties(self, subject, path, store):
        ext = extensionOf(path)
        if ext not in self._factorymap:
            return # Read my lips: no new properties!
        self._factorymap[ext](subject, path, store)

    # The rdf:type registered for ext, or None if there is none.
    def typeOf(self, ext):
        return self._typemap.get(ext) # or UNKNOWN_TYPE maybe?

# A store that gets its subjects from the filesystem somewhere, treating
# directories as categories and files as stories.  Triples are loaded
# lazily: a subject's (or object's) Map is filled in the first time it is
# queried and then marked _loaded.
class FilesystemStore (BaseStore):

    # fsroot is a directory path of the top-level directory to look in;
    # rootURL is the URL mapped to the fsroot;
    # factory decides which files are stories and loads their properties
    # (the default instance is created once and shared by every store
    # that does not pass its own);
    # dirfilter is a predicate on a bare name that says whether it may be
    # a category.
    def __init__(self, fsroot, rootURL, factory = FileStoryFactory(), dirfilter=isNotDottedCVSorArch):
        BaseStore.__init__(self)
        self._root = fsroot
        # Keep the prefix without trailing slashes; _toURL puts exactly
        # one '/' between the prefix and the relative part.
        while rootURL.endswith('/'):
            rootURL = rootURL[:-1]
        self._prefix = rootURL
        self._factory = factory
        self._filter = dirfilter
        self._urls = None  # cache for subjects(), built on first call

    # Return an iterator over a Resource for the root, every accepted
    # directory below it and every story file in those directories.
    # The tree is walked once; later calls reuse the cached list.
    def subjects(self):
        if self._urls is None:
            urls = [Resource(self._toURL(self._root))]
            for dirpath, dirs, files in os.walk(self._root):
                for d in list(dirs): # copy so we don't upset iteration
                    if self._isCategoryFilename(d):
                        # The trailing '' makes the path end in a separator
                        urls.append(Resource(self._toURL(os.path.join(dirpath, d, ''))))
                    else:
                        # Removing from dirs in place stops os.walk from
                        # descending into the rejected directory
                        dirs.remove(d)
                for f in files:
                    if self._isStoryFilename(f):
                        urls.append(Resource(self._toURL(os.path.join(dirpath, f))))
            self._urls = urls
        return iter(self._urls)

    # Return the Map for subject, loading its triples from the filesystem
    # on first use: a story gets its rdf:type plus whatever the factory
    # loads from the file; a category gets its rdf:type plus one
    # 'contains' triple per story or sub-category directly inside it.
    # Raises TypeError if subject is not a Resource.
    def _loadSubjectMap(self, subject):
        if not isinstance(subject, Resource):
            raise TypeError("A subject must be a Resource")

        sm = self._subjectMap(subject)
        if sm._loaded:
            return sm

        path = self._toPath(subject.uri())

        if self._isStory(path):
            factory = self._factory
            ext = extensionOf(path)
            self.relate(subject, rdf.type, factory.typeOf(ext))
            factory.loadFileProperties(subject, path, self)

        elif self._isCategory(path):
            self.relate(subject, rdf.type, fuschia.category)
            for f in os.listdir(path):
                if not self._isSubjectFilename(f):
                    continue
                # The name check alone also accepts any non-hidden file of
                # unknown type; _toURL returns '' for those, and they must
                # not be asserted as a contained Resource('').
                url = self._toURL(os.path.join(path, f))
                if url:
                    self.relate(subject, fuschia.contains, Resource(url))
        sm._loaded = 1
        return sm

    # A more relaxed check on storyness, for when we already know where it is
    def _isStoryFilename(self, filename):
        return self._factory.isKnownExtension(extensionOf(filename))

    # True if path is under the root (textual prefix check), is an
    # existing file, and has an extension the factory knows.
    def _isStory(self, path):
        return path.startswith(self._root) and \
               os.path.isfile(path) and \
               self._isStoryFilename(os.path.basename(path))

    # Name-only check on categoryness: whatever the directory filter says.
    def _isCategoryFilename(self, filename):
        return self._filter(filename)

    # True if path is under the root (textual prefix check), is an
    # existing directory, and its basename passes the directory filter.
    # For a path ending in a separator the basename is '', so it is the
    # filter's verdict on '' that counts.
    def _isCategory(self, path):
        return path.startswith(self._root) and \
               os.path.isdir(path) and \
               self._isCategoryFilename(os.path.basename(path))

    # Name-only check: could this name be a category or a story?
    def _isSubjectFilename(self, filename):
        return  self._isCategoryFilename(filename) or self._isStoryFilename(filename)

    # Assert the rdf:type of every subject in the store.
    # Raises ValueError for a subject that is neither a category nor a
    # file with a known extension.
    def _loadTypes(self):
        for thing in self.subjects():
            p = self._toPath(thing.uri())
            # We ought not to have anything in subjects that
            # isn't one of these, but I'm going to check
            # explicitly anyway
            if self._isCategory(p):
                self.relate(thing, rdf.type, fuschia.category)
            else:
                ext = extensionOf(p)
                if self._factory.isKnownExtension(ext):
                    self.relate(thing, rdf.type, self._factory.typeOf(ext))
                else:
                    raise ValueError("Found a subject that is not a known type: %s" % thing)

    # Return the Map for object, loading on first use the triples that
    # have it as their object.  Non-Resource objects load nothing.
    def _loadObjectMap(self, object):
        om = self._objectMap(object)
        if om._loaded:
            return om

        if isinstance(object, Resource):
            # NOTE(review): only the factory's story types trigger the
            # full scan; fuschia.category is not among them unless the
            # extension map lists it -- confirm that is intended.
            if self._factory.isKnownType(object):
                # Damn, we have to find out everything
                self._loadTypes()

            # %%% TODO Here's where we might check to see if we're asked
            # about some other property we know about (somehow)
            #
            # elif ...:
            #   ...

            else: # We definitely know about CONTAINS -- find whatever contains object
                path = self._toPath(object.uri())
                if self._isSubjectFilename(os.path.basename(path)):
                    parent = os.path.dirname(path)
                    if self._isCategory(parent):
                        self.relate(Resource(self._toURL(parent)), fuschia.contains, object)

        om._loaded = 1
        return om

    # convert a path including the fs root given into a URL
    # A story's URL drops the file extension; a category's URL ends in
    # '/'.  Returns '' for a path that is neither.
    def _toURL(self, path):
        relpath = ''

        if self._isStory(path):
            relpath = path[len(self._root):]
            relpath = relpath[: - (len(extensionOf(relpath)) + 1)]
        elif self._isCategory(path):
            relpath = path[len(self._root):]
            if not relpath.endswith('/'): relpath += "/"
        else: return ''

        while relpath.startswith('/'):
            relpath = relpath[1:]
        return "%s/%s" % (self._prefix, pathname2url(relpath))

    # Convert a URL to a path including the fs root
    # Returns '' for a URL outside this store's prefix.  Raises
    # ValueError for a URL under the prefix that matches neither an
    # existing category nor an existing story file.
    def _toPath(self, url):
        if not url.startswith(self._prefix):
            return ''
        else:
            relurl = url[len(self._prefix):]
            while relurl.startswith('/'):
                relurl = relurl[1:]
            path = os.path.join(self._root, url2pathname(relurl))

            if url.endswith('/') and self._isCategory(path):
                return os.path.normpath(path)
            else:
                # We need help from the story factory -- what extensions can this have?
                # %%% Should this work the other way, from the factory? Concerns are
                # muddled.
                # Arbitrarily, just test in order of what we get back here
                for ext in self._factory.knownExtensions():
                    fullpath = '%s.%s' % (path, ext,)
                    if os.path.isfile(fullpath):
                        return fullpath
            raise ValueError("URL does not belong in this store: %s" % url)
