MobileBlur

Artifact Content
Login

Artifact 8fc01ec0e689c7787aa7ee18f09f103222a31497:


#!/usr/bin/python
# -*- coding: latin-1 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation; either version 3, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.

"Simple XML manipulation"

__author__ = "Mariano Reingart (reingart@gmail.com)"
__copyright__ = "Copyright (C) 2008/009 Mariano Reingart"
__license__ = "LGPL 3.0"
__version__ = "1.02c"

import xml.dom.minidom
from decimal import Decimal
import datetime
import time

DEBUG = False

# Functions to serialize/unserialize special immutable types:
datetime_u = lambda s: datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%S")
datetime_m = lambda dt: dt.isoformat('T')
date_u = lambda s: datetime.datetime.strptime(s[0:10], "%Y-%m-%d").date()
date_m = lambda d: d.strftime("%Y-%m-%d")
time_u = lambda s: datetime.datetime.strptime(s, "%H:%M:%S").time()
time_m = lambda d: d.strftime("%H%M%S")
bool_u = lambda s: {'0':False, 'false': False, '1': True, 'true': True}[s]

# aliases:
class Alias():
    def __init__(self, py_type, xml_type):
        self.py_type, self.xml_type = py_type, xml_type
    def __call__(self, value):
        return self.py_type(value)
    def __repr__(self):
        return "<alias '%s' for '%s'>" % (self.xml_type, self.py_type)

byte = Alias(str,'byte')
short = Alias(int,'short')
double = Alias(float,'double')
integer = Alias(long,'integer')
DateTime = datetime.datetime
Date = datetime.date
Time = datetime.time

# Define convertion function (python type): xml schema type
TYPE_MAP = {str:'string',unicode:'string',
            bool:'boolean', short:'short', byte:'byte',
            int:'int', long:'long', integer:'integer',
            float:'float', double:'double',
            Decimal:'decimal',
            datetime.datetime:'dateTime', datetime.date:'date',
            }
TYPE_MARSHAL_FN = {datetime.datetime:datetime_m, datetime.date:date_m,}
TYPE_UNMARSHAL_FN = {datetime.datetime:datetime_u, datetime.date:date_u,
                     bool:bool_u,
            }


class OrderedDict(dict):
    "Minimal ordered dictionary for xsd:sequences"
    def __init__(self):
        self.__keys = []
        self.array = False
    def __setitem__(self, key, value):
        if key not in self.__keys:
            self.__keys.append(key)
        dict.__setitem__(self, key, value)
    def insert(self, key, value, index=0):
        if key not in self.__keys:
            self.__keys.insert(index, key)
        dict.__setitem__(self, key, value)
    def __delitem__(self, key):
        if key in self.__keys:
            self.__keys.remove(key)
        dict.__delitem__(self, key)
    def __iter__(self):
        return iter(self.__keys)
    def keys(self):
        return self.__keys
    def items(self):
        return [(key, self[key]) for key in self.__keys]
    def update(self, other):
        for k,v in other.items():
            self[k] = v
        if isinstance(other, OrderedDict):
            self.array = other.array
    def __str__(self):
        return "*%s*" % dict.__str__(self)
    def __repr__(self):
        s= "*{%s}*" % ", ".join(['%s: %s' % (repr(k),repr(v)) for k,v in self.items()])
        if self.array and False:
            s = "[%s]" % s
        return s


class SimpleXMLElement(object):
    "Simple XML manipulation (simil PHP)"

    def __init__(self, text = None, elements = None, document = None, namespace = None, prefix=None):
        self.__ns = namespace
        self.__prefix = prefix
        if text:
            try:
                self.__document = xml.dom.minidom.parseString(text)
            except:
                if DEBUG: print text
                raise
            self.__elements = [self.__document.documentElement]
        else:
            self.__elements = elements
            self.__document = document

    def add_child(self,name,text=None,ns=True):
        "Adding a child tag to a node"
        if not ns or not self.__ns:
            if DEBUG: print "adding %s" % (name)
            element = self.__document.createElement(name)
        else:
            if DEBUG: print "adding %s ns %s %s" % (name, self.__ns,ns)
            if self.__prefix:
                element = self.__document.createElementNS(self.__ns, "%s:%s" % (self.__prefix, name))
            else:
                element = self.__document.createElementNS(self.__ns, name)
        if text:
            if isinstance(text, unicode):
                element.appendChild(self.__document.createTextNode(text))
            else:
                element.appendChild(self.__document.createTextNode(str(text)))
        self._element.appendChild(element)
        return SimpleXMLElement(
                    elements=[element],
                    document=self.__document,
                    namespace=self.__ns,
                    prefix=self.__prefix)

    def __setattr__(self, tag, text):
        "Add text child tag node (short form)"
        if tag.startswith("_"):
            object.__setattr__(self, tag, text)
        else:
            if DEBUG: print "__setattr__(%s,%s)" % (tag, text)
            self.add_child(tag,text)

    def add_comment(self, data):
        "Add an xml comment to this child"
        comment = self.__document.createComment(data)
        self._element.appendChild(comment)

    def as_xml(self,filename=None,pretty=False):
        "Return the XML representation of the document"
        if not pretty:
            return self.__document.toxml('UTF-8')
        else:
            return self.__document.toprettyxml(encoding='UTF-8')

    def __repr__(self):
        "Return the XML representation of this tag"
        return self._element.toxml('UTF-8')

    def get_name(self):
        "Return the tag name of this node"
        return self._element.tagName

    def get_local_name(self):
        "Return the tag loca name (prefix:name) of this node"
        return self._element.localName

    def get_prefix(self):
        "Return the namespace prefix of this node"
        return self._element.prefix

    def get_namespace_uri(self, ns):
        "Return the namespace uri for a prefix"
        v = self.__document.documentElement.attributes['xmlns:%s' % ns]
        return v.value

    def attributes(self):
        "Return a dict of attributes for this tag"
        #TODO: use slice syntax [:]?
        return self._element.attributes

    def __getitem__(self, item):
        "Return xml tag attribute value or a slice of attributes (iter)"
        if DEBUG: print "__getitem__(%s)" % item
        if isinstance(item,basestring):
            if self._element.hasAttribute(item):
                return self._element.attributes[item].value
        elif isinstance(item, slice):
            # return a list with name:values
            return self._element.attributes.items()[item]
        else:
            # return element by index (position)
            element = self.__elements[item]
            return SimpleXMLElement(
                    elements=[element],
                    document=self.__document,
                    namespace=self.__ns,
                    prefix=self.__prefix)

    def add_attribute(self, name, value):
        "Set an attribute value from a string"
        self._element.setAttribute(name, value)

    def __setitem__(self, item, value):
        "Set an attribute value"
        if isinstance(item,basestring):
            self.add_attribute(item, value)
        elif isinstance(item, slice):
            # set multiple attributes at once
            for k, v in value.items():
                self.add_attribute(k, v)

    def __call__(self, tag=None, ns=None, children=False, error=True):
        "Search (even in child nodes) and return a child tag by name"
        try:
            if tag is None:
                # if no name given, iterate over siblings (same level)
                return self.__iter__()
            if children:
                # future: filter children? by ns?
                return self.children()
            elements = None
            if isinstance(tag, int):
                # return tag by index
                elements=[self.__elements[tag]]
            if ns and not elements:
                for ns_uri in isinstance(ns, (tuple, list)) and ns or (ns, ):
                    if DEBUG: print "searching %s by ns=%s" % (tag,ns_uri)
                    elements = self._element.getElementsByTagNameNS(ns_uri, tag)
                    if elements:
                        break
            if self.__ns and not elements:
                if DEBUG: print "searching %s by ns=%s" % (tag, self.__ns)
                elements = self._element.getElementsByTagNameNS(self.__ns, tag)
            if not elements:
                if DEBUG: print "searching %s " % (tag)
                elements = self._element.getElementsByTagName(tag)
            if not elements:
                if DEBUG: print self._element.toxml()
                if error:
                    raise AttributeError("No elements found")
                else:
                    return
            return SimpleXMLElement(
                elements=elements,
                document=self.__document,
                namespace=self.__ns,
                prefix=self.__prefix)
        except AttributeError, e:
            raise AttributeError("Tag not found: %s (%s)" % (tag, str(e)))

    def __getattr__(self, tag):
        "Shortcut for __call__"
        return self.__call__(tag)

    def __iter__(self):
        "Iterate over xml tags at this level"
        try:
            for __element in self.__elements:
                yield SimpleXMLElement(
                    elements=[__element],
                    document=self.__document,
                    namespace=self.__ns,
                    prefix=self.__prefix)
        except:
            raise

    def __dir__(self):
        "List xml children tags names"
        return [node.tagName for node
                in self._element.childNodes
                if node.nodeType != node.TEXT_NODE]

    def children(self):
        "Return xml children tags element"
        elements=[__element for __element in self._element.childNodes
                          if __element.nodeType == __element.ELEMENT_NODE]
        if not elements:
            return None
            #raise IndexError("Tag %s has no children" % self._element.tagName)
        return SimpleXMLElement(
                elements=elements,
                document=self.__document,
                namespace=self.__ns,
                prefix=self.__prefix)

    def __len__(self):
        "Return elements count"
        return len(self.__elements)

    def __contains__( self, item):
        "Search for a tag name in this element or child nodes"
        return self._element.getElementsByTagName(item)

    def __unicode__(self):
        "Returns the unicode text nodes of the current element"
        if self._element.childNodes:
            rc = u""
            for node in self._element.childNodes:
                if node.nodeType == node.TEXT_NODE:
                    rc = rc + node.data
            return rc
        return ''

    def __str__(self):
        "Returns the str text nodes of the current element"
        return unicode(self).encode("utf8","ignore")

    def __int__(self):
        "Returns the integer value of the current element"
        return int(self.__str__())

    def __float__(self):
        "Returns the float value of the current element"
        try:
            return float(self.__str__())
        except:
            raise IndexError(self._element.toxml())

    _element = property(lambda self: self.__elements[0])

    def unmarshall(self, types):
        "Convert to python values the current serialized xml element"
        # types is a dict of {tag name: convertion function}
        # example: types={'p': {'a': int,'b': int}, 'c': [{'d':str}]}
        #   expected xml: <p><a>1</a><b>2</b></p><c><d>hola</d><d>chau</d>
        #   returnde value: {'p': {'a':1,'b':2}, `'c':[{'d':'hola'},{'d':'chau'}]}
        d = {}
        for node in self():
            name = str(node.get_local_name())
            try:
                fn = types[name]
            except (KeyError, ), e:
                raise TypeError("Tag: %s invalid" % (name,))
            if isinstance(fn,list):
                value = []
                children = node.children()
                for child in children and children() or []:
                    value.append(child.unmarshall(fn[0]))
            elif isinstance(fn,dict):
                children = node.children()
                value = children and children.unmarshall(fn)
            else:
                if fn is None: # xsd:anyType not unmarshalled
                    value = node
                elif str(node) or fn == str:
                    try:
                        # get special desserialization function (if any)
                        fn = TYPE_UNMARSHAL_FN.get(fn,fn)
                        value = fn(unicode(node))
                    except (ValueError, TypeError), e:
                        raise ValueError("Tag: %s: %s" % (name, unicode(e)))
                else:
                    value = None
            d[name] = value
        return d

    def marshall(self, name, value, add_child=True, add_comments=False, ns=False):
        "Analize python value and add the serialized XML element using tag name"
        if isinstance(value, dict):  # serialize dict (<key>value</key>)
            child = add_child and self.add_child(name,ns=ns) or self
            for k,v in value.items():
                child.marshall(k, v, add_comments=add_comments, ns=ns)
        elif isinstance(value, tuple):  # serialize tuple (<key>value</key>)
            child = add_child and self.add_child(name,ns=ns) or self
            for k,v in value:
                getattr(self,name).marshall(k, v, add_comments=add_comments, ns=ns)
        elif isinstance(value, list): # serialize lists
            child=self.add_child(name,ns=ns)
            if add_comments:
                child.add_comment("Repetitive array of:")
            for t in value:
                child.marshall(name,t, False, add_comments=add_comments, ns=ns)
        elif isinstance(value, basestring): # do not convert strings or unicodes
            self.add_child(name,value,ns=ns)
        elif value is None: # sent a empty tag?
            self.add_child(name,ns=ns)
        elif value in TYPE_MAP.keys():
            # add commented placeholders for simple tipes (for examples/help only)
            child = self.add_child(name,ns=ns)
            child.add_comment(TYPE_MAP[value])
        else: # the rest of object types are converted to string
            # get special serialization function (if any)
            fn = TYPE_MARSHAL_FN.get(type(value),str)
            self.add_child(name,fn(value),ns=ns)

    def import_node(self, other):
        x = self.__document.importNode(other._element, True)  # deep copy
        self._element.appendChild(x)


if __name__ == "__main__":
    span = SimpleXMLElement('<span><a href="python.org.ar">pyar</a><prueba><i>1</i><float>1.5</float></prueba></span>')
    assert str(span.a)==str(span('a'))==str(span.a(0))=="pyar"
    assert span.a['href']=="python.org.ar"
    assert int(span.prueba.i)==1 and float(span.prueba.float)==1.5
    span1 = SimpleXMLElement('<span><a href="google.com">google</a><a>yahoo</a><a>hotmail</a></span>')
    assert [str(a) for a in span1.a()] == ['google', 'yahoo', 'hotmail']
    span1.add_child('a','altavista')
    span1.b = "ex msn"
    d = {'href':'http://www.bing.com/', 'alt': 'Bing'}
    span1.b[:] = d
    assert sorted([(k,v) for k,v in span1.b[:]]) == sorted(d.items())
    print span1.as_xml()
    assert 'b' in span1
    span.import_node(span1)
    print span.as_xml()