"""
Dumb path is like XPath, but dumb.

Syntax:

``$(id)``:
    Start at the element of id ``id``.

``<tag.class:n>``:
    Go to the ``tag`` tag by class ``class``.  The tag and class are
    both optional (but you must give at least one).  Without ``:n`` it
    will select the first element.  If ``n`` is greater than the number
    of tags, the last tag will be selected.  Negative indexes are also
    allowed, counting from the end.  Counting starts at 0; only
    descendants are matched; if nothing is matched, then this does
    nothing.

``+`` and ``-``:
    Go to the next/previous sibling; ``+`` on the last element, or
    ``-`` on the first element, will do nothing.

``p`` and ``c``:
    Select the parent or child of the current node.  If ``c``, the
    first child will be selected.  If there is no parent or child,
    this will do nothing.  (``P`` and ``C`` are accepted as well.)

``#(...)``:
    A comment.  The comment text is passed to the ``log`` callable.
"""

import re

__all__ = ['DumbSyntaxError', 'find_path']


class DumbSyntaxError(Exception):
    """Raised by the tokenizer when an expression cannot be parsed."""


def _no_log(node, message):
    """Default ``log`` callable: discard the message."""
    return 0


def find_path(doc, expr, log=None):
    """
    Find the node that ``expr`` leads to, starting from ``doc``.

    If ``log`` is given, it should be a callable that takes two
    arguments -- a node and a message about the parsing.

    The whole expression is tokenized before any step is applied, so a
    ``DumbSyntaxError`` is raised before the tree is walked.  Steps
    that cannot be satisfied (no such id, no parent, ...) are logged
    and leave the current node unchanged.
    """
    if not log:
        log = _no_log
    # Materialize the tokens first so syntax errors surface up front.
    tokens = list(dumb_tokenize(expr))
    node = doc
    for token, args in tokens:
        node = token(node, log, *args)
    return node


_id_regex = re.compile(r'[$][(]([^)]+)[)]')
_tag_class_regex = re.compile(
    r'<(?:([\w-]+)|([\w-]*\.[\w-]*))(:-?\d+)?>')
_comment_regex = re.compile(r'#\((.*?)\)')


def dumb_tokenize(expr):
    """
    Yield ``(handler, args)`` pairs for each command in ``expr``.

    ``handler`` is one of the ``_m_*`` functions and is called as
    ``handler(node, log, *args)``.  Whitespace between commands is
    ignored.  Raises ``DumbSyntaxError`` on anything unrecognized.
    """
    orig_expr = expr
    while True:
        expr = expr.lstrip()
        if not expr:
            break
        # Offset of the current command in the original expression.
        pos = len(orig_expr) - len(expr)
        if expr.startswith('#'):
            m = _comment_regex.match(expr)
            if not m:
                raise DumbSyntaxError(
                    "Bad comment at position %s (remaining: %r)"
                    % (pos, expr))
            expr = expr[m.end():]
            yield _m_comment, (m.group(1),)
        elif expr.startswith('$'):
            m = _id_regex.match(expr)
            if not m:
                raise DumbSyntaxError(
                    "Bad id match at position %s (remaining: %r)"
                    % (pos, expr))
            expr = expr[m.end():]
            yield _m_id, (m.group(1),)
        elif expr.startswith('<'):
            m = _tag_class_regex.match(expr)
            if not m:
                raise DumbSyntaxError(
                    "Bad tag match at position %s (remaining: %r)"
                    % (pos, expr))
            expr = expr[m.end():]
            # Group 3 is ``None`` or ``":n"``; turn it into an integer
            # index, defaulting to the first match.
            raw_count = m.group(3)
            count = int(raw_count[1:]) if raw_count else 0
            yield _m_tag, (m.group(1) or m.group(2), count)
        elif expr[0] in ('+', '-', 'p', 'c', 'P', 'C'):
            c = expr[0]
            expr = expr[1:]
            if c == '+':
                yield _m_position, (1,)
            elif c == '-':
                yield _m_position, (-1,)
            elif c == 'p' or c == 'P':
                yield _m_parent, ()
            else:
                yield _m_child, ()
        else:
            raise DumbSyntaxError(
                "Unknown command at position %s (remaining: %r)"
                % (pos, expr))


def _m_comment(node, log, comment):
    """Handle ``#(...)``: log the comment text and stay on ``node``."""
    log(node, comment)
    return node


def _m_id(node, log, id):
    """
    Handle ``$(id)``: return the element found by
    ``node.get_element_by_id(id)``.

    If the lookup raises ``KeyError`` or returns ``None`` the miss is
    logged and ``node`` is returned unchanged.
    """
    try:
        n = node.get_element_by_id(id)
    except KeyError:
        n = None
    if n is None:
        log(node, 'No element by id %r' % id)
        # Error?
        return node
    return n


def _m_tag(node, log, tag_class, count=0):
    """
    Handle ``<tag.class:n>``: pick the ``count``-th result of
    ``node.cssselect(tag_class)``.

    ``count`` is an integer index (negative counts from the end;
    ``None`` is treated as 0).  An out-of-range index is logged and
    clamped to the last (non-negative) or first (negative) match.  If
    nothing matches, that is logged and ``node`` is returned unchanged.
    """
    if count is None:
        count = 0
    elements = node.cssselect(tag_class)
    if not elements:
        log(node, 'No elements matching %s' % tag_class)
        # Error?
        return node
    try:
        return elements[count]
    except IndexError:
        log(node,
            'Count for %s out of range (len=%s): %s; selecting %s'
            % (tag_class, len(elements), count,
               'last' if count >= 0 else 'first'))
        if count >= 0:
            return elements[-1]
        return elements[0]


def _m_position(node, log, dir):
    """
    Handle ``+`` / ``-``: return the next (``dir > 0``) or previous
    (``dir < 0``) sibling of ``node``.

    A node without a parent is returned as-is.  If there is no sibling
    in that direction, that is logged and ``node`` is returned.
    """
    parent = node.getparent()
    if parent is None:
        return node
    children = list(parent)
    if dir < 0:
        # Walking the reversed list turns "previous" into "next".
        children.reverse()
    found = False
    for child in children:
        if found:
            return child
        if child == node:
            found = True
    log(node, 'Node has no %s sibling'
        % ('next' if dir > 0 else 'previous'))
    return node


def _m_parent(node, log):
    """
    Handle ``p``: return ``node.getparent()``, or ``node`` itself
    (after logging) when there is no parent.
    """
    p = node.getparent()
    # Compare against None explicitly instead of relying on the
    # truthiness of the parent object.
    if p is None:
        log(node, 'No parent node')
        return node
    return p


def _m_child(node, log):
    """
    Handle ``c``: return the first child (``node[0]``), or ``node``
    itself (after logging) when it has no children.
    """
    try:
        return node[0]
    except IndexError:
        log(node, 'No children nodes')
        return node