# Twisted, the Framework of Your Internet # Copyright (C) 2001-2002 Matthew W. Lefkowitz # # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General Public # License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # from twisted.lore import tree from twisted.web import domhelpers from twisted.python import reflect import parser, urlparse, os.path class TagChecker: def __init__(self, allowedTags, allowedClasses): self.allowedTags = allowedTags self.allowedClasses = allowedClasses def check(self, dom, filename): for method in reflect.prefixedMethods(self, 'check_'): method(dom, filename) def _reportError(self, filename, element, error): hlint = element.hasAttribute('hlint') and element.getAttribute('hlint') if hlint != 'off': pos = getattr(element, '_markpos', None) or (0, 0) print "%s:%s:%s: %s" % ((filename,)+pos+(error,)) def check_disallowedElements(self, dom, filename): def m(node, self=self): return not self.allowedTags(node.tagName) for element in domhelpers.findElements(dom, m): self._reportError(filename, element, 'unrecommended tag %s' % element.tagName) def check_disallowedClasses(self, dom, filename): def matcher(element, self=self): if not element.hasAttribute('class'): return 0 checker = self.allowedClasses.get(element.tagName, lambda x:0) return not checker(element.getAttribute('class')) for element in domhelpers.findElements(dom, matcher): self._reportError(filename, element, 'unknown class %s' %element.getAttribute('class')) def check_quote(self, dom, filename): def matcher(node): return ('"' in getattr(node, 'data', '') and [1 for n in domhelpers.getParents(node)[1:-1] if n.tagName in ('pre', 'code')] == []) for node in domhelpers.findNodes(dom, matcher): self._reportError(filename, node.parentNode, 'contains quote') def check_styleattr(self, dom, filename): for node in domhelpers.findElementsWithAttribute(dom, 'style'): self._reportError(filename, node, 'explicit style') def check_align(self, dom, filename): for node in domhelpers.findElementsWithAttribute(dom, 'align'): self._reportError(filename, node, 'explicit alignment') def check_style(self, dom, filename): for node in domhelpers.findNodesNamed(dom, 'style'): if domhelpers.getNodeText(node) != '': self._reportError(filename, node, 'hand hacked style') def check_title(self, dom, filename): doc = dom.documentElement title = domhelpers.findNodesNamed(dom, 'title') if len(title)!=1: return self._reportError(filename, doc, 'not exactly one title') h1 = domhelpers.findNodesNamed(dom, 'h1') if len(h1)!=1: return self._reportError(filename, doc, 'not exactly one h1') if domhelpers.getNodeText(h1[0]) != domhelpers.getNodeText(title[0]): self._reportError(filename, h1[0], 'title and h1 text differ') def check_80_columns(self, dom, filename): for node in domhelpers.findNodesNamed(dom, 'pre'): for line in domhelpers.getNodeText(node).split('\n'): if len(line.rstrip()) > 80: self._reportError(filename, node, 'text wider than 80 columns in pre') for node in domhelpers.findNodesNamed(dom, 'a'): if node.getAttribute('class', '').endswith('listing'): try: fn = os.path.dirname(filename) fn = os.path.join(fn, node.getAttribute('href')) lines = open(fn,'r').readlines() except: self._reportError(filename, node, 'bad listing href: %r' % node.getAttribute('href')) continue for line in lines: if len(line.rstrip()) > 80: self._reportError(filename, node, 'listing wider than 80 columns') def check_pre_py_listing(self, dom, filename): for node in domhelpers.findNodesNamed(dom, 'pre'): if node.getAttribute('class') == 'python': try: text = domhelpers.getNodeText(node) # Fix < and > text = text.replace('>', '>').replace('<', '<') # Strip blank lines lines = filter(None,[l.rstrip() for l in text.split('\n')]) # Strip leading space while not [1 for line in lines if line[:1] not in ('',' ')]: lines = [line[1:] for line in lines] text = '\n'.join(lines) + '\n' try: parser.suite(text) except parser.ParserError, e: # Pretend the "..." idiom is syntactically valid text = text.replace("...","'...'") parser.suite(text) except parser.ParserError, e: self._reportError(filename, node, 'invalid python code:' + str(e)) def check_anchor_in_heading(self, dom, filename): headingNames = ['h%d' % n for n in range(1,7)] for hname in headingNames: for node in domhelpers.findNodesNamed(dom, hname): if domhelpers.findNodesNamed(node, 'a'): self._reportError(filename, node, 'anchor in heading') def check_texturl_matches_href(self, dom, filename): for node in domhelpers.findNodesNamed(dom, 'a'): if not node.hasAttribute('href'): continue text = node.childNodes[0].data proto = urlparse.urlparse(text)[0] if proto: if text != node.getAttribute('href',''): self._reportError(filename, node, 'link text does not match href') def check_a_py_listing(self, dom, filename): for node in domhelpers.findNodesNamed(dom, 'a'): if node.getAttribute('class') == 'py-listing': fn = os.path.join(os.path.dirname(filename), node.getAttribute('href')) lines = open(fn).readlines() lines = lines[int(node.getAttribute('skipLines', 0)):] for line, num in zip(lines, range(len(lines))): if line.count('59 Temple Place, Suite 330, Boston'): self._reportError(filename, node, 'included source file %s has licence boilerplate.' ' Use skipLines="%d".' % (fn, int(node.getAttribute('skipLines',0))+num+1)) def list2dict(l): d = {} for el in l: d[el] = None return d classes = list2dict(['shell', 'API', 'python', 'py-prototype', 'py-filename', 'py-src-string', 'py-signature', 'py-src-parameter', 'py-src-identifier', 'py-src-keyword']) tags = list2dict(["html", "title", "head", "body", "h1", "h2", "h3", "ol", "ul", "dl", "li", "dt", "dd", "p", "code", "img", "blockquote", "a", "cite", "div", "span", "strong", "em", "pre", "q", "table", "tr", "td", "th", "style"]) span = list2dict(['footnote', 'manhole-output']) div = list2dict(['note', 'boxed', 'doit']) a = list2dict(['py-listing', 'html-listing']) pre = list2dict(['python', 'shell', 'python-interpreter', 'elisp']) allowed = {'code': classes.has_key, 'span': span.has_key, 'div': div.has_key, 'a': a.has_key, 'pre': pre.has_key, 'ul': lambda x: x=='toc', 'ol': lambda x: x=='toc', 'li': lambda x: x=='ignoretoc'} def getDefaultChecker(): return TagChecker(tags.has_key, allowed) def doFile(file, checker): dom = tree.parseFileAndReport(file) if dom: checker.check(dom, file)