[Commits] r487 - website

Tue Apr 28 10:55:32 CEST 2009

Author: tschaub
Date: 2009-04-28 10:55:32 +0200 (Tue, 28 Apr 2009)
New Revision: 487

Modified:
   website/jst.py
   website/jst.txt
Log:
refactor comment extraction

Modified: website/jst.py
===================================================================

--- website/jst.py	2009-04-27 21:04:32 UTC (rev 486)
+++ website/jst.py	2009-04-28 08:55:32 UTC (rev 487)
@@ -9,13 +9,6 @@
 SUFFIX_JST = ".jst"
 SUFFIX_RST = ".rst"
 
-JST_RE = re.compile(r'^\s*/\*\*\s*jst\s*:\s*(.*?)\s*$')
-COMMENT_RE = re.compile(r'^\s*\*?')
-ENDCOMMENT_RE = re.compile(r'^\s*\*/')
-DEF_RE = re.compile(r"\s*(\w+)\s*=\s*(.*?)\s*$")
-INDENTED_RE = re.compile(r"^\s+\S")
-BRACKET_RE = re.compile(r"(\w+)\[(.*?)\]")
-
 _marker = object()
 
 class DocParser(ConfigParser):
@@ -111,38 +104,29 @@
         source = fh.readlines()
         fh.close()
         return cls(source)
-    
+        
     @property
     def comments(self):
         if self._comments == _marker:
+            source = "".join(self.source)
             comments = ()
-            inblock = False
-            label = None
-            block = None
-            got_indent = False
-            for line in self.source:
-                if not inblock:
-                    m = JST_RE.match(line)
-                    if m:
-                        inblock = True
-                        label = m.group(1)
-                        block = ()
-                        spaces = 0
-                        got_indent = False
-                else:
-                    m = ENDCOMMENT_RE.match(line)
-                    if m:
-                        inblock = False
-                        comments += (dict(label=label, block=block),)
-                    else:
-                        line = COMMENT_RE.sub("", line)
-                        if not got_indent:
-                            if INDENTED_RE.match(line):
-                                spaces = len(line) - len(line.lstrip())
-                                got_indent = True
-                        if len(line) > spaces:
-                            line = line[spaces:]
-                        block += (line,)
+            for comment in re.findall(r'^\s*/\*\*\s*jst\s*:\s*([\S\s]*?)\*+/', source, re.MULTILINE):
+                lines = [re.sub(r'^\s*\*+', '', line.rstrip()) for line in comment.split('\n')]
+                if len(lines) == 1:
+                    label = "(define)"
+                    block = lines[0].strip(),
+                elif len(lines) > 1:
+                    spaces = None
+                    label = lines.pop(0)
+                    block = ()
+                    for line in lines:
+                        if line and not line.isspace() and spaces is None:
+                            spaces = len(line) - len(line.lstrip())
+                        if spaces is not None:
+                            if len(line) > spaces:
+                                line = line[spaces:]
+                            block += line,
+                comments += dict(label=label, block=block),
             self._comments = comments
         return self._comments
     
@@ -156,14 +140,14 @@
                 if label.startswith("("):
                     if label == "(define)":
                         for defline in block:
-                            m = DEF_RE.match(defline)
+                            m = re.match(r"\s*(\w+)\s*=\s*(.*?)\s*$", defline)
                             if m:
                                 data[m.group(1)] = m.group(2)
                     elif label == "(extends)":
                         self.extends = [path.strip() for path in block if path.strip()]
                 else:
-                    block = "".join(block)
-                    m = BRACKET_RE.match(label)
+                    block = "\n".join(block)
+                    m = re.match(r"(\w+)\[(.*?)\]", label)
                     if m:
                         name = m.group(1)
                         key = m.group(2)

Modified: website/jst.txt
===================================================================
--- website/jst.txt	2009-04-27 21:04:32 UTC (rev 486)
+++ website/jst.txt	2009-04-28 08:55:32 UTC (rev 487)
@@ -1,9 +1,21 @@
 ``jst`` basics
 ==============
 
+Accessing data in comments
+--------------------------
+
+Set up a helper so we can pretend a string comes from a file::
+
+    >>> from StringIO import StringIO
+    >>> def code(string):
+    ...     f = StringIO(string)
+    ...     lines = f.readlines()
+    ...     f.close()
+    ...     return lines
+
 Consider some code with well formatted comment blocks::
 
-    >>> code = """
+    >>> lines = code("""
     ... /**
     ...  *  This is a plain old comment block.
     ...  */
@@ -39,16 +51,8 @@
     ...  *  to = assign
     ...  *  keys = values
     ...  */
-    ... """
+    ... """)
 
-
-Pretend we read the above from a file::
-
-    >>> import StringIO
-    >>> f = StringIO.StringIO(code)
-    >>> lines = f.readlines()
-    >>> f.close()
-
 Now import jst and create an object representing the lines of code::
 
     >>> import jst
@@ -65,7 +69,7 @@
     >>> comments[1]['label']
     'bar[]'
     >>> comments[2]['block']
-    ('Keep adding items to a list.\n',)
+    ('Keep adding items to a list.', '')
     >>> comments[6]['label']
     '(define)'
 
@@ -84,3 +88,38 @@
     >>> obj.data['convenient']
     'way'
 
+
+Testing various comment formats
+-------------------------------
+
+Make sure that different comment formats don't mess things up::
+
+    >>> obj = jst.SourceFile(code("""
+    ... /** regular old comment */
+    ... 
+    ... /** jst: foo
+    ...  *  foo content
+    ...  */
+    ... 
+    ... some_code; /* comment */
+    ...  
+    ... /** jst: one_liner = the one liner value */ 
+    ...  
+    ... /** jst: bar
+    ...  *  bar content
+    ...  */
+    ... """))
+
+We expect three comments.  The one-liner is treated like a single line from a (define) block::
+
+    >>> len(obj.comments)
+    3
+    >>> obj.comments[0]
+    {'block': ('foo content', ''), 'label': 'foo'}
+    >>> obj.comments[1]
+    {'block': ('one_liner = the one liner value',), 'label': '(define)'}
+    >>> obj.comments[2]
+    {'block': ('bar content', ''), 'label': 'bar'}
+    >>> obj.data['one_liner']
+    'the one liner value'
+