aboutsummaryrefslogtreecommitdiff
path: root/tools/addon-sdk-1.5/python-lib/markdown/preprocessors.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/addon-sdk-1.5/python-lib/markdown/preprocessors.py')
-rw-r--r-- tools/addon-sdk-1.5/python-lib/markdown/preprocessors.py 214
1 files changed, 214 insertions, 0 deletions
diff --git a/tools/addon-sdk-1.5/python-lib/markdown/preprocessors.py b/tools/addon-sdk-1.5/python-lib/markdown/preprocessors.py
new file mode 100644
index 0000000..712a1e8
--- /dev/null
+++ b/tools/addon-sdk-1.5/python-lib/markdown/preprocessors.py
@@ -0,0 +1,214 @@
+
+"""
+PRE-PROCESSORS
+=============================================================================
+
+Preprocessors work on source text before we start doing anything too
+complicated.
+"""
+
+import re
+import markdown
+
# Placeholder text that stands in for a stashed html segment.  It is built
# from markdown.STX, the literal "wzxhzdk:", the segment's integer index
# (filled in through "%d") and markdown.ETX.
HTML_PLACEHOLDER_PREFIX = "".join((markdown.STX, "wzxhzdk:"))
HTML_PLACEHOLDER = "".join((HTML_PLACEHOLDER_PREFIX, "%d", markdown.ETX))
+
class Processor:
    """
    Base class for objects that may be bound to a Markdown instance.
    """

    def __init__(self, markdown_instance=None):
        """
        Bind the processor to `markdown_instance`.

        Keyword arguments:

        * markdown_instance: the owning Markdown object.  When it is
          omitted (or otherwise falsy) no `markdown` attribute is created.

        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
+
class Preprocessor(Processor):
    """
    Base class for preprocessors, which run after the text has been
    broken into lines.

    A preprocessor provides a "run" method that receives the document as
    a list of lines, changes it as needed, and hands back either that same
    list or a newly built one.

    Preprocessors must extend markdown.Preprocessor.

    """

    def run(self, lines):
        """
        Hook to be overridden by every Preprocessor subclass.

        Keyword arguments:

        * lines: the document as a list of strings split by newlines

        Returns : subclasses return the (possibly modified) list of lines;
        this base implementation does nothing and returns None.

        """
        return None
+
class HtmlStash:
    """
    Holds the HTML segments that are pulled out of the document early on
    and swapped for place-holders.
    """

    def __init__(self):
        """ Create an empty HtmlStash. """
        self.rawHtmlBlocks = []
        self.html_counter = 0  # index handed to the next stored segment

    def store(self, html, safe=False):
        """
        Keep an HTML segment so that it can be reinserted later.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : the placeholder string to put into the document in
        place of the segment

        """
        index = self.html_counter
        self.rawHtmlBlocks.append((html, safe))
        self.html_counter = index + 1
        return HTML_PLACEHOLDER % index

    def reset(self):
        """ Forget every stored segment and restart the numbering at 0. """
        self.rawHtmlBlocks = []
        self.html_counter = 0
+
+
class HtmlBlockPreprocessor(Preprocessor):
    """
    Remove html blocks from the text and store them for later retrieval.

    The document is regrouped into blocks separated by blank lines.  A
    block that starts with "<" and whose tag is reported as block level by
    markdown.isBlockLevel, or whose second character is one of "!", "?",
    "@" or "%", is handed to self.markdown.htmlStash and replaced by the
    placeholder it returns.  Every other block is passed through untouched.
    """

    # Templates tried in order to find the closing tag for a tag name:
    # "</name>" first, then "name>" (which yields "-->" for the "--" name
    # used for comments).
    right_tag_patterns = ["</%s>", "%s>"]

    def _get_left_tag(self, block):
        """
        Return the lower-cased name of the tag that opens `block`.

        The name is the first whitespace-delimited word that follows the
        first character of the block, the first ">" counting as whitespace.
        An empty string is returned when there is no such word (a block
        such as "<", "< " or "<>"); indexing the empty word list in that
        case would raise IndexError.

        """
        words = block[1:].replace(">", " ", 1).split()
        if not words:
            return ""
        return words[0].lower()

    def _get_right_tag(self, left_tag, block):
        """
        Look for the tag that closes `left_tag` inside `block`.

        Keyword arguments:

        * left_tag: tag name as returned by _get_left_tag (or "--")
        * block: the text being examined

        Returns : a (right_tag, data_index) tuple.  data_index is the
        offset just past the closing tag, or len(block) when none of the
        patterns is found.

        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
            i = block.rfind(tag)
            # Only the last occurrence counts, and only when it starts
            # beyond index 2.
            if i > 2:
                return tag.lstrip("<").rstrip(">"), i + len(tag)
        # Nothing matched: report the len(left_tag) + 1 characters that
        # precede the final character of the right-stripped block.
        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)

    def _equal_tags(self, left_tag, right_tag):
        """
        Tell whether `right_tag` is accepted as the closer of `left_tag`.

        "div" and names starting with "?", "@" or "%" are always accepted;
        otherwise right_tag has to be "/" + left_tag, both have to be "--",
        or right_tag minus its first character has to equal left_tag with
        that first character not being "<".  An empty left_tag is never
        accepted.

        """
        if not left_tag:
            # Only produced by _get_left_tag for blocks without a tag
            # name; also keeps the index lookups below in range.
            return False
        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        if (right_tag == "--" and left_tag == "--"):
            return True
        elif left_tag == right_tag[1:] \
            and right_tag[0] != "<":
            return True
        else:
            return False

    def _is_oneliner(self, tag):
        """ Return True for the tag names 'hr' and 'hr/'. """
        return (tag in ['hr', 'hr/'])

    def run(self, lines):
        """
        Stash the raw html blocks found in the document.

        Keyword arguments:

        * lines: the document as a list of lines

        Returns : a new list of lines in which every stashed block has
        been replaced by the placeholder returned from
        self.markdown.htmlStash.store

        """
        new_blocks = []
        text = "\n".join(lines).split("\n\n")
        items = []      # pieces of an html element spanning several blocks
        left_tag = ''
        right_tag = ''
        in_tag = False # flag

        while text:
            block = text.pop(0)
            # A leading newline is dropped, up to two times.
            for _ in range(2):
                if block.startswith("\n"):
                    block = block[1:]

            if not in_tag:
                if block.startswith("<"):
                    left_tag = self._get_left_tag(block)
                    right_tag, data_index = self._get_right_tag(left_tag, block)

                    if data_index < len(block):
                        # Text after the closing tag goes back on the
                        # queue and is examined as a block of its own.
                        text.insert(0, block[data_index:])
                        block = block[:data_index]

                    # block[1:2] rather than block[1]: the block may be
                    # the single character "<".
                    if not (markdown.isBlockLevel(left_tag) \
                        or block[1:2] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue

                    if self._is_oneliner(left_tag):
                        new_blocks.append(block.strip())
                        continue

                    if block[1:2] == "!":
                        # is a comment block
                        left_tag = "--"
                        right_tag, data_index = self._get_right_tag(left_tag, block)
                        # keep checking conditions below and maybe just append

                    if block.rstrip().endswith(">") \
                        and self._equal_tags(left_tag, right_tag):
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip()))
                        continue
                    else: #if not block[1] == "!":
                        # if is block level tag and is not complete

                        # "and" binds tighter than "or"; the parentheses
                        # only spell out how this has always been
                        # evaluated.
                        if markdown.isBlockLevel(left_tag) \
                            or (left_tag == "--"
                                and not block.rstrip().endswith(">")):
                            items.append(block.strip())
                            in_tag = True
                        else:
                            new_blocks.append(
                                self.markdown.htmlStash.store(block.strip()))

                        continue

                new_blocks.append(block)

            else:
                # Still inside an unfinished element: keep collecting
                # blocks until one of them closes left_tag.
                items.append(block.strip())

                right_tag, data_index = self._get_right_tag(left_tag, block)

                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag
                    in_tag = False
                    new_blocks.append(
                        self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []

        if items:
            # Input ended before the element was closed: stash what was
            # collected as a single html block.
            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
        return new_text.split("\n")
+
+
class ReferencePreprocessor(Preprocessor):
    """ Strip reference definitions out of the text, keeping them for later. """

    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)

    def run(self, lines):
        """
        Record reference definitions and drop them from the document.

        Keyword arguments:

        * lines: the document as a list of lines

        Returns : the list of lines that were not consumed as reference
        definitions.  Each definition found is saved in
        self.markdown.references under its lower-cased id as a
        (link, title) tuple.

        """
        kept = []
        for line in lines:
            match = self.RE.match(line)
            if match is None:
                kept.append(line)
                continue

            ref_id = match.group(2).strip().lower()
            title = match.group(4).strip() # potential title
            if title:
                # A title only counts when it is wrapped in "...", '...'
                # or (...); otherwise the line stays in the document.
                wrapped = (len(title) >= 2
                           and (title[0] == title[-1] == "\""
                                or title[0] == title[-1] == "\'"
                                or (title[0] == "(" and title[-1] == ")")))
                if not wrapped:
                    kept.append(line)
                    continue
                title = title[1:-1]

            self.markdown.references[ref_id] = (match.group(3), title)

        return kept