root/releases/elgg0.8rc2/lib/markdown.php

Revision 269, 41.0 kB (checked in by ben, 3 years ago)

--

  • Property svn:eol-style set to native
Line 
1 <?php
2
3 #
4 # Markdown  -  A text-to-HTML conversion tool for web writers
5 #
6 # Copyright (c) 2004 John Gruber 
7 # <http://daringfireball.net/projects/markdown/>
8 #
9 # Copyright (c) 2004-2005 Michel Fortin - PHP Port 
10 # <http://www.michelf.com/projects/php-markdown/>
11 #
12
13
# Bind the version strings, settings and lookup tables to the global symbol
# table; the functions in this file read them through `global` statements.
global    $MarkdownPHPVersion, $MarkdownSyntaxVersion,
        $md_empty_element_suffix, $md_tab_width,
        $md_nested_brackets_depth, $md_nested_brackets,
        $md_escape_table, $md_backslash_escape_table,
        $md_list_level;

$MarkdownPHPVersion    = '1.0.1a'; # Fri 15 Apr 2005
$MarkdownSyntaxVersion = '1.0.1';  # Sun 12 Dec 2004


#
# Global default settings:
#
$md_empty_element_suffix = " />";     # Change to ">" for HTML output
$md_tab_width = 4;                    # Block regexes allow up to ($md_tab_width - 1) leading spaces
29
30
31 # -- WordPress Plugin Interface -----------------------------------------------
32 /*
33 Plugin Name: Markdown
34 Plugin URI: http://www.michelf.com/projects/php-markdown/
35 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
36 Version: 1.0.1a
37 Author: Michel Fortin
38 Author URI: http://www.michelf.com/
39 */
# Register Markdown with WordPress' filter system. The whole block is
# skipped unless $wp_version is set in the including scope.
if (isset($wp_version)) {
    # Remove default WordPress auto-paragraph filter.
    remove_filter('the_content',  'wpautop');
    remove_filter('the_excerpt',  'wpautop');
    remove_filter('comment_text', 'wpautop');
    # Add Markdown filter with priority 6 (same as Textile).
    add_filter('the_content',     'Markdown', 6);
    add_filter('the_excerpt',     'Markdown', 6);
    add_filter('the_excerpt_rss', 'Markdown', 6);
    add_filter('comment_text',    'Markdown', 6);
    add_filter('comment_excerpt', 'Markdown', 6);

    # Postpone the not-allowed-tag-filter until Markdown has run. For comments,
    # it would probably be better to filter with Markdown before they are
    # added in the database, but doing this would break older sites.
    remove_filter('pre_comment_content', 'wp_filter_kses');
    add_filter('comment_text', 'wp_filter_kses', 45);

    # Make balanceTags work *after* Markdown. You can still disable
    # balanceTags from the admin interface (in Options > Writing).
    remove_filter('content_save_pre', 'balanceTags', 50);
    remove_filter('excerpt_save_pre', 'balanceTags', 50);
    remove_filter('comment_save_pre', 'balanceTags', 50);
    add_filter('the_content',     'balanceTags', 50);
    add_filter('the_excerpt',     'balanceTags', 50);
    add_filter('the_excerpt_rss', 'balanceTags', 50);
    add_filter('comment_text',    'balanceTags', 50);
    add_filter('comment_excerpt', 'balanceTags', 50);
}
69
70
71 # -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
#
# bBlog plugin descriptor. Returns the metadata array describing this
# modifier; the reported version comes from the $MarkdownPHPVersion global.
#
    global $MarkdownPHPVersion;

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'Markdown';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
85
86 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
    # Smarty modifier entry point: hand the text to Markdown() and pass
    # its result straight back.
    $html = Markdown($text);
    return $html;
}
90
91 # -- Textile Compatibility Mode -----------------------------------------------
92 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
if (!strcasecmp(substr(__FILE__, -16), "classTextile.php")) {
    # This file has been renamed to stand in for Textile.
    # Pull in PHP SmartyPants if it is available; failure is ignored.
    @include_once 'smartypants.php';

    # Stand-in for the Textile class; it routes the text through Markdown.
    class Textile {
        # Runs Markdown() on $text unless $lite or $encode is non-empty,
        # then SmartyPants() if that function exists. $noimage and $strict
        # are accepted but not used.
        function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
            $use_markdown = ($lite == '' && $encode == '');
            if ($use_markdown) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }
    }
}
105
106
107
108 #
109 # Globals:
110 #
111
# Regex to match balanced [brackets].
# A maximum bracket nesting depth had to be introduced while converting to
# PHP: the pattern is the same atomic group opened, then closed,
# $md_nested_brackets_depth times.
$md_nested_brackets_depth = 6;
$md_nested_brackets =
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
    str_repeat('\])*', $md_nested_brackets_depth);

# Table of hash values for escaped characters:
$md_escape_table = array(
    "\\" => md5("\\"),
    "`" => md5("`"),
    "*" => md5("*"),
    "_" => md5("_"),
    "{" => md5("{"),
    "}" => md5("}"),
    "[" => md5("["),
    "]" => md5("]"),
    "(" => md5("("),
    ")" => md5(")"),
    ">" => md5(">"),
    "#" => md5("#"),
    "+" => md5("+"),
    "-" => md5("-"),
    "." => md5("."),
    "!" => md5("!")
);

# Create an identical table but keyed by the backslash-escaped characters.
# Start from an explicit empty array so the table never inherits entries
# (or a non-array value) already present in the global.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char) {
    $md_backslash_escape_table["\\$key"] = $char;
}
142
143
function Markdown($text) {
#
# Entry point: converts Markdown-formatted $text and returns the result
# with one trailing newline appended. The sequence of steps below must not
# be reordered.
#
    global $md_urls, $md_titles, $md_html_blocks;

    # Start from empty link/HTML-block tables so that converting several
    # articles on the same page never mixes their definitions.
    $md_html_blocks = array();
    $md_titles      = array();
    $md_urls        = array();

    # Normalize line endings to Unix style: DOS first, then old Mac.
    $text = str_replace("\r\n", "\n", $text);
    $text = str_replace("\r", "\n", $text);

    # Guarantee that the text ends with a blank line.
    $text = $text . "\n\n";

    # Expand tabs to spaces.
    $text = _Detab($text);

    # Empty out lines holding nothing but spaces/tabs, so later patterns
    # can detect blank lines with a plain /\n+/.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Remaining stages, in order: hash raw block-level HTML, pull out link
    # definitions, run the block-level transformations, then restore the
    # escaped special characters.
    $stages = array(
        '_HashHTMLBlocks',
        '_StripLinkDefinitions',
        '_RunBlockGamut',
        '_UnescapeSpecialChars'
    );
    foreach ($stages as $stage) {
        $text = $stage($text);
    }

    return $text . "\n";
}
188
189
function _StripLinkDefinitions($text) {
#
# Removes link definitions of the form
#     [id]: url "optional title"
# from $text. Each match is handed to _StripLinkDefinitions_callback(),
# and the text without the definitions is returned.
#
    global $md_tab_width;

    # A definition may be indented by at most one column less than a tab.
    $max_indent = $md_tab_width - 1;

    $definition_re = '{
                        ^[ ]{0,' . $max_indent . '}\[(.+)\]:    # id = $1
                          [ \t]*
                          \n?                # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>?            # url = $2
                          [ \t]*
                          \n?                # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s)            # lookbehind for whitespace
                            ["(]
                            (.+?)            # title = $3
                            [")]
                            [ \t]*
                        )?    # title is optional
                        (?:\n+|\Z)
        }xm';

    return preg_replace_callback(
        $definition_re,
        '_StripLinkDefinitions_callback',
        $text);
}
function _StripLinkDefinitions_callback($matches) {
    # Records one link definition in the global tables, keyed by the
    # lower-cased id, and returns the empty string that replaces it.
    global $md_urls, $md_titles;

    $id = strtolower($matches[1]);

    # The URL is stored after passing through _EncodeAmpsAndAngles().
    $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

    # The title is optional; double quotes in it become &quot; entities.
    if (isset($matches[3])) {
        $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
229
230
function _HashHTMLBlocks($text) {
#
# Swaps block-level HTML found in $text for hash keys by running four
# patterns, in a fixed order, through _HashHTMLBlocks_callback(). Only
# block-level tags are treated this way: paragraphs wrapped in inline
# tags (anchors, emphasis, spans) must still receive their <p> wrapper.
#
    global $md_tab_width;
    $max_indent = $md_tab_width - 1;

    # Hard-coded tag lists. The list used by the nested-block pass is the
    # liberal list plus ins and del.
    $liberal_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'script|noscript|form|fieldset|iframe|math';
    $nested_tags  = $liberal_tags . '|ins|del';

    $passes = array();

    # Pass 1: nested blocks, e.g.:
    #     <div>
    #         <div>
    #         tags for inner block must be indented.
    #         </div>
    #     </div>
    # The outermost tags must sit at the left margin and the inner ones
    # must be indented. This pass has to run before the liberal one, which
    # would otherwise stop at the first closing tag it meets.
    $passes[] = "{
                (                        # save in $1
                    ^                    # start of line  (with /m)
                    <($nested_tags)    # start tag = $2
                    \\b                    # word break
                    (.*\\n)*?            # any number of lines, minimally matching
                    </\\2>                # the matching end tag
                    [ \\t]*                # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # Pass 2: the liberal match, simply from `\n<tag>` to `</tag>\n`.
    $passes[] = "{
                (                        # save in $1
                    ^                    # start of line  (with /m)
                    <($liberal_tags)    # start tag = $2
                    \\b                    # word break
                    (.*\\n)*?            # any number of lines, minimally matching
                    .*</\\2>                # the matching end tag
                    [ \\t]*                # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # Pass 3: <hr />, handled on its own rather than complicating the
    # patterns above.
    $passes[] = '{
                (?:
                    (?<=\n\n)        # Starting after a blank line
                    |                # or
                    \A\n?            # the beginning of the doc
                )
                (                        # save in $1
                    [ ]{0,' . $max_indent . '}
                    <(hr)                # start tag = $2
                    \b                    # word break
                    ([^<>])*?            #
                    /?>                    # the matching end tag
                    [ \t]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                )
        }x';

    # Pass 4: standalone HTML comments.
    $passes[] = '{
                (?:
                    (?<=\n\n)        # Starting after a blank line
                    |                # or
                    \A\n?            # the beginning of the doc
                )
                (                        # save in $1
                    [ ]{0,' . $max_indent . '}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ \t]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                )
            }x';

    # Each pass works on the output of the previous one.
    foreach ($passes as $pattern) {
        $text = preg_replace_callback(
            $pattern,
            '_HashHTMLBlocks_callback',
            $text);
    }

    return $text;
}
function _HashHTMLBlocks_callback($matches) {
    # Stores the captured block ($matches[1]) in $md_html_blocks under its
    # MD5 hash and returns that hash padded with blank lines, which becomes
    # the replacement text.
    global $md_html_blocks;

    $block = $matches[1];
    $hash  = md5($block);
    $md_html_blocks[$hash] = $block;

    return "\n\n" . $hash . "\n\n";
}
339
340
function _RunBlockGamut($text) {
#
# Applies, in order, every transformation that produces block-level
# markup (headers, rules, lists, code blocks, block quotes, paragraphs)
# and returns the converted text.
#
    global $md_empty_element_suffix;

    $text = _DoHeaders($text);

    # Horizontal rules: lines made of three or more *, - or _ characters.
    $rule_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'
    );
    $rule_html = "\n<hr" . $md_empty_element_suffix . "\n";
    $text = preg_replace($rule_patterns, $rule_html, $text);

    $text = _DoLists($text);
    $text = _DoCodeBlocks($text);
    $text = _DoBlockQuotes($text);

    # Markdown() already ran _HashHTMLBlocks() on the raw HTML found in the
    # source. Run it again, this time over the markup generated just above,
    # so that _FormParagraphs() does not wrap <p> around block-level tags.
    $text = _HashHTMLBlocks($text);

    return _FormParagraphs($text);
}
374
375
function _RunSpanGamut($text) {
#
# Applies, in order, every transformation that takes place *inside*
# block-level elements such as paragraphs, headers and list items.
#
    global $md_empty_element_suffix;

    $text = _DoCodeSpans($text);
    $text = _EscapeSpecialChars($text);

    # Images are handled before anchors, since ![foo][f] would otherwise
    # be picked up as an anchor.
    $text = _DoImages($text);
    $text = _DoAnchors($text);

    # Autolinks such as `<http://example.com/>` come after _DoAnchors(),
    # because inline links may use < and > delimiters: [this](<url>).
    $text = _DoAutoLinks($text);

    # Encode stray ampersands and <'s.
    $text = _EncodeAmpsAndAngles($text);

    $text = _DoItalicsAndBold($text);

    # Hard breaks: two or more spaces right before a newline.
    $break_html = "<br" . $md_empty_element_suffix . "\n";
    return preg_replace('/ {2,}\n/', $break_html, $text);
}
407
408
function _EscapeSpecialChars($text) {
#
# Splits $text into tokens with _TokenizeHTML() and reassembles it.
# In tokens flagged 'tag', * and _ are replaced by their hashes from
# $md_escape_table so they cannot be mistaken for emphasis markers;
# all other tokens go through _EncodeBackslashEscapes().
#
    global $md_escape_table;

    $pieces = array();
    foreach (_TokenizeHTML($text) as $token) {
        $chunk = $token[1];
        if ($token[0] == 'tag') {
            # Using the MD5 checksum as the stand-in is likely overkill,
            # but it keeps the stand-in from colliding with real content.
            $chunk = str_replace('*', $md_escape_table['*'], $chunk);
            $chunk = str_replace('_', $md_escape_table['_'], $chunk);
        } else {
            $chunk = _EncodeBackslashEscapes($chunk);
        }
        $pieces[] = $chunk;
    }

    return implode('', $pieces);
}
437
438
function _DoAnchors($text) {
#
# Converts Markdown link shortcuts in $text into XHTML <a> tags:
# reference-style links first, inline-style links second.
#
    global $md_nested_brackets;

    # Reference-style links: [link text] [id]
    $reference_re = "{
        (                    # wrap whole match in $1
          \\[
            ($md_nested_brackets)    # link text = $2
          \\]

          [ ]?                # one optional space
          (?:\\n[ ]*)?        # one optional newline followed by spaces

          \\[
            (.*?)        # id = $3
          \\]
        )
        }xs";

    # Inline-style links: [link text](url "optional title")
    $inline_re = "{
        (                # wrap whole match in $1
          \\[
            ($md_nested_brackets)    # link text = $2
          \\]
          \\(            # literal paren
            [ \\t]*
            <?(.*?)>?    # href = $3
            [ \\t]*
            (            # $4
              (['\"])    # quote char = $5
              (.*?)        # Title = $6
              \\5        # matching quote
            )?            # title is optional
          \\)
        )
        }xs";

    $text = preg_replace_callback(
        $reference_re, '_DoAnchors_reference_callback', $text);
    $text = preg_replace_callback(
        $inline_re, '_DoAnchors_inline_callback', $text);

    return $text;
}
function _DoAnchors_reference_callback($matches) {
    # Builds the <a> tag for one reference-style link. When the id has no
    # entry in $md_urls, the matched text ($matches[1]) is returned as is.
    global $md_urls, $md_titles, $md_escape_table;

    $label = $matches[2];
    $id    = strtolower($matches[3]);
    if ($id === '') {
        # Shortcut form [this][]: the link text doubles as the id.
        $id = strtolower($label);
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ are replaced by their hashes so that they do not conflict
    # with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $tag = '<a href="' . str_replace($plain, $hashed, $md_urls[$id]) . '"';
    if (isset($md_titles[$id])) {
        $tag .= ' title="' . str_replace($plain, $hashed, $md_titles[$id]) . '"';
    }

    return $tag . '>' . $label . '</a>';
}
function _DoAnchors_inline_callback($matches) {
    # Builds the <a> tag for one inline-style link from the link text
    # ($matches[2]), the URL ($matches[3]) and the optional title
    # ($matches[6]).
    global $md_escape_table;

    # * and _ are replaced by their hashes so that they do not conflict
    # with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $tag = '<a href="' . str_replace($plain, $hashed, $matches[3]) . '"';

    # The title group is absent from $matches when no title was written.
    if (isset($matches[6])) {
        $title = str_replace('"', '&quot;', $matches[6]);
        $title = str_replace($plain, $hashed, $title);
        $tag .= ' title="' . $title . '"';
    }

    return $tag . '>' . $matches[2] . '</a>';
}
542
543
544 function _DoImages($text) {
545 #
546 # Turn Markdown image shortcuts into <img> tags.
547 #
548     #
549     # First, handle reference-style labeled images: ![alt text][id]
550     #
551     $text = preg_replace_callback('{
552         (                # wrap whole match in $1
553           !\[
554             (.*?)        # alt text = $2
555           \]
556
557           [ ]?                # one optional space
558           (?:\n[ ]*)?        # one optional newline followed by spaces
559
560           \[
561             (.*?)        # id = $3
562           \]
563
564         )
565         }xs',
566         '_DoImages_reference_callback', $text);
567
568     #
569     # Next, handle inline images:  ![alt text](url "optional title")
570     # Don't forget: encode * and _
571
572     $text = preg_replace_callback("{
573         (                # wrap whole match in $1
574           !\\[
575             (.*?)        # alt text = $2
576           \\]
577           \\(            # literal paren
578             [ \\t]*
579             <?(\S+?)>?    # src url = $3
580             [ \\t]*
581             (            # $4
582               (['\"])    # quote char = $5
583               (.*?)        # title = $6
584               \\5        # matching quote
585