source code: ./othercms/xenForo 2.2.8/src/XF/BbCode/ProcessorAction/Markdown.php

Seditio Source
./othercms/xenForo 2.2.8/src/XF/BbCode/ProcessorAction/Markdown.php

<?php





namespace XF\BbCode\ProcessorAction;





use XF\BbCode\Parser;


use XF\BbCode\RuleSet;





use function count, strlen, strval;





class Markdown implements FiltererInterface, ProcessorAwareInterface


{


    protected $tokens;





    /**


     * @var \XF\BbCode\Processor


     */


    protected $processor;





    /**


     * @var array


     */


    protected $tags;





    protected $enabled = true;





    protected $smilieStrings = [];





    public function setProcessor(\XF\BbCode\Processor $processor)


    {


        $this->processor = $processor;


    }





    public function setSmilieStrings(array $smilies)


    {


        $this->smilieStrings = array_fill_keys($smilies, true);


    }





    public function addFiltererHooks(FiltererHooks $hooks)


    {


        $hooks->addParseHook('filterInput');


    }





    public function setEnabled($enabled)


    {


        $this->enabled = (bool)$enabled;


    }





    public function filterInput($string, Parser $parser, RuleSet $rules, array &$options)


    {


        if (!$this->enabled)


        {


            return $string;


        }





        $this->tags = $rules->getTags();





        return $this->parseMarkdown($string);


    }





    protected function getBbCodePlainTags()


    {


        $tags = ['code', 'icode', 'php', 'html', 'plain', 'media', 'img', 'user', 'attach'];





        if ($this->tags)


        {


            foreach ($this->tags AS $tag => $config)


            {


                if (!empty($config['plain']))


                {


                    $this->tags[] = $tag;


                }


            }


        }





        return array_unique($tags);


    }





    protected function addToken($string)


    {


        $tokenId = count($this->tokens);


        $token = "\x1A" . $tokenId . "\x1A";





        $this->tokens[$tokenId] = $string;





        return $token;


    }





    protected function replaceTokens($string, $isTopLevel = false)


    {


        if ($this->tokens)


        {


            $string = preg_replace_callback(


                "#\x1A(\d+)\x1A#",


                function($match)


                {


                    $tokenId = $match[1];


                    if (isset($this->tokens[$tokenId]))


                    {


                        return $this->replaceTokens($this->tokens[$tokenId]);


                    }


                    else


                    {


                        return '';


                    }


                },


                $string


            );





            if ($isTopLevel)


            {


                $string = str_replace("\x1A", '', $string);


            }


        }





        return $string;


    }





    protected function parseMarkdown($string)


    {


        $this->tokens = [];





        // tokenize stuff we don't want parsed for markdown


        $string = $this->stashBbCodeListItems($string);


        $string = $this->stashBbCodePlainItems($string);


        $string = $this->stashBbCodeMarkUp($string);


        $string = $this->stashUrls($string);





        // parse markdown


        $string = $this->parseFencedCode($string);


        $string = $this->parseUnorderedLists($string);


        $string = $this->parseOrderedLists($string);


        $string = $this->parseBlockQuote($string);


        $string = $this->parseInlineCode($string);


        $string = $this->parseLinks($string);


        $string = $this->parseInlineTags($string);





        // restore stuff that was stashed or tokenized


        $string = $this->replaceTokens($string, true);





        return $string;


    }





    /**


     * Replace BB Code [*] items with tokens so they are not parsed for tokens


     *


     * @param $string


     *


     * @return string


     */


    protected function stashBbCodeListItems($string)


    {


        return preg_replace_callback('/\[\*\]/', function($match)


        {


            return $this->addToken($match[0]);


        }, $string);


    }





    /**


     * Replace BB Code "plain" items with tokens so they are not parsed for markdown


     *


     * @param $string


     *


     * @return string


     */


    protected function stashBbCodePlainItems($string)


    {


        $plainTagsRegex = '(' . implode('|', $this->getBbCodePlainTags()) . ')';





        return preg_replace_callback(


            '#\[' . $plainTagsRegex . '(=[^\]]*)?](.*)\[/\\1]#siU',


            function($match)


            {


                return $this->addToken($match[0]);


            },


            $string


        );


    }





    /**


     * Stashes actual BB code markup so Markdown is not matched within it.


     *


     * @param string $string


     *


     * @return string


     */


    protected function stashBbCodeMarkUp($string)


    {


        if ($this->tags)


        {


            $tagRegex = '(' . implode('|', array_keys($this->tags)) . ')';


        }


        else


        {


            $tagRegex = '\w+';


        }





        return preg_replace_callback(


            '#(\[' . $tagRegex . '(?:=[^\]]*)?+\]|\[\w+(?:\s?\w+="[^"]*")+\]|\[/' . $tagRegex . '\])#si',


            function($match)


            {


                return $this->addToken($match[0]);


            },


            $string


        );


    }





    /**


     * Stashes things that look like URLs so they don't get modified by Markdown parsing.


     *


     * @param string $string


     *


     * @return string


     */


    protected function stashUrls($string)


    {


        // note: keep URLs preceded by ]( as this is likely a Markdown link





        return preg_replace_callback(


            '#(?<=[^a-z0-9@/\.-]|^)(?<!\]\()(https?://|www\.)[^\s"<>{}`\[]+#siu',


            function($match)


            {


                $url = $match[0];


                $suffix = '';





                // if we pick up a trailer that looks like inline MD, we need to step back our stashing before it


                $suffixMatchesMd = true;


                while ($suffixMatchesMd)


                {


                    $lastChar = substr($url, -1);





                    switch ($lastChar)


                    {


                        case '_':


                        case '*':


                        case '~':


                        case '`':


                            $url = substr($url, 0, -1);


                            $suffix = $lastChar . $suffix;


                            break;





                        default:


                            $suffixMatchesMd = false;


                    }


                }





                return $this->addToken($url) . $suffix;


            },


            $string


        );


    }





    /**


     * Parse markdown fenced code into [CODE] BB code


     *


     * @param $string


     *


     * @return string


     */


    protected function parseFencedCode($string)


    {


        return preg_replace_callback('/(?<=\s|^)(`{3}|~{3})([^\n`]+)?\n(.+)\1(?=\s|$)/siU', function($match)


        {


            if (empty($match[2]))


            {


                return $this->addToken('[CODE]' . $match[3] . '[/CODE]');


            }


            else if (preg_match('/^[a-z0-9]+$/i', $match[2]) && substr($match[3], -1) == "\n")


            {


                $match[3] = rtrim($match[3]);





                if (strtoupper($match[2]) == 'RICH')


                {


                    return "[CODE=\"rich\"]{$match[3]}[/CODE]";


                }


                else


                {


                    return $this->addToken("[CODE=\"{$match[2]}\"]" . $match[3] . '[/CODE]');


                }


            }


            else


            {


                return $this->addToken('[CODE]' . "{$match[2]}\n{$match[3]}" . '[/CODE]');


            }


        }, $string);


    }





    /**


     * Parse markdown inline code into [ICODE] BB code


     *


     * @param $string


     *


     * @return string


     */


    protected function parseInlineCode($string)


    {


        return preg_replace_callback('/(?<=\W|^)(?<!@)(`{1,3})([^`\n]+)\1(?=\W|$)/U', function($match)


        {


            if ($this->isMatchSmilie($match[0]))


            {


                return $match[0];


            }





            return $this->addToken('[ICODE]' . $match[2] . '[/ICODE]');


        }, $string);


    }





    /**


     * Parse markdown links and images to [URL] and [IMG]


     *


     * @param $string


     *


     * @return string


     */


    protected function parseLinks($string)


    {


        return preg_replace_callback('/(\!)?\[([^\]]*)\]\((http[^\)]+)\)/iU', function($match)


        {


            $url = trim($match[3]);


            $text = str_replace('"', "'", trim($match[2]));





            if (!empty($match[1]))


            {


                // image


                if (strlen($text))


                {


                    return $this->addToken("[IMG alt=\"{$text}\"]{$url}[/IMG]");


                }


                else


                {


                    return $this->addToken("[IMG]{$url}[/IMG]");


                }


            }


            else


            {


                // link


                if (strlen($text))


                {


                    $text = $this->parseInlineTags($text);


                    return $this->addToken("[URL=\"{$url}\"]{$text}[/URL]");


                }


                else


                {


                    return $this->addToken("[URL]{$url}[/URL]");


                }


            }


        }, $string);


    }





    /**


     * Parse markdown blockquote bits and pieces into [QUOTE]


     *


     * @param $string


     *


     * @return string


     */


    protected function parseBlockQuote($string)


    {


        return preg_replace_callback('/(?<=\n|^)([ \t]*)>([^\r\n]*)(\r?\n\1>[^\r\n]*)*/', function($match)


        {


            return '[QUOTE]' . preg_replace('/(?<=\n|^)[ \t]*>([^\r\n]*)/', '$1', $match[0]) . '[/QUOTE]';


        }, $string);


    }





    /**


     * Parse inline markdown tags into [B], [I], [S] and [ICODE]


     *


     * @param $string


     *


     * @return string


     */


    protected function parseInlineTags($string)


    {


        // __ delimiters require non-word characters on either side


        $string = preg_replace_callback(


            '#(?<=\W|^)(?<!@)(__|_(?!_))(?!\s)(.+)(?<!\s)\\1(?!_)(?=\W|$)#U',


            function($match)


            {


                if ($this->isMatchSmilie($match[0]))


                {


                    return $match[0];


                }


                if (preg_match('#^_+$#', $match[0]))


                {


                    return $match[0];


                }





                $innerText = $this->parseInlineTags($match[2]);





                if ($match[1] == '__')


                {


                    return "[B]{$innerText}[/B]";


                }


                else


                {


                    return "[I]{$innerText}[/I]";


                }


            },


            $string


        );





        // note: parsing ~ to strikethrough has been disabled due to false positives


        /*// ~ delimiters require non-word characters on either side


        $string = preg_replace_callback(


            '#(?<=\W|^)(?<!@)(~+?(?!~))(?!\s)(.+)(?<!\s)\\1(?!~)(?=\W|$)#U',


            function ($match) {


                if ($this->isMatchSmilie($match[0]))


                {


                    return $match[0];


                }





                $innerText = $this->parseInlineTags($match[2]);





                return "[S]{$innerText}[/S]";


            },


            $string


        );*/





        // * now require non-word characters on either side to avoid false positives with things like word censoring


        $string = preg_replace_callback(


            '#(?<=\W|^)(?<!@)(\*\*|(?<!\[)\*(?!\*|]))(?!\s)(.+)(?<!\s)\\1(?!\*)(?=\W|$)#U',


            function($match)


            {


                if ($this->isMatchSmilie($match[0]))


                {


                    return $match[0];


                }


                if (preg_match('#^\*+$#', $match[0]))


                {


                    return $match[0];


                }





                $innerText = $this->parseInlineTags($match[2]);





                if ($match[1] == '**')


                {


                    return "[B]{$innerText}[/B]";


                }


                else


                {


                    return "[I]{$innerText}[/I]";


                }


            },


            $string


        );





        return $string;


    }





    /**


     * Parse unordered markdown lists into [LIST] [*]...


     * Supports star, dash or plus then space or tab


     *


     * @param $string


     *


     * @return string


     */


    protected function parseUnorderedLists($string)


    {


        return preg_replace_callback('#(?<=\n|^)(\*|-|\+)[ \t]+[^\r\n]+(\r?\n\\1[ \t]+[^\r\n]+)*(?:\r?\n|$)#', function($match)


        {


            $listItems = preg_split('#(\n|^)(\*|-|\+)[ \t]+#', $match[0], -1, PREG_SPLIT_NO_EMPTY);


            if (count($listItems) < 2)


            {


                return $match[0];


            }





            $listItems = $this->adjustListItems($listItems, $postListContent);


            return "[LIST]\n[*]" . implode("\n[*]", $listItems) . "\n[/LIST]\n$postListContent";


        }, $string);


    }





    /**


     * Parse ordered markdown lists into [LIST="1"] [*]...


     * Supports 1-9 digits followed by . or ) and then space or tab


     *


     * @param $string


     *


     * @return string


     */


    protected function parseOrderedLists($string)


    {


        return $string;





        // Ordered list parsing has been disabled as it can trigger false positives that are difficult to recover from.


        // To re-enable it, we likely need to support starting lists at different values. We may also wish to be


        // more strict with our matching, such as requiring the numbers to move up continuously. Otherwise, we still risk


        // unexpected behaviors.





        /*return preg_replace_callback('#(?<=\n|^)((?:\d{1,9})(?:\.|\))[ \t]+([^\r\n]+)(?:\r?\n|$)){2,}#', function($match)


        {


            $listItems = preg_split('#(\n|^)\d+(?:\.|\))[ \t]+#', $match[0], -1, PREG_SPLIT_NO_EMPTY);


            if (count($listItems) < 2)


            {


                return $match[0];


            }





            $listItems = $this->adjustListItems($listItems, $postListContent);


            return "[LIST=\"1\"]\n[*]" . implode("\n[*]", $listItems) . "\n[/LIST]\n$postListContent";


        }, $string);*/


    }





    /**


     * Adjusts the content of the list items for BB code. Primarily, handles situations where there may be


     * existing BB code at the end of the last list item which shouldn't be part of the list.


     *


     * @param array $listItems


     * @param string $postListContent Content to move after the list if needed


     *


     * @return array


     */


    protected function adjustListItems(array $listItems, &$postListContent)


    {


        $postListContent = '';





        $listItems = array_map('trim', $listItems);





        $lastItem = end($listItems);


        $lastId = key($listItems);





        if (preg_match_all("#\x1A(\d+)\x1A#", $lastItem, $tokenMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE))


        {


            $openTags = [];





            foreach ($tokenMatches AS $tokenMatch)


            {


                $tokenId = $tokenMatch[1][0];


                if (!isset($this->tokens[$tokenId]))


                {


                    // this really shouldn't happen


                    continue;


                }





                $token = $this->tokens[$tokenId];





                if (preg_match('#^\[(\w+)#i', $token, $openMatch))


                {


                    // tag open


                    array_unshift($openTags, strtolower($openMatch[1]));


                }


                else if (preg_match('#^\[/(\w+)\]#i', $token, $closeMatch))


                {


                    // close match


                    if (!$openTags || $openTags[0] != strtolower($closeMatch[1]))


                    {


                        // invalid close, split here and move outside the list


                        $postListContent = strval(substr($lastItem, $tokenMatch[0][1])) . "\n";


                        $listItems[$lastId] = strval(substr($lastItem, 0, $tokenMatch[0][1]));


                        break;


                    }


                    else


                    {


                        // close the last tag as it matches


                        array_shift($openTags);


                    }


                }


            }


        }





        return $listItems;


    }





    protected function isMatchSmilie($match)


    {


        return isset($this->smilieStrings[$match]);


    }





    public static function factory(\XF\App $app)


    {


        $md = new static();





        if (!$app->options()->convertMarkdownToBbCode)


        {


            $md->setEnabled(false);


        }





        $md->setSmilieStrings($app->stringFormatter()->getSmilieStrings());





        return $md;


    }


}