namespace XF\BbCode\ProcessorAction;
use XF\BbCode\Parser;
use XF\BbCode\RuleSet;
use function count, strlen, strval;
class Markdown implements FiltererInterface, ProcessorAwareInterface
// Stashed strings indexed by numeric token id; reset to [] by parseMarkdown() and appended to by addToken().
protected $tokens;
* @var \XF\BbCode\Processor
// Processor instance handed in through setProcessor().
protected $processor;
* @var array
// Tag definitions copied from the RuleSet in filterInput(); iterated elsewhere in this class as tag name => config.
protected $tags;
// When false, filterInput() returns its input untouched.
protected $enabled = true;
// Lookup map whose keys are smilie strings (every value is true); consulted by isMatchSmilie().
protected $smilieStrings = [];
/**
 * Stores the given BB code processor on this instance.
 *
 * @param \XF\BbCode\Processor $processor
 */
public function setProcessor(\XF\BbCode\Processor $processor)
{
    $this->processor = $processor;
}
/**
 * Records the smilie strings as a lookup map (smilie string => true) so that
 * isMatchSmilie() can test membership with a single isset().
 *
 * @param array $smilies List of smilie strings
 */
public function setSmilieStrings(array $smilies)
{
    $lookup = array_fill_keys($smilies, true);

    $this->smilieStrings = $lookup;
}
// Receives the FiltererHooks collection for this action.
// NOTE(review): no statement belonging to this method is visible in the reviewed excerpt, so which
// hook(s) it registers cannot be stated here — confirm the body against the full file.
public function addFiltererHooks(FiltererHooks $hooks)
/**
 * Switches Markdown conversion on or off for this instance.
 *
 * @param mixed $enabled Any value; it is cast to a boolean before being stored
 */
public function setEnabled($enabled)
{
    $flag = (bool) $enabled;

    $this->enabled = $flag;
}
/**
 * Input filter: converts Markdown in the text to BB code when conversion is enabled.
 *
 * When the action is disabled the text is handed back unchanged. Otherwise the
 * rule set's tags are remembered on the instance before the Markdown pass runs.
 *
 * @param string  $string  Text to filter
 * @param Parser  $parser  Not used by this method
 * @param RuleSet $rules   Source of the tag definitions
 * @param array   $options Passed by reference; not used by this method
 *
 * @return string
 */
public function filterInput($string, Parser $parser, RuleSet $rules, array &$options)
{
    if ($this->enabled) {
        $this->tags = $rules->getTags();
        $string = $this->parseMarkdown($string);
    }

    return $string;
}
/**
 * Lists the BB code tags whose content must never be parsed as Markdown.
 *
 * Starts from a fixed set of known plain-content tags and adds every tag from
 * the current rule set whose config has a truthy 'plain' entry.
 *
 * @return array Tag names without duplicates (keys are preserved by array_unique, so they may be sparse)
 */
protected function getBbCodePlainTags()
{
    $tags = ['code', 'icode', 'php', 'html', 'plain', 'media', 'img', 'user', 'attach'];

    if ($this->tags) {
        foreach ($this->tags as $tag => $config) {
            if (!empty($config['plain'])) {
                // collect into the local result; the rule set tag map itself must stay untouched
                $tags[] = $tag;
            }
        }
    }

    return array_unique($tags);
}
/**
 * Stashes a string and returns the placeholder that stands in for it.
 *
 * The placeholder is the next free numeric id wrapped in \x1A bytes.
 *
 * @param string $string Content to stash
 *
 * @return string Placeholder token
 */
protected function addToken($string)
{
    // ids are handed out sequentially, so the current size is the next id
    $id = count($this->tokens);
    $this->tokens[$id] = $string;

    return "\x1A" . $id . "\x1A";
}
// Expands token placeholders in $string back into the strings stashed via addToken().
// Stashed strings are expanded recursively, and when $isTopLevel is true any leftover
// \x1A marker bytes are stripped from the final result.
protected function replaceTokens($string, $isTopLevel = false)
// nothing to expand when no tokens were stashed
if ($this->tokens)
$string = preg_replace_callback(
// NOTE(review): the pattern and the callback signature are not visible in this excerpt;
// $match[1] is presumably the numeric id captured from a \x1A<id>\x1A placeholder — confirm.
$tokenId = $match[1];
if (isset($this->tokens[$tokenId]))
// the stashed content may itself contain placeholders; the nested call is not top-level
return $this->replaceTokens($this->tokens[$tokenId]);
// an id with no stashed content expands to nothing
return '';
if ($isTopLevel)
// remove any stray marker bytes from the finished text
$string = str_replace("\x1A", '', $string);
return $string;
/**
 * Runs the full Markdown-to-BB-code pass over a string.
 *
 * The token stash is cleared first. Content that must not be interpreted as
 * Markdown is then replaced by tokens, the Markdown constructs are converted,
 * and finally every token is swapped back for its original content.
 *
 * @param string $string
 *
 * @return string
 */
protected function parseMarkdown($string)
{
    $this->tokens = [];

    // order matters: each step receives the output of the previous one
    $steps = [
        // tokenize stuff we don't want parsed for markdown
        'stashBbCodeListItems',
        'stashBbCodePlainItems',
        'stashBbCodeMarkUp',
        'stashUrls',

        // parse markdown
        'parseFencedCode',
        'parseUnorderedLists',
        'parseOrderedLists',
        'parseBlockQuote',
        'parseInlineCode',
        'parseLinks',
        'parseInlineTags',
    ];

    foreach ($steps as $step) {
        $string = $this->{$step}($string);
    }

    // restore stuff that was stashed or tokenized
    return $this->replaceTokens($string, true);
}
/**
 * Replaces BB code [*] list item markers with tokens so they are not parsed as Markdown.
 *
 * @param string $string
 *
 * @return string
 */
protected function stashBbCodeListItems($string)
{
    $stash = function ($found) {
        return $this->addToken($found[0]);
    };

    return preg_replace_callback('/\[\*\]/', $stash, $string);
}
* Replace BB Code "plain" items with tokens so they are not parsed for markdown
* @param $string
* @return string
// Swaps every "plain" BB code element (opening tag, content and closing tag) for a single token.
protected function stashBbCodePlainItems($string)
// alternation group built from every tag name returned by getBbCodePlainTags()
// NOTE(review): the tag names are interpolated into the pattern without escaping
$plainTagsRegex = '(' . implode('|', $this->getBbCodePlainTags()) . ')';
return preg_replace_callback(
// [tag] or [tag=option], then everything up to the closing tag of the same name (\1);
// flags: s = dot matches newlines, i = case-insensitive, U = ungreedy quantifiers
'#\[' . $plainTagsRegex . '(=[^\]]*)?](.*)\[/\\1]#siU',
// the complete match is stashed as one unit
// NOTE(review): the callback header and the subject argument are not visible in this excerpt
return $this->addToken($match[0]);
* Stashes actual BB code markup so Markdown is not matched within it.
* @param string $string
* @return string
// Swaps individual BB code tags (not their content) for tokens.
protected function stashBbCodeMarkUp($string)
if ($this->tags)
// alternation of the tag names known to the current rule set
$tagRegex = '(' . implode('|', array_keys($this->tags)) . ')';
// NOTE(review): no `else` is visible between these two assignments in this excerpt; presumably
// '\w+' is the fallback used only when no tags are known — confirm against the full file.
$tagRegex = '\w+';
return preg_replace_callback(
// three alternatives: an opening tag with an optional =option, a tag with one or more
// name="value" attributes, and a closing tag; matching is case-insensitive and dot matches newlines
'#(\[' . $tagRegex . '(?:=[^\]]*)?+\]|\[\w+(?:\s?\w+="[^"]*")+\]|\[/' . $tagRegex . '\])#si',
// each matched tag is stashed on its own
return $this->addToken($match[0]);
* Stashes things that look like URLs so they don't get modified by Markdown parsing.
* @param string $string
* @return string
// Swaps URL-like text for tokens, keeping trailing Markdown delimiter characters outside the token.
protected function stashUrls($string)
// note: keep URLs preceded by ]( as this is likely a Markdown link
return preg_replace_callback(
// NOTE(review): the URL pattern and the callback header are not visible in this excerpt
$url = $match[0];
// characters peeled off the end of the URL, kept in their original order
$suffix = '';
// if we pick up a trailer that looks like inline MD, we need to step back our stashing before it
$suffixMatchesMd = true;
while ($suffixMatchesMd)
$lastChar = substr($url, -1);
switch ($lastChar)
case '_':
case '*':
case '~':
case '`':
// move the delimiter from the end of the URL to the front of the suffix
$url = substr($url, 0, -1);
$suffix = $lastChar . $suffix;
// NOTE(review): the lines separating the cases above from the assignment below are not visible
// in this excerpt; presumably it is the default branch that ends the loop — confirm.
$suffixMatchesMd = false;
// only the trimmed URL is stashed; the peeled-off characters stay in the text
return $this->addToken($url) . $suffix;
/**
 * Converts Markdown fenced code blocks (``` or ~~~ fences) into [CODE] BB code.
 *
 * Text on the opening fence line is used as the code language when it is purely
 * alphanumeric and the block content ends with a newline; otherwise it is kept
 * as the first line of the code itself. All results are stashed as tokens except
 * for the "rich" language, which is returned inline.
 *
 * @param string $string
 *
 * @return string
 */
protected function parseFencedCode($string)
{
    $fencePattern = '/(?<=\s|^)(`{3}|~{3})([^\n`]+)?\n(.+)\1(?=\s|$)/siU';

    $convert = function ($match) {
        // nothing after the opening fence: plain code block
        if (empty($match[2])) {
            return $this->addToken('[CODE]' . $match[3] . '[/CODE]');
        }

        $info = $match[2];
        $code = $match[3];

        $isLanguage = preg_match('/^[a-z0-9]+$/i', $info) && substr($code, -1) == "\n";
        if (!$isLanguage) {
            // not a usable language name, so it is part of the code
            return $this->addToken('[CODE]' . "{$info}\n{$code}" . '[/CODE]');
        }

        $code = rtrim($code);

        if (strtoupper($info) == 'RICH') {
            // rich code is not stashed, so later steps still see its content
            return "[CODE=\"rich\"]{$code}[/CODE]";
        }

        return $this->addToken("[CODE=\"{$info}\"]" . $code . '[/CODE]');
    };

    return preg_replace_callback($fencePattern, $convert, $string);
}
/**
 * Converts Markdown inline code (text wrapped in one to three backticks) into
 * [ICODE] BB code, stashed as a token. Matches that are configured smilie
 * strings are left untouched.
 *
 * @param string $string
 *
 * @return string
 */
protected function parseInlineCode($string)
{
    $toInlineCode = function ($match) {
        return $this->isMatchSmilie($match[0])
            ? $match[0]
            : $this->addToken('[ICODE]' . $match[2] . '[/ICODE]');
    };

    return preg_replace_callback(
        '/(?<=\W|^)(?<!@)(`{1,3})([^`\n]+)\1(?=\W|$)/U',
        $toInlineCode,
        $string
    );
}
/**
 * Converts Markdown links and images with an http(s) target into [URL] and [IMG] BB code.
 *
 * A leading "!" marks an image, and its text becomes the alt attribute. Link text
 * is additionally run through parseInlineTags(). Double quotes in the text are
 * replaced by single quotes, and every result is stashed as a token.
 *
 * @param string $string
 *
 * @return string
 */
protected function parseLinks($string)
{
    $convert = function ($match) {
        $url = trim($match[3]);
        $label = str_replace('"', "'", trim($match[2]));
        $hasLabel = strlen($label) > 0;

        if (!empty($match[1])) {
            // image
            $bbCode = $hasLabel
                ? "[IMG alt=\"{$label}\"]{$url}[/IMG]"
                : "[IMG]{$url}[/IMG]";

            return $this->addToken($bbCode);
        }

        // link
        if (!$hasLabel) {
            return $this->addToken("[URL]{$url}[/URL]");
        }

        $label = $this->parseInlineTags($label);

        return $this->addToken("[URL=\"{$url}\"]{$label}[/URL]");
    };

    return preg_replace_callback('/(\!)?\[([^\]]*)\]\((http[^\)]+)\)/iU', $convert, $string);
}
/**
 * Converts runs of consecutive lines starting with ">" (with matching leading
 * indentation) into a single [QUOTE] block, removing the ">" markers.
 *
 * @param string $string
 *
 * @return string
 */
protected function parseBlockQuote($string)
{
    $blockPattern = '/(?<=\n|^)([ \t]*)>([^\r\n]*)(\r?\n\1>[^\r\n]*)*/';
    $markerPattern = '/(?<=\n|^)[ \t]*>([^\r\n]*)/';

    $toQuote = function ($block) use ($markerPattern) {
        // drop the leading indentation and ">" from each quoted line
        $unquoted = preg_replace($markerPattern, '$1', $block[0]);

        return '[QUOTE]' . $unquoted . '[/QUOTE]';
    };

    return preg_replace_callback($blockPattern, $toQuote, $string);
}
* Parse inline markdown emphasis into [B] and [I] (strikethrough parsing into [S] is currently disabled)
* @param $string
* @return string
// Converts underscore- and asterisk-delimited emphasis into [B] / [I] BB code, recursing into the inner text.
// NOTE(review): the regex arguments and callback headers of the preg_replace_callback() calls are not
// visible in this excerpt; judging by its use below, $match[1] is the delimiter and $match[2] the
// enclosed text — confirm against the full file.
protected function parseInlineTags($string)
// __ delimiters require non-word characters on either side
$string = preg_replace_callback(
// a match that is a configured smilie string is left exactly as written
if ($this->isMatchSmilie($match[0]))
return $match[0];
// a match consisting solely of underscores is left alone
if (preg_match('#^_+$#', $match[0]))
return $match[0];
// the enclosed text may contain further emphasis, so it is parsed again
$innerText = $this->parseInlineTags($match[2]);
// a double delimiter produces bold; anything else produces italic
if ($match[1] == '__')
return "[B]{$innerText}[/B]";
return "[I]{$innerText}[/I]";
// note: parsing ~ to strikethrough has been disabled due to false positives
/*// ~ delimiters require non-word characters on either side
$string = preg_replace_callback(
function ($match) {
if ($this->isMatchSmilie($match[0]))
return $match[0];
$innerText = $this->parseInlineTags($match[2]);
return "[S]{$innerText}[/S]";
// * now require non-word characters on either side to avoid false positives with things like word censoring
$string = preg_replace_callback(
if ($this->isMatchSmilie($match[0]))
return $match[0];
// a match consisting solely of asterisks is left alone
if (preg_match('#^\*+$#', $match[0]))
return $match[0];
$innerText = $this->parseInlineTags($match[2]);
// same bold/italic rule as for underscores
if ($match[1] == '**')
return "[B]{$innerText}[/B]";
return "[I]{$innerText}[/I]";
return $string;
/**
 * Converts Markdown bullet lists into [LIST] BB code with one [*] entry per item.
 *
 * An item is a line starting with "*", "-" or "+" followed by spaces or tabs, and
 * all items of one list must use the same bullet character. A run that yields
 * fewer than two items is left as it was.
 *
 * @param string $string
 *
 * @return string
 */
protected function parseUnorderedLists($string)
{
    $listPattern = '#(?<=\n|^)(\*|-|\+)[ \t]+[^\r\n]+(\r?\n\\1[ \t]+[^\r\n]+)*(?:\r?\n|$)#';
    $bulletPattern = '#(\n|^)(\*|-|\+)[ \t]+#';

    $toList = function ($match) use ($bulletPattern) {
        $items = preg_split($bulletPattern, $match[0], -1, PREG_SPLIT_NO_EMPTY);
        if (count($items) < 2) {
            return $match[0];
        }

        // $trailing is filled in by reference with content that has to follow the list
        $items = $this->adjustListItems($items, $trailing);

        return "[LIST]\n[*]" . implode("\n[*]", $items) . "\n[/LIST]\n$trailing";
    };

    return preg_replace_callback($listPattern, $toList, $string);
}
* Parse ordered markdown lists into [LIST="1"] [*]...
* Supports 1-9 digits followed by . or ) and then space or tab
* @param $string
* @return string
// Currently a no-op: the input is returned unchanged. The previous implementation is retained
// below, commented out, for reference only.
protected function parseOrderedLists($string)
return $string;
// Ordered list parsing has been disabled as it can trigger false positives that are difficult to recover from.
// To re-enable it, we likely need to support starting lists at different values. We may also wish to be
// more strict with our matching, such as requiring the numbers to move up continuously. Otherwise, we still risk
// unexpected behaviors.
/*return preg_replace_callback('#(?<=\n|^)((?:\d{1,9})(?:\.|\))[ \t]+([^\r\n]+)(?:\r?\n|$)){2,}#', function($match)
$listItems = preg_split('#(\n|^)\d+(?:\.|\))[ \t]+#', $match[0], -1, PREG_SPLIT_NO_EMPTY);
if (count($listItems) < 2)
return $match[0];
$listItems = $this->adjustListItems($listItems, $postListContent);
return "[LIST=\"1\"]\n[*]" . implode("\n[*]", $listItems) . "\n[/LIST]\n$postListContent";
}, $string);*/
* Adjusts the content of the list items for BB code. Primarily, handles situations where there may be
* existing BB code at the end of the last list item which shouldn't be part of the list.
* @param array $listItems
* @param string $postListContent Content to move after the list if needed
* @return array
// Trims every list item and checks the stashed BB code tags inside the last item: when a closing tag
// there has no matching opener within the item, the item is cut at that tag and the remainder is
// handed back through $postListContent so the caller can place it after the list.
protected function adjustListItems(array $listItems, &$postListContent)
// the by-reference output always starts out empty
$postListContent = '';
$listItems = array_map('trim', $listItems);
// end() moves the internal pointer to the last element, so key() yields that element's key
$lastItem = end($listItems);
$lastId = key($listItems);
// collect every token placeholder in the last item together with its byte offset
if (preg_match_all("#\x1A(\d+)\x1A#", $lastItem, $tokenMatches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE))
// names of the tags opened so far, most recently opened first
$openTags = [];
foreach ($tokenMatches AS $tokenMatch)
// with PREG_OFFSET_CAPTURE each capture is a [text, offset] pair
$tokenId = $tokenMatch[1][0];
if (!isset($this->tokens[$tokenId]))
// this really shouldn't happen
// NOTE(review): no statement is visible under this guard in the excerpt; presumably the token is skipped — confirm.
$token = $this->tokens[$tokenId];
if (preg_match('#^\[(\w+)#i', $token, $openMatch))
// tag open
array_unshift($openTags, strtolower($openMatch[1]));
else if (preg_match('#^\[/(\w+)\]#i', $token, $closeMatch))
// close match
if (!$openTags || $openTags[0] != strtolower($closeMatch[1]))
// invalid close, split here and move outside the list
$postListContent = strval(substr($lastItem, $tokenMatch[0][1])) . "\n";
$listItems[$lastId] = strval(substr($lastItem, 0, $tokenMatch[0][1]));
// NOTE(review): the statement leaving the loop after the split is not visible in this excerpt — confirm.
// close the last tag as it matches
// NOTE(review): the statement removing the matched tag from $openTags is not visible in this excerpt — confirm.
return $listItems;
/**
 * Tells whether the given text is one of the configured smilie strings.
 *
 * @param string $match Text to look up
 *
 * @return bool
 */
protected function isMatchSmilie($match)
{
    $isKnownSmilie = isset($this->smilieStrings[$match]);

    return $isKnownSmilie;
}
// Creates an instance of the called class (new static) configured from the application options.
public static function factory(\XF\App $app)
$md = new static();
// reads the convertMarkdownToBbCode option
if (!$app->options()->convertMarkdownToBbCode)
// NOTE(review): the statement guarded by this condition is not visible in the excerpt; presumably
// the instance is disabled when the option is off — confirm against the full file.
return $md;