Skip to content

Commit 23d7bb6

Browse files
authored
Merge pull request #109 from andreskrey/master-issue67-taglike-characters
Simple refactor for the sanitization function
2 parents 8f8e3fe + a6fa448 commit 23d7bb6

File tree

2 files changed

+46
-37
lines changed

2 files changed

+46
-37
lines changed

src/Converter/ParagraphConverter.php

Lines changed: 45 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,9 @@ public function getSupportedTags()
4545
*/
4646
private function escapeSpecialCharacters($line)
4747
{
48-
$line = $this->escapeHeaderlikeCharacters($line);
49-
$line = $this->escapeBlockquotelikeCharacters($line);
50-
$line = $this->escapeOrderedListlikeCharacters($line);
51-
$line = $this->escapeListlikeCharacters($line);
48+
$line = $this->escapeFirstCharacters($line);
49+
$line = $this->escapeOtherCharacters($line);
50+
$line = $this->escapeOtherCharactersRegex($line);
5251

5352
return $line;
5453
}
@@ -58,59 +57,68 @@ private function escapeSpecialCharacters($line)
5857
*
5958
* @return string
6059
*/
61-
private function escapeBlockquotelikeCharacters($line)
60+
private function escapeFirstCharacters($line)
6261
{
63-
if (strpos(ltrim($line), '>') === 0) {
64-
// Found a > char, escaping it
65-
return '\\' . ltrim($line);
66-
} else {
67-
return $line;
68-
}
69-
}
62+
$escapable = array(
63+
'>',
64+
'- ',
65+
'+ ',
66+
'--',
67+
'~~~',
68+
'---',
69+
'- - -'
70+
);
7071

71-
/**
72-
* @param string $line
73-
*
74-
* @return string
75-
*/
76-
private function escapeHeaderlikeCharacters($line)
77-
{
78-
if (strpos(ltrim($line), '--') === 0) {
79-
// Found a -- structure, escaping it
80-
return '\\' . ltrim($line);
81-
} else {
82-
return $line;
72+
foreach ($escapable as $i) {
73+
if (strpos(ltrim($line), $i) === 0) {
74+
// Found a character sequence that must be escaped, prepending a backslash to the line
75+
return '\\' . ltrim($line);
76+
}
8377
}
78+
79+
return $line;
8480
}
8581

8682
/**
8783
* @param string $line
8884
*
8985
* @return string
9086
*/
91-
private function escapeOrderedListlikeCharacters($line)
87+
private function escapeOtherCharacters($line)
9288
{
93-
// This regex matches a run of digits at the beginning of the line that is immediately followed by ')' or '.'.
94-
if (preg_match('/^[0-9]+(?=\)|\.)/', $line, $match)) {
95-
// Found an Ordered list like character, escaping it
96-
return substr_replace($line, '\\', strlen($match[0]), 0);
97-
} else {
98-
return $line;
89+
$escapable = array(
90+
'<!--'
91+
);
92+
93+
foreach ($escapable as $i) {
94+
if (strpos($line, $i) !== false) {
95+
// Found an escapable character, escaping it
96+
$line = substr_replace($line, '\\', strpos($line, $i), 0);
97+
}
9998
}
99+
100+
return $line;
100101
}
101102

102103
/**
103104
* @param string $line
104105
*
105106
* @return string
106107
*/
107-
private function escapeListlikeCharacters($line)
108+
private function escapeOtherCharactersRegex($line)
108109
{
109-
if (strpos(ltrim($line), '- ') === 0 || strpos(ltrim($line), '+ ') === 0) {
110-
// Found a list-like character, escaping it
111-
return '\\' . ltrim($line);
112-
} else {
113-
return $line;
110+
$regExs = array(
111+
// Match a run of digits at the beginning of the line that is immediately followed by ')' or '.'.
112+
'/^[0-9]+(?=\)|\.)/'
113+
);
114+
115+
foreach ($regExs as $i) {
116+
if (preg_match($i, $line, $match)) {
117+
// Matched an escapable character, inserting a backslash into the string just before the offending character
118+
$line = substr_replace($line, '\\', strlen($match[0]), 0);
119+
}
114120
}
121+
122+
return $line;
115123
}
116124
}

tests/HtmlConverterTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,5 +221,6 @@ public function test_sanitization()
221221
$this->html_gives_markdown('<p>Foo<br>--<br>Bar<br>Foo--</p>', "Foo \n\\-- \nBar \nFoo--");
222222
$this->html_gives_markdown("<p>123456789) Foo and 1234567890) Bar!</p>\n<p>1. Platz in 'Das große Backen'</p>", "123456789\\) Foo and 1234567890) Bar!\n\n1\\. Platz in 'Das große Backen'");
223223
$this->html_gives_markdown("<p>\n+ Siri works well for TV and movies<br>\n- No 4K support\n</p>", "\+ Siri works well for TV and movies \n\- No 4K support");
224+
$this->html_gives_markdown('<p>You forgot the &lt;!--more--&gt; tag!</p>', 'You forgot the \<!--more--> tag!');
224225
}
225226
}

0 commit comments

Comments
 (0)