Make the regex dot match any character at all, including a newline, when cleaning HTML strings (so multiline tag bodies are allowed, as they are in Telegram's HTML formatting style).

This commit is contained in:
davte 2022-12-26 12:13:53 +01:00 committed by davte
parent fafa639328
commit 27f9d62cf9
Signed by: Davte
GPG Key ID: 70336F92E6814706
2 changed files with 3 additions and 2 deletions

View File

@ -11,7 +11,7 @@ __author__ = "Davide Testa"
__email__ = "davide@davte.it"
__credits__ = ["Marco Origlia", "Nick Lee @Nickoala"]
__license__ = "GNU General Public License v3.0"
__version__ = "2.8.10"
__version__ = "2.8.11"
__maintainer__ = "Davide Testa"
__contact__ = "t.me/davte"

View File

@ -1450,7 +1450,8 @@ def clean_html_string(text: str) -> str:
rf'(?P<opening><{tag}{attribute}>)'
rf'(?P<body>.*?)'
rf'(?P<close></{tag}>)',
text
text,
flags=re.DOTALL
)
if match and (first_match is None or match.start() < first_match.start()):
first_match = match