Convert any URLs in text into clickable links. Work on http://, https://, www. links, and also on links ending in one of the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org). Links can have trailing punctuation (periods, commas, close-parens) and leading punctuation (opening parens) and it'll still do the right thing.
| 287 | |
| 288 | |
| 289 | class Urlizer: |
| 290 | """ |
| 291 | Convert any URLs in text into clickable links. |
| 292 | |
| 293 | Work on http://, https://, www. links, and also on links ending in one of |
| 294 | the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org). |
| 295 | Links can have trailing punctuation (periods, commas, close-parens) and |
| 296 | leading punctuation (opening parens) and it'll still do the right thing. |
| 297 | """ |
| 298 | |
| 299 | trailing_punctuation_chars = ".,:;!" |
| 300 | wrapping_punctuation = [("(", ")"), ("[", "]")] |
| 301 | |
| 302 | simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE) |
| 303 | simple_url_2_re = _lazy_re_compile( |
| 304 | rf"^www\.|^(?!http)(?:{DomainNameValidator.hostname_re})" |
| 305 | rf"(?:{DomainNameValidator.domain_re})" |
| 306 | r"\.(com|edu|gov|int|mil|net|org)($|/.*)$", |
| 307 | re.IGNORECASE, |
| 308 | ) |
| 309 | word_split_re = _lazy_re_compile(r"""([\s<>"']+)""") |
| 310 | |
| 311 | mailto_template = "mailto:{local}@{domain}" |
| 312 | url_template = '<a href="{href}"{attrs}>{url}</a>' |
| 313 | |
def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Return ``text`` with each URL-like word replaced by its link markup.

    ``text`` is converted to ``str`` and split with ``word_split_re``;
    every resulting piece is passed to ``handle_word()`` and the results
    are concatenated in their original order.

    If trim_url_limit is not None, URLs in the link text longer than this
    limit are truncated to trim_url_limit - 1 characters and an ellipsis
    is appended.

    If nofollow is True, the links get a rel="nofollow" attribute.

    If autoescape is True, the link text and URLs are autoescaped.
    """
    # Options shared by every handle_word() call for this text. Whether the
    # input was already marked safe is determined once, up front.
    word_options = {
        "safe_input": isinstance(text, SafeData),
        "trim_url_limit": trim_url_limit,
        "nofollow": nofollow,
        "autoescape": autoescape,
    }

    # Per-call memo: a piece that occurs several times in the text is only
    # converted once.
    converted = {}
    pieces = []
    for token in self.word_split_re.split(str(text)):
        rendered = converted.get(token)
        if rendered is None:
            rendered = converted[token] = self.handle_word(token, **word_options)
        pieces.append(rendered)
    return "".join(pieces)
| 341 | |
| 342 | def handle_word( |
| 343 | self, |
| 344 | word, |
| 345 | *, |
| 346 | safe_input, |
no test coverage detected