Trim trailing and wrapping punctuation from `word`. Return the new state as a `(lead, middle, trail)` tuple.
(self, word)
| 427 | return ";" in self.trailing_punctuation_chars |
| 428 | |
def trim_punctuation(self, word):
    """
    Split `word` into a `(lead, middle, trail)` tuple of strings.

    `lead` is the run of opening wrapping punctuation removed from the start
    of `word`, `trail` is everything trimmed from the end, and `middle` is
    what remains between them.
    """
    # Peel every opening wrapping character off the front of the word.
    middle = word.lstrip(self.wrapping_punctuation_openings)
    lead = word[: len(word) - len(middle)]
    # Pieces are trimmed right-to-left, so each one is pushed on the left to
    # keep them in their original order.
    trimmed_parts = deque()

    # NOTE(review): presumably per-character occurrence counts taken from the
    # initial `middle`; CountsDict is defined elsewhere -- confirm.
    counts = CountsDict(word=middle)

    # Re-run the trimming passes until a full round changes nothing (or
    # nothing is left to trim).
    while middle:
        changed = False

        # Pass 1: closing wrapping punctuation that outnumbers its opener.
        for opener, closer in self.wrapping_punctuation:
            if counts[opener] >= counts[closer]:
                continue
            if middle.rstrip(closer) == middle:
                # `middle` does not end with this closing character.
                continue
            # NOTE(review): the slice length is the surplus of closers over
            # openers, not the length of the trailing run of closers, so it
            # can reach past that run -- confirm this is intended.
            surplus = counts[closer] - counts[opener]
            trimmed_parts.appendleft(middle[-surplus:])
            middle = middle[:-surplus]
            counts[closer] -= surplus
            changed = True

        # Pass 2: plain trailing punctuation. When an "&" is present a
        # trailing ";" may close an HTML entity, so it is left for pass 3.
        amp = middle.rfind("&")
        strippable = (
            self.trailing_punctuation_chars
            if amp == -1
            else self.trailing_punctuation_chars_no_semicolon
        )
        without_punct = middle.rstrip(strippable)
        if without_punct != middle:
            trimmed_parts.appendleft(middle[len(without_punct) :])
            middle = without_punct
            changed = True

        # Pass 3: trailing semicolons, unless they belong to an HTML entity.
        if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
            candidate = middle[amp:]
            decoded = html.unescape(candidate)
            # Unchanged text means there was no entity to decode; a decoded
            # result still ending in ";" means extra semicolons follow it.
            if decoded == candidate or decoded.endswith(";"):
                trail_start = len(middle.rstrip(self.trailing_punctuation_chars))
                semicolon_run = len(middle) - len(middle.rstrip(";"))
                if amp > -1 and semicolon_run > 1:
                    # Keep everything up to and including the first
                    # semicolon of the tail, as it might end an entity.
                    cut = middle.index(";", trail_start) + 1
                else:
                    cut = trail_start
                trimmed_parts.appendleft(middle[cut:])
                middle = middle[:cut]
                changed = True

        if not changed:
            break

    return lead, middle, "".join(trimmed_parts)
no test coverage detected