This method contains the algorithm that StringSplitter uses to determine which character to split each string at. Args: @string: The substring that we are attempting to split. @max_break_idx: The ideal break index. We will return this value if it meets all the necessary conditions.
(self, string: str, max_break_idx: int)
| 1797 | return illegal_indices |
| 1798 | |
| 1799 | def _get_break_idx(self, string: str, max_break_idx: int) -> int | None: |
| 1800 | """ |
| 1801 | This method contains the algorithm that StringSplitter uses to |
| 1802 | determine which character to split each string at. |
| 1803 | |
| 1804 | Args: |
| 1805 | @string: The substring that we are attempting to split. |
| 1806 | @max_break_idx: The ideal break index. We will return this value if it |
| 1807 | meets all the necessary conditions. In the likely event that it |
| 1808 | doesn't, we will try to find the closest index BELOW @max_break_idx |
| 1809 | that does. If that fails, we will expand our search by also |
| 1810 | considering all valid indices ABOVE @max_break_idx. |
| 1811 | |
| 1812 | Pre-Conditions: |
| 1813 | * assert_is_leaf_string(@string) |
| 1814 | * 0 <= @max_break_idx < len(@string) |
| 1815 | |
| 1816 | Returns: |
| 1817 | break_idx, if an index is able to be found that meets all of the |
| 1818 | conditions listed in the 'Transformations' section of this class's |
| 1819 | docstring. |
| 1820 | OR |
| 1821 | None, otherwise. |
| 1822 | """ |
| 1823 | is_valid_index = is_valid_index_factory(string) |
| 1824 | |
| 1825 | assert is_valid_index(max_break_idx) |
| 1826 | assert_is_leaf_string(string) |
| 1827 | |
| 1828 | _illegal_split_indices = self._get_illegal_split_indices(string) |
| 1829 | |
| 1830 | def breaks_unsplittable_expression(i: Index) -> bool: |
| 1831 | """ |
| 1832 | Returns: |
| 1833 | True iff returning @i would result in the splitting of an |
| 1834 | unsplittable expression (which is NOT allowed). |
| 1835 | """ |
| 1836 | return i in _illegal_split_indices |
| 1837 | |
| 1838 | def passes_all_checks(i: Index) -> bool: |
| 1839 | """ |
| 1840 | Returns: |
| 1841 | True iff ALL of the conditions listed in the 'Transformations' |
| 1842 | section of this class's docstring would be met by returning @i. |
| 1843 | """ |
| 1844 | is_space = string[i] == " " |
| 1845 | is_split_safe = is_valid_index(i - 1) and string[i - 1] in SPLIT_SAFE_CHARS |
| 1846 | |
| 1847 | is_not_escaped = True |
| 1848 | j = i - 1 |
| 1849 | while is_valid_index(j) and string[j] == "\\": |
| 1850 | is_not_escaped = not is_not_escaped |
| 1851 | j -= 1 |
| 1852 | |
| 1853 | is_big_enough = ( |
| 1854 | len(string[i:]) >= self.MIN_SUBSTR_SIZE |
| 1855 | and len(string[:i]) >= self.MIN_SUBSTR_SIZE |
| 1856 | ) |
no test coverage detected