def binary_search_with_duplicates(sorted_collection: list[int], item: int) -> list[int]:
    """Pure implementation of a binary search algorithm in Python that supports
    duplicates.

    Resources used:
    https://stackoverflow.com/questions/13197552/using-binary-search-with-sorted-array-with-duplicates

    The collection must be sorted in ascending order; otherwise a ValueError is
    raised. If the target appears multiple times, this function returns a list of
    all indexes where the target occurs. If the target is not found, this function
    returns an empty list.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search for
    :return: a list of indexes where the item is found (empty list if not found)
    :raises ValueError: if sorted_collection is not sorted in ascending order

    Examples:
    >>> binary_search_with_duplicates([0, 5, 7, 10, 15], 0)
    [0]
    >>> binary_search_with_duplicates([0, 5, 7, 10, 15], 15)
    [4]
    >>> binary_search_with_duplicates([1, 2, 2, 2, 3], 2)
    [1, 2, 3]
    >>> binary_search_with_duplicates([1, 2, 2, 2, 3], 4)
    []
    >>> binary_search_with_duplicates([], 1)
    []
    >>> binary_search_with_duplicates([2, 2, 2], 2)
    [0, 1, 2]
    >>> binary_search_with_duplicates([3, 1, 2], 1)
    Traceback (most recent call last):
        ...
    ValueError: sorted_collection must be sorted in ascending order
    """
    size = len(sorted_collection)

    # Validate ordering with one pass over neighbouring pairs. This avoids
    # copying and re-sorting the whole collection just to compare it.
    if any(sorted_collection[i] > sorted_collection[i + 1] for i in range(size - 1)):
        raise ValueError("sorted_collection must be sorted in ascending order")

    def lower_bound() -> int:
        """
        Returns the index of the first element greater than or equal to the item.

        :return: The index where the item can be inserted while maintaining order
            (equal to the collection length when every element is smaller).
        """
        left, right = 0, size
        while left < right:
            midpoint = left + (right - left) // 2
            if sorted_collection[midpoint] < item:
                left = midpoint + 1
            else:
                right = midpoint
        return left

    def upper_bound(start: int) -> int:
        """
        Returns the index of the first element strictly greater than the item.

        :param start: Index to begin searching from; every element before it must
            be less than or equal to the item.
        :return: The index where the item can be inserted after all existing
            instances.
        """
        left, right = start, size
        while left < right:
            midpoint = left + (right - left) // 2
            if sorted_collection[midpoint] <= item:
                left = midpoint + 1
            else:
                right = midpoint
        return left

    first = lower_bound()
    # When the lower bound does not land on the item, it is absent; the second
    # search is unnecessary.
    if first == size or sorted_collection[first] != item:
        return []

    # The element at ``first`` equals the item, so the upper bound lies strictly
    # after it; start the second search there instead of at index 0.
    return list(range(first, upper_bound(first + 1)))