diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 731e3fad3b85..2bb8e1d69217 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,22 +9,25 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - uses: astral-sh/setup-uv@v6 + - run: sudo apt-get update && sudo apt-get install -y libhdf5-dev + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: uv.lock - uses: actions/setup-python@v6 with: - python-version: 3.x + python-version: 3.14 allow-prereleases: true - run: uv sync --group=test - name: Run tests # TODO: #8818 Re-enable quantum tests - run: uv run pytest + run: uv run --with=pytest-run-parallel pytest + --iterations=8 --parallel-threads=auto --ignore=computer_vision/cnn_classification.py --ignore=docs/conf.py --ignore=dynamic_programming/k_means_clustering_tensorflow.py + --ignore=machine_learning/local_weighted_learning/local_weighted_learning.py --ignore=machine_learning/lstm/lstm_prediction.py --ignore=neural_network/input_data.py --ignore=project_euler/ diff --git a/.github/workflows/devcontainer_ci.yml b/.github/workflows/devcontainer_ci.yml index 71623e5e6e69..d1b81593866f 100644 --- a/.github/workflows/devcontainer_ci.yml +++ b/.github/workflows/devcontainer_ci.yml @@ -12,7 +12,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: devcontainers/ci@v0.3 with: push: never diff --git a/.github/workflows/directory_writer.yml b/.github/workflows/directory_writer.yml index 9a4682677c00..deffbe9e364f 100644 --- a/.github/workflows/directory_writer.yml +++ b/.github/workflows/directory_writer.yml @@ -6,12 +6,13 @@ jobs: directory_writer: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: fetch-depth: 0 - uses: actions/setup-python@v6 with: - python-version: 3.x + python-version: 3.14 + allow-prereleases: true - name: Write DIRECTORY.md run: | scripts/build_directory_md.py 2>&1 | tee DIRECTORY.md diff --git a/.github/workflows/project_euler.yml b/.github/workflows/project_euler.yml index f52ff280b29a..591b2163cc1a 100644 --- a/.github/workflows/project_euler.yml +++ b/.github/workflows/project_euler.yml @@ -14,21 +14,37 @@ jobs: project-euler: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - uses: astral-sh/setup-uv@v6 + - run: + sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev + zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk + libharfbuzz-dev libfribidi-dev libxcb1-dev + libxml2-dev libxslt-dev + libhdf5-dev + libopenblas-dev + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v7 - uses: actions/setup-python@v6 with: - python-version: 3.x + python-version: 3.14 + allow-prereleases: true - run: uv sync --group=euler-validate --group=test - run: uv run pytest --doctest-modules --cov-report=term-missing:skip-covered --cov=project_euler/ project_euler/ validate-solutions: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - uses: astral-sh/setup-uv@v6 + - run: + sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev + zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk + libharfbuzz-dev libfribidi-dev libxcb1-dev + libxml2-dev libxslt-dev + libhdf5-dev + libopenblas-dev + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v7 - uses: actions/setup-python@v6 with: - 
python-version: 3.x + python-version: 3.14 + allow-prereleases: true - run: uv sync --group=euler-validate --group=test - run: uv run pytest scripts/validate_solutions.py env: diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 7bcc2850782f..13df19c8d743 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -11,6 +11,6 @@ jobs: ruff: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 - - uses: astral-sh/setup-uv@v6 + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v7 - run: uvx ruff check --output-format=github . diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml index bd253dc3de65..bf0a74a239c8 100644 --- a/.github/workflows/sphinx.yml +++ b/.github/workflows/sphinx.yml @@ -25,11 +25,18 @@ jobs: build_docs: runs-on: ubuntu-24.04-arm steps: - - uses: actions/checkout@v5 - - uses: astral-sh/setup-uv@v6 + - run: + sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev + zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk + libharfbuzz-dev libfribidi-dev libxcb1-dev + libxml2-dev libxslt-dev + libhdf5-dev + libopenblas-dev + - uses: actions/checkout@v6 + - uses: astral-sh/setup-uv@v7 - uses: actions/setup-python@v6 with: - python-version: 3.13 + python-version: 3.14 allow-prereleases: true - run: uv sync --group=docs - uses: actions/configure-pages@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c30442a2a6f6..57f92ce941d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,6 @@ +ci: + autoupdate_schedule: monthly + repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 @@ -16,7 +19,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.12 + rev: v0.14.7 hooks: - id: ruff-check - id: ruff-format @@ -29,7 +32,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: v2.6.0 + rev: v2.11.1 hooks: - id: pyproject-fmt @@ -47,7 +50,7 @@ repos: - id: validate-pyproject - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.17.1 + rev: v1.19.0 hooks: - id: mypy args: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3df39f95b784..35de0bf75ed5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -99,7 +99,7 @@ We want your work to be readable by others; therefore, we encourage you to note ruff check ``` -- Original code submission require docstrings or comments to describe your work. +- Original code submissions require docstrings or comments to describe your work. 
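CONTRIBUTING.md's requirement above (docstrings or comments on every original submission) is easiest to see with an example. Here is a minimal sketch in the repository's house style (typed signature, explanatory docstring, doctests), written for illustration rather than taken from the codebase:

```python
def greatest_common_divisor(num_1: int, num_2: int) -> int:
    """
    Return the greatest common divisor of two non-negative integers
    using the iterative Euclidean algorithm.

    >>> greatest_common_divisor(12, 18)
    6
    >>> greatest_common_divisor(7, 0)
    7
    """
    while num_2:
        # Replace (a, b) with (b, a mod b) until the remainder is zero.
        num_1, num_2 = num_2, num_1 % num_2
    return num_1
```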
- More on docstrings and comments: diff --git a/DIRECTORY.md b/DIRECTORY.md index 36acb3b97f1e..0f9859577493 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -195,6 +195,7 @@ * [Permutations](data_structures/arrays/permutations.py) * [Prefix Sum](data_structures/arrays/prefix_sum.py) * [Product Sum](data_structures/arrays/product_sum.py) + * [Rotate Array](data_structures/arrays/rotate_array.py) * [Sparse Table](data_structures/arrays/sparse_table.py) * [Sudoku Solver](data_structures/arrays/sudoku_solver.py) * Binary Tree @@ -623,6 +624,7 @@ * [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py) * [Similarity Search](machine_learning/similarity_search.py) * [Support Vector Machines](machine_learning/support_vector_machines.py) + * [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py) * [Word Frequency Functions](machine_learning/word_frequency_functions.py) * [Xgboost Classifier](machine_learning/xgboost_classifier.py) * [Xgboost Regressor](machine_learning/xgboost_regressor.py) diff --git a/ciphers/gronsfeld_cipher.py b/ciphers/gronsfeld_cipher.py index 8fbeab4307fc..a72b141bd502 100644 --- a/ciphers/gronsfeld_cipher.py +++ b/ciphers/gronsfeld_cipher.py @@ -20,7 +20,7 @@ def gronsfeld(text: str, key: str) -> str: >>> gronsfeld('yes, ¥€$ - _!@#%?', '') Traceback (most recent call last): ... - ZeroDivisionError: integer modulo by zero + ZeroDivisionError: division by zero """ ascii_len = len(ascii_uppercase) key_len = len(key) diff --git a/data_structures/arrays/rotate_array.py b/data_structures/arrays/rotate_array.py new file mode 100644 index 000000000000..d5ce4b4078b3 --- /dev/null +++ b/data_structures/arrays/rotate_array.py @@ -0,0 +1,80 @@ +def rotate_array(arr: list[int], steps: int) -> list[int]: + """ + Rotates a list to the right by steps positions. + + Parameters: + arr (List[int]): The list of integers to rotate. + steps (int): Number of positions to rotate. Can be negative for left rotation. + + Returns: + List[int]: Rotated list. + + Examples: + >>> rotate_array([1, 2, 3, 4, 5], 2) + [4, 5, 1, 2, 3] + >>> rotate_array([1, 2, 3, 4, 5], -2) + [3, 4, 5, 1, 2] + >>> rotate_array([1, 2, 3, 4, 5], 7) + [4, 5, 1, 2, 3] + >>> rotate_array([], 3) + [] + """ + + n = len(arr) + if n == 0: + return arr + + steps = steps % n + + if steps < 0: + steps += n + + def reverse(start: int, end: int) -> None: + """ + Reverses a portion of the list in place from index start to end. + + Parameters: + start (int): Starting index of the portion to reverse. + end (int): Ending index of the portion to reverse. + + Returns: + None + + Examples: + >>> example = [1, 2, 3, 4, 5] + >>> def reverse_test(arr, start, end): + ... while start < end: + ... arr[start], arr[end] = arr[end], arr[start] + ... start += 1 + ... 
end -= 1 + >>> reverse_test(example, 0, 2) + >>> example + [3, 2, 1, 4, 5] + >>> reverse_test(example, 2, 4) + >>> example + [3, 2, 5, 4, 1] + """ + + while start < end: + arr[start], arr[end] = arr[end], arr[start] + start += 1 + end -= 1 + + reverse(0, n - 1) + reverse(0, steps - 1) + reverse(steps, n - 1) + + return arr + + +if __name__ == "__main__": + examples = [ + ([1, 2, 3, 4, 5], 2), + ([1, 2, 3, 4, 5], -2), + ([1, 2, 3, 4, 5], 7), + ([], 3), + ] + + for arr, steps in examples: + rotated = rotate_array(arr.copy(), steps) + print(f"Rotate {arr} by {steps}: {rotated}") diff --git a/data_structures/arrays/sudoku_solver.py b/data_structures/arrays/sudoku_solver.py index 4c722f12fd6e..d2fa43bbf298 100644 --- a/data_structures/arrays/sudoku_solver.py +++ b/data_structures/arrays/sudoku_solver.py @@ -11,6 +11,19 @@ def cross(items_a, items_b): """ Cross product of elements in A and elements in B. + + >>> cross('AB', '12') + ['A1', 'A2', 'B1', 'B2'] + >>> cross('ABC', '123') + ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3'] + >>> cross('ABC', '1234') + ['A1', 'A2', 'A3', 'A4', 'B1', 'B2', 'B3', 'B4', 'C1', 'C2', 'C3', 'C4'] + >>> cross('', '12') + [] + >>> cross('A', '') + [] + >>> cross('', '') + [] """ return [a + b for a in items_a for b in items_b] @@ -149,7 +162,7 @@ def search(values): if all(len(values[s]) == 1 for s in squares): return values ## Solved! ## Chose the unfilled square s with the fewest possibilities - n, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1) + _n, s = min((len(values[s]), s) for s in squares if len(values[s]) > 1) return some(search(assign(values.copy(), s, d)) for d in values[s]) diff --git a/data_structures/queues/circular_queue.py b/data_structures/queues/circular_queue.py index efbf1efdc42d..e9cb2cac4fd8 100644 --- a/data_structures/queues/circular_queue.py +++ b/data_structures/queues/circular_queue.py @@ -17,7 +17,7 @@ def __len__(self) -> int: >>> len(cq) 0 >>> cq.enqueue("A") # doctest: +ELLIPSIS - >>> cq.array ['A', None, None, None, None] >>> len(cq) @@ -51,17 +51,24 @@ def enqueue(self, data): """ This function inserts an element at the end of the queue using self.rear value as an index. + >>> cq = CircularQueue(5) >>> cq.enqueue("A") # doctest: +ELLIPSIS - >>> (cq.size, cq.first()) (1, 'A') >>> cq.enqueue("B") # doctest: +ELLIPSIS - >>> cq.array ['A', 'B', None, None, None] >>> (cq.size, cq.first()) (2, 'A') + >>> cq.enqueue("C").enqueue("D").enqueue("E") # doctest: +ELLIPSIS + + >>> cq.enqueue("F") + Traceback (most recent call last): + ... 
+ Exception: QUEUE IS FULL """ if self.size >= self.n: raise Exception("QUEUE IS FULL") @@ -75,6 +82,7 @@ def dequeue(self): """ This function removes an element from the queue using on self.front value as an index and returns it + >>> cq = CircularQueue(5) >>> cq.dequeue() Traceback (most recent call last): diff --git a/data_structures/trie/radix_tree.py b/data_structures/trie/radix_tree.py index caf566a6ce30..bd2306befa79 100644 --- a/data_structures/trie/radix_tree.py +++ b/data_structures/trie/radix_tree.py @@ -115,7 +115,7 @@ def find(self, word: str) -> bool: if not incoming_node: return False else: - matching_string, remaining_prefix, remaining_word = incoming_node.match( + _matching_string, remaining_prefix, remaining_word = incoming_node.match( word ) # If there is remaining prefix, the word can't be on the tree @@ -144,7 +144,7 @@ def delete(self, word: str) -> bool: if not incoming_node: return False else: - matching_string, remaining_prefix, remaining_word = incoming_node.match( + _matching_string, remaining_prefix, remaining_word = incoming_node.match( word ) # If there is remaining prefix, the word can't be on the tree diff --git a/graphs/graph_adjacency_list.py b/graphs/graph_adjacency_list.py index 244e59e0e1bf..34014d69dfb8 100644 --- a/graphs/graph_adjacency_list.py +++ b/graphs/graph_adjacency_list.py @@ -61,6 +61,15 @@ def add_vertex(self, vertex: T) -> None: """ Adds a vertex to the graph. If the given vertex already exists, a ValueError will be thrown. + + >>> g = GraphAdjacencyList(vertices=[], edges=[], directed=False) + >>> g.add_vertex("A") + >>> g.adj_list + {'A': []} + >>> g.add_vertex("A") + Traceback (most recent call last): + ... + ValueError: Incorrect input: A is already in the graph. """ if self.contains_vertex(vertex): msg = f"Incorrect input: {vertex} is already in the graph." 
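A pattern repeated in the radix-tree hunks above and in the graph-test hunks below: unpacked-but-unused variables gain a leading underscore, presumably to satisfy Ruff's unused-variable checks (F841 and, for tuple unpacking, RUF059), which exempt names matching the default dummy-variable regex. A small illustrative sketch, not repository code:

```python
def x_extent(points: list[tuple[int, int]]) -> tuple[int, int]:
    """Return the (min, max) of the x coordinates only.

    >>> x_extent([(3, 9), (1, 4), (2, 7)])
    (1, 3)
    """
    # The y values are deliberately unused; the underscore prefix tells
    # the linter this is intentional, so no warning is emitted.
    xs, _ys = zip(*points)
    return min(xs), max(xs)
```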
@@ -448,7 +457,7 @@ def test_remove_edge(self) -> None: ( undirected_graph, directed_graph, - random_vertices, + _random_vertices, random_edges, ) = self.__generate_graphs(20, 0, 100, 4) @@ -502,7 +511,7 @@ def test_add_vertex_exception_check(self) -> None: undirected_graph, directed_graph, random_vertices, - random_edges, + _random_edges, ) = self.__generate_graphs(20, 0, 100, 4) for vertex in random_vertices: @@ -516,7 +525,7 @@ def test_remove_vertex_exception_check(self) -> None: undirected_graph, directed_graph, random_vertices, - random_edges, + _random_edges, ) = self.__generate_graphs(20, 0, 100, 4) for i in range(101): @@ -530,7 +539,7 @@ def test_add_edge_exception_check(self) -> None: ( undirected_graph, directed_graph, - random_vertices, + _random_vertices, random_edges, ) = self.__generate_graphs(20, 0, 100, 4) @@ -569,7 +578,7 @@ def test_contains_edge_exception_check(self) -> None: undirected_graph, directed_graph, random_vertices, - random_edges, + _random_edges, ) = self.__generate_graphs(20, 0, 100, 4) for vertex in random_vertices: diff --git a/graphs/graph_adjacency_matrix.py b/graphs/graph_adjacency_matrix.py index 8eeeae786513..6dca0fbbcf05 100644 --- a/graphs/graph_adjacency_matrix.py +++ b/graphs/graph_adjacency_matrix.py @@ -469,7 +469,7 @@ def test_remove_edge(self) -> None: ( undirected_graph, directed_graph, - random_vertices, + _random_vertices, random_edges, ) = self.__generate_graphs(20, 0, 100, 4) @@ -523,7 +523,7 @@ def test_add_vertex_exception_check(self) -> None: undirected_graph, directed_graph, random_vertices, - random_edges, + _random_edges, ) = self.__generate_graphs(20, 0, 100, 4) for vertex in random_vertices: @@ -537,7 +537,7 @@ def test_remove_vertex_exception_check(self) -> None: undirected_graph, directed_graph, random_vertices, - random_edges, + _random_edges, ) = self.__generate_graphs(20, 0, 100, 4) for i in range(101): @@ -551,7 +551,7 @@ def test_add_edge_exception_check(self) -> None: ( undirected_graph, directed_graph, - random_vertices, + _random_vertices, random_edges, ) = self.__generate_graphs(20, 0, 100, 4) @@ -590,7 +590,7 @@ def test_contains_edge_exception_check(self) -> None: undirected_graph, directed_graph, random_vertices, - random_edges, + _random_edges, ) = self.__generate_graphs(20, 0, 100, 4) for vertex in random_vertices: diff --git a/knapsack/tests/test_greedy_knapsack.py b/knapsack/tests/test_greedy_knapsack.py index e6a40084109e..7ebaddd3c99e 100644 --- a/knapsack/tests/test_greedy_knapsack.py +++ b/knapsack/tests/test_greedy_knapsack.py @@ -28,7 +28,7 @@ def test_negative_max_weight(self): # profit = [10, 20, 30, 40, 50, 60] # weight = [2, 4, 6, 8, 10, 12] # max_weight = -15 - pytest.raises(ValueError, match="max_weight must greater than zero.") + pytest.raises(ValueError, match=r"max_weight must greater than zero.") def test_negative_profit_value(self): """ @@ -38,7 +38,7 @@ def test_negative_profit_value(self): # profit = [10, -20, 30, 40, 50, 60] # weight = [2, 4, 6, 8, 10, 12] # max_weight = 15 - pytest.raises(ValueError, match="Weight can not be negative.") + pytest.raises(ValueError, match=r"Weight can not be negative.") def test_negative_weight_value(self): """ @@ -48,7 +48,7 @@ def test_negative_weight_value(self): # profit = [10, 20, 30, 40, 50, 60] # weight = [2, -4, 6, -8, 10, 12] # max_weight = 15 - pytest.raises(ValueError, match="Profit can not be negative.") + pytest.raises(ValueError, match=r"Profit can not be negative.") def test_null_max_weight(self): """ @@ -58,7 +58,7 @@ def 
test_null_max_weight(self): # profit = [10, 20, 30, 40, 50, 60] # weight = [2, 4, 6, 8, 10, 12] # max_weight = null - pytest.raises(ValueError, match="max_weight must greater than zero.") + pytest.raises(ValueError, match=r"max_weight must greater than zero.") def test_unequal_list_length(self): """ @@ -68,7 +68,9 @@ def test_unequal_list_length(self): # profit = [10, 20, 30, 40, 50] # weight = [2, 4, 6, 8, 10, 12] # max_weight = 100 - pytest.raises(IndexError, match="The length of profit and weight must be same.") + pytest.raises( + IndexError, match=r"The length of profit and weight must be same." + ) if __name__ == "__main__": diff --git a/linear_algebra/gaussian_elimination.py b/linear_algebra/gaussian_elimination.py index 6f4075b710fd..cf816940b0d1 100644 --- a/linear_algebra/gaussian_elimination.py +++ b/linear_algebra/gaussian_elimination.py @@ -33,7 +33,7 @@ def retroactive_resolution( [ 0.5]]) """ - rows, columns = np.shape(coefficients) + rows, _columns = np.shape(coefficients) x: NDArray[float64] = np.zeros((rows, 1), dtype=float) for row in reversed(range(rows)): diff --git a/linear_algebra/jacobi_iteration_method.py b/linear_algebra/jacobi_iteration_method.py index 2cc9c103018b..0f9fcde7af6c 100644 --- a/linear_algebra/jacobi_iteration_method.py +++ b/linear_algebra/jacobi_iteration_method.py @@ -112,7 +112,7 @@ def jacobi_iteration_method( (coefficient_matrix, constant_matrix), axis=1 ) - rows, cols = table.shape + rows, _cols = table.shape strictly_diagonally_dominant(table) @@ -149,7 +149,7 @@ def jacobi_iteration_method( # Here we get 'i_col' - these are the column numbers, for each row # without diagonal elements, except for the last column. - i_row, i_col = np.where(masks) + _i_row, i_col = np.where(masks) ind = i_col.reshape(-1, rows - 1) #'i_col' is converted to a two-dimensional list 'ind', which will be diff --git a/machine_learning/apriori_algorithm.py b/machine_learning/apriori_algorithm.py index 09a89ac236bd..5c3e2baba2c2 100644 --- a/machine_learning/apriori_algorithm.py +++ b/machine_learning/apriori_algorithm.py @@ -11,6 +11,7 @@ Examples: https://www.kaggle.com/code/earthian/apriori-association-rules-mining """ +from collections import Counter from itertools import combinations @@ -44,11 +45,16 @@ def prune(itemset: list, candidates: list, length: int) -> list: >>> prune(itemset, candidates, 3) [] """ + itemset_counter = Counter(tuple(item) for item in itemset) pruned = [] for candidate in candidates: is_subsequence = True for item in candidate: - if item not in itemset or itemset.count(item) < length - 1: + item_tuple = tuple(item) + if ( + item_tuple not in itemset_counter + or itemset_counter[item_tuple] < length - 1 + ): is_subsequence = False break if is_subsequence: diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index 72970431c3fc..b4df64796bb1 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -146,14 +146,13 @@ def predict(self, x): """ if self.prediction is not None: return self.prediction - elif self.left or self.right is not None: + elif self.left is not None and self.right is not None: if x >= self.decision_boundary: return self.right.predict(x) else: return self.left.predict(x) else: - print("Error: Decision tree not yet trained") - return None + raise ValueError("Decision tree not yet trained") class TestDecisionTree: @@ -201,4 +200,4 @@ def main(): main() import doctest - doctest.testmod(name="mean_squarred_error", verbose=True) + 
doctest.testmod(name="mean_squared_error", verbose=True) diff --git a/machine_learning/polynomial_regression.py b/machine_learning/polynomial_regression.py index 212f40bea197..f52177df1292 100644 --- a/machine_learning/polynomial_regression.py +++ b/machine_learning/polynomial_regression.py @@ -93,7 +93,7 @@ def _design_matrix(data: np.ndarray, degree: int) -> np.ndarray: ... ValueError: Data must have dimensions N x 1 """ - rows, *remaining = data.shape + _rows, *remaining = data.shape if remaining: raise ValueError("Data must have dimensions N x 1") diff --git a/machine_learning/principle_component_analysis.py b/machine_learning/principle_component_analysis.py index 46ccdb968494..174500d89620 100644 --- a/machine_learning/principle_component_analysis.py +++ b/machine_learning/principle_component_analysis.py @@ -65,7 +65,7 @@ def main() -> None: """ Driver function to execute PCA and display results. """ - data_x, data_y = collect_dataset() + data_x, _data_y = collect_dataset() # Number of principal components to retain n_components = 2 diff --git a/machine_learning/t_stochastic_neighbour_embedding.py b/machine_learning/t_stochastic_neighbour_embedding.py new file mode 100644 index 000000000000..d6f630149087 --- /dev/null +++ b/machine_learning/t_stochastic_neighbour_embedding.py @@ -0,0 +1,178 @@ +""" +t-distributed stochastic neighbor embedding (t-SNE) + +For more details, see: +https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding +""" + +import doctest + +import numpy as np +from numpy import ndarray +from sklearn.datasets import load_iris + + +def collect_dataset() -> tuple[ndarray, ndarray]: + """ + Load the Iris dataset and return features and labels. + + Returns: + tuple[ndarray, ndarray]: Feature matrix and target labels. + + >>> features, targets = collect_dataset() + >>> features.shape + (150, 4) + >>> targets.shape + (150,) + """ + iris_dataset = load_iris() + return np.array(iris_dataset.data), np.array(iris_dataset.target) + + +def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray: + """ + Compute high-dimensional affinities (P matrix) using a Gaussian kernel. + + Args: + data_matrix: Input data of shape (n_samples, n_features). + sigma: Gaussian kernel bandwidth. + + Returns: + ndarray: Symmetrized probability matrix. + + >>> x = np.array([[0.0, 0.0], [1.0, 0.0]]) + >>> probabilities = compute_pairwise_affinities(x) + >>> float(round(probabilities[0, 1], 3)) + 0.25 + """ + n_samples = data_matrix.shape[0] + squared_sum = np.sum(np.square(data_matrix), axis=1) + squared_distance = np.add( + np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum + ) + + affinity_matrix = np.exp(-squared_distance / (2 * sigma**2)) + np.fill_diagonal(affinity_matrix, 0) + + affinity_matrix /= np.sum(affinity_matrix) + return (affinity_matrix + affinity_matrix.T) / (2 * n_samples) + + +def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]: + """ + Compute low-dimensional affinities (Q matrix) using a Student-t distribution. + + Args: + embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components). + + Returns: + tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix). 
+
+    >>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
+    >>> q_matrix, numerators = compute_low_dim_affinities(y)
+    >>> q_matrix.shape
+    (2, 2)
+    """
+    squared_sum = np.sum(np.square(embedding_matrix), axis=1)
+    numerator_matrix = 1 / (
+        1
+        + np.add(
+            np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T,
+            squared_sum,
+        )
+    )
+    np.fill_diagonal(numerator_matrix, 0)
+
+    q_matrix = numerator_matrix / np.sum(numerator_matrix)
+    return q_matrix, numerator_matrix
+
+
+def apply_tsne(
+    data_matrix: ndarray,
+    n_components: int = 2,
+    learning_rate: float = 200.0,
+    n_iter: int = 500,
+) -> ndarray:
+    """
+    Apply t-SNE for dimensionality reduction.
+
+    Args:
+        data_matrix: Original dataset (features).
+        n_components: Target dimension (2D or 3D).
+        learning_rate: Step size for gradient descent.
+        n_iter: Number of iterations.
+
+    Returns:
+        ndarray: Low-dimensional embedding of the data.
+
+    >>> features, _ = collect_dataset()
+    >>> embedding = apply_tsne(features, n_components=2, n_iter=50)
+    >>> embedding.shape
+    (150, 2)
+    """
+    if n_components < 1 or n_iter < 1:
+        raise ValueError("n_components and n_iter must be >= 1")
+
+    n_samples = data_matrix.shape[0]
+    rng = np.random.default_rng()
+    embedding = rng.standard_normal((n_samples, n_components)) * 1e-4
+
+    high_dim_affinities = compute_pairwise_affinities(data_matrix)
+    high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)
+
+    embedding_increment = np.zeros_like(embedding)
+    momentum = 0.5
+
+    for iteration in range(n_iter):
+        low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
+        low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)
+
+        affinity_diff = high_dim_affinities - low_dim_affinities
+
+        # KL gradient: dC/dy_i = 4 * sum_j (p_ij - q_ij) * (y_i - y_j)
+        # / (1 + ||y_i - y_j||^2). The row-sum term carries y_i and the
+        # dot product carries y_j, so y_i's term must come first for the
+        # update below to be gradient *descent* rather than ascent.
+        gradient = 4 * (
+            np.multiply(
+                np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis],
+                embedding,
+            )
+            - np.dot((affinity_diff * numerator_matrix), embedding)
+        )
+
+        embedding_increment = momentum * embedding_increment - learning_rate * gradient
+        embedding += embedding_increment
+
+        if iteration == int(n_iter / 4):
+            momentum = 0.8
+
+    return embedding
+
+
+def main() -> None:
+    """
+    Run t-SNE on the Iris dataset and display the first 5 embeddings.
+
+    >>> main() # doctest: +ELLIPSIS
+    t-SNE embedding (first 5 points):
+    [[...
+    """
+    features, _labels = collect_dataset()
+    embedding = apply_tsne(features, n_components=2, n_iter=300)
+
+    if not isinstance(embedding, np.ndarray):
+        raise TypeError("t-SNE embedding must be an ndarray")
+
+    print("t-SNE embedding (first 5 points):")
+    print(embedding[:5])
+
+    # Optional visualization (kept commented out so linting stays clean):
+
+    # import matplotlib.pyplot as plt
+    # plt.scatter(embedding[:, 0], embedding[:, 1], c=_labels, cmap="viridis")
+    # plt.title("t-SNE Visualization of the Iris Dataset")
+    # plt.xlabel("Dimension 1")
+    # plt.ylabel("Dimension 2")
+    # plt.show()
+
+
+if __name__ == "__main__":
+    doctest.testmod()
+    main()
diff --git a/machine_learning/xgboost_classifier.py b/machine_learning/xgboost_classifier.py
index 1da933cf690f..e845480074b9 100644
--- a/machine_learning/xgboost_classifier.py
+++ b/machine_learning/xgboost_classifier.py
@@ -42,8 +42,6 @@ def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
 def main() -> None:
     """
-    >>> main()
-
     Url for the algorithm:
     https://xgboost.readthedocs.io/en/stable/
     Iris type dataset is used to demonstrate algorithm.
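Stepping back to the t-SNE module added above: the `gradient` expression inside `apply_tsne` is a vectorized form of the standard Kullback-Leibler gradient from van der Maaten and Hinton (2008), with `affinity_diff` supplying the $p_{ij} - q_{ij}$ factor and `numerator_matrix` the Student-t kernels:

```latex
\frac{\partial C}{\partial y_i}
    = 4 \sum_{j \neq i} \left( p_{ij} - q_{ij} \right)
      \left( y_i - y_j \right)
      \left( 1 + \lVert y_i - y_j \rVert^2 \right)^{-1}
```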
diff --git a/maths/chinese_remainder_theorem.py b/maths/chinese_remainder_theorem.py
index 18af63d106e8..b7a7712ae917 100644
--- a/maths/chinese_remainder_theorem.py
+++ b/maths/chinese_remainder_theorem.py
@@ -65,7 +65,7 @@ def invert_modulo(a: int, n: int) -> int:
     1
 
     """
-    (b, x) = extended_euclid(a, n)
+    (b, _x) = extended_euclid(a, n)
     if b < 0:
         b = (b % n + n) % n
     return b
diff --git a/maths/factorial.py b/maths/factorial.py
index aaf90f384bb9..ba61447c7564 100644
--- a/maths/factorial.py
+++ b/maths/factorial.py
@@ -56,7 +56,7 @@ def factorial_recursive(n: int) -> int:
         raise ValueError("factorial() only accepts integral values")
     if n < 0:
         raise ValueError("factorial() not defined for negative values")
-    return 1 if n in {0, 1} else n * factorial(n - 1)
+    return 1 if n in {0, 1} else n * factorial_recursive(n - 1)
 
 
 if __name__ == "__main__":
diff --git a/maths/fibonacci.py b/maths/fibonacci.py
index 24b2d7ae449e..71ff479f9cc2 100644
--- a/maths/fibonacci.py
+++ b/maths/fibonacci.py
@@ -183,7 +183,7 @@ def fib_memoization(n: int) -> list[int]:
     """
     if n < 0:
         raise ValueError("n is negative")
-    # Cache must be outside recursuive function
-    # other it will reset every time it calls itself.
+    # Cache must be outside recursive function
+    # otherwise it will reset every time it calls itself.
     cache: dict[int, int] = {0: 0, 1: 1, 2: 1}  # Prefilled cache
diff --git a/maths/largest_of_very_large_numbers.py b/maths/largest_of_very_large_numbers.py
index edee50371e02..e38ab2edb932 100644
--- a/maths/largest_of_very_large_numbers.py
+++ b/maths/largest_of_very_large_numbers.py
@@ -15,7 +15,7 @@ def res(x, y):
     >>> res(-1, 5)
     Traceback (most recent call last):
         ...
-    ValueError: math domain error
+    ValueError: expected a positive input
     """
     if 0 not in (x, y):
         # We use the relation x^y = y*log10(x), where 10 is the base.
diff --git a/maths/modular_division.py b/maths/modular_division.py
index 2f8f4479b27d..94f12b3e096e 100644
--- a/maths/modular_division.py
+++ b/maths/modular_division.py
@@ -31,7 +31,7 @@ def modular_division(a: int, b: int, n: int) -> int:
     assert n > 1
     assert a > 0
     assert greatest_common_divisor(a, n) == 1
-    (d, t, s) = extended_gcd(n, a)  # Implemented below
+    (_d, _t, s) = extended_gcd(n, a)  # Implemented below
     x = (b * s) % n
     return x
 
@@ -47,7 +47,7 @@ def invert_modulo(a: int, n: int) -> int:
     1
 
     """
-    (b, x) = extended_euclid(a, n)  # Implemented below
+    (b, _x) = extended_euclid(a, n)  # Implemented below
     if b < 0:
         b = (b % n + n) % n
     return b
diff --git a/maths/monte_carlo.py b/maths/monte_carlo.py
index d174a0b188a2..5eb176238ffb 100644
--- a/maths/monte_carlo.py
+++ b/maths/monte_carlo.py
@@ -8,7 +8,7 @@
 from statistics import mean
 
 
-def pi_estimator(iterations: int):
+def pi_estimator(iterations: int) -> None:
     """
     An implementation of the Monte Carlo method used to find pi.
     1. Draw a 2x2 square centred at (0,0).
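The `pi_estimator` hunk above only changes the return annotation, but the docstring it quotes describes the classic dartboard method. A self-contained sketch of that estimator, written here for illustration and independent of the repository's implementation:

```python
import random


def estimate_pi(iterations: int) -> float:
    """Estimate pi by sampling points uniformly from a 2x2 square.

    A point (x, y) with x, y in [-1, 1] lands inside the unit circle
    with probability pi / 4, so 4 * (hits / iterations) approaches pi.
    """
    hits = sum(
        1
        for _ in range(iterations)
        if random.uniform(-1, 1) ** 2 + random.uniform(-1, 1) ** 2 <= 1
    )
    return 4 * hits / iterations


if __name__ == "__main__":
    print(estimate_pi(1_000_000))  # ~3.14, give or take Monte Carlo noise
```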
diff --git a/maths/test_factorial.py b/maths/test_factorial.py index d80d88add745..1795ebba194f 100644 --- a/maths/test_factorial.py +++ b/maths/test_factorial.py @@ -33,5 +33,11 @@ def test_negative_number(function): function(-3) +@pytest.mark.parametrize("function", [factorial, factorial_recursive]) +def test_float_number(function): + with pytest.raises(ValueError): + function(1.5) + + if __name__ == "__main__": pytest.main(["-v", __file__]) diff --git a/maths/volume.py b/maths/volume.py index 08bdf72b013b..1715c9c300d5 100644 --- a/maths/volume.py +++ b/maths/volume.py @@ -555,7 +555,7 @@ def main(): print(f"Torus: {vol_torus(2, 2) = }") # ~= 157.9 print(f"Conical Frustum: {vol_conical_frustum(2, 2, 4) = }") # ~= 58.6 print(f"Spherical cap: {vol_spherical_cap(1, 2) = }") # ~= 5.24 - print(f"Spheres intersetion: {vol_spheres_intersect(2, 2, 1) = }") # ~= 21.21 + print(f"Spheres intersection: {vol_spheres_intersect(2, 2, 1) = }") # ~= 21.21 print(f"Spheres union: {vol_spheres_union(2, 2, 1) = }") # ~= 45.81 print( f"Hollow Circular Cylinder: {vol_hollow_circular_cylinder(1, 2, 3) = }" diff --git a/neural_network/convolution_neural_network.py b/neural_network/convolution_neural_network.py index d4ac360a98de..6b1aa50c7981 100644 --- a/neural_network/convolution_neural_network.py +++ b/neural_network/convolution_neural_network.py @@ -317,7 +317,7 @@ def predict(self, datas_test): print((" - - Shape: Test_Data ", np.shape(datas_test))) for p in range(len(datas_test)): data_test = np.asmatrix(datas_test[p]) - data_focus1, data_conved1 = self.convolute( + _data_focus1, data_conved1 = self.convolute( data_test, self.conv1, self.w_conv1, @@ -339,7 +339,7 @@ def predict(self, datas_test): def convolution(self, data): # return the data of image after convoluting process so we can check it out data_test = np.asmatrix(data) - data_focus1, data_conved1 = self.convolute( + _data_focus1, data_conved1 = self.convolute( data_test, self.conv1, self.w_conv1, diff --git a/project_euler/problem_551/sol1.py b/project_euler/problem_551/sol1.py index 100e9d41dd31..e13cf77a776d 100644 --- a/project_euler/problem_551/sol1.py +++ b/project_euler/problem_551/sol1.py @@ -185,7 +185,7 @@ def solution(n: int = 10**15) -> int: i = 1 dn = 0 while True: - diff, terms_jumped = next_term(digits, 20, i + dn, n) + _diff, terms_jumped = next_term(digits, 20, i + dn, n) dn += terms_jumped if dn == n - i: break diff --git a/pyproject.toml b/pyproject.toml index b680cc0d439e..f1559d6bc1b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,25 +3,27 @@ name = "thealgorithms-python" version = "0.0.1" description = "TheAlgorithms in Python" authors = [ { name = "TheAlgorithms Contributors" } ] -requires-python = ">=3.13" +requires-python = ">=3.14" classifiers = [ "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] dependencies = [ "beautifulsoup4>=4.12.3", + "cython>=3.1.2", "fake-useragent>=1.5.1", "httpx>=0.28.1", "imageio>=2.36.1", "keras>=3.7", - "lxml>=5.3", + "lxml>=6", "matplotlib>=3.9.3", "numpy>=2.1.3", "opencv-python>=4.10.0.84", "pandas>=2.2.3", - "pillow>=11", + "pillow>=11.3", "rich>=13.9.4", "scikit-learn>=1.5.2", + "scipy>=1.16.2", "sphinx-pyproject>=0.3", "statsmodels>=0.14.4", "sympy>=1.13.3", @@ -32,7 +34,7 @@ dependencies = [ [dependency-groups] test = [ - "pytest>=8.3.4", + "pytest>=8.4.1", "pytest-cov>=6", ] @@ -47,7 +49,7 @@ euler-validate = [ ] [tool.ruff] -target-version = "py313" +target-version = "py314" output-format = 
"full" lint.select = [ @@ -108,7 +110,7 @@ lint.ignore = [ # `ruff rule S101` for a description of that rule "B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME - "EM101", # Exception must not use a string literal, assign to variable first + "EM101", # Exception must not use a string literal, assign to a variable first "EXE001", # Shebang is present but file is not executable -- DO NOT FIX "G004", # Logging statement uses f-string "ISC001", # Conflicts with ruff format -- DO NOT FIX @@ -124,7 +126,7 @@ lint.ignore = [ "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME "SIM905", # Consider using a list literal instead of `str.split` -- DO NOT FIX "SLF001", # Private member accessed: `_Iterator` -- FIX ME - "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX + "UP037", # FIX ME ] lint.per-file-ignores."data_structures/hashing/tests/test_hash_map.py" = [ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 66b5d8a6b94e..000000000000 --- a/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -beautifulsoup4 -fake-useragent -httpx -imageio -keras -lxml -matplotlib -numpy -opencv-python -pandas -pillow -rich -scikit-learn -sphinx-pyproject -statsmodels -sympy -tweepy -typing_extensions -xgboost diff --git a/scheduling/multi_level_feedback_queue.py b/scheduling/multi_level_feedback_queue.py index abee3c85c5a5..58ba2afa0e67 100644 --- a/scheduling/multi_level_feedback_queue.py +++ b/scheduling/multi_level_feedback_queue.py @@ -255,7 +255,7 @@ def multi_level_feedback_queue(self) -> deque[Process]: # all queues except last one have round_robin algorithm for i in range(self.number_of_queues - 1): - finished, self.ready_queue = self.round_robin( + _finished, self.ready_queue = self.round_robin( self.ready_queue, self.time_slices[i] ) # the last queue has first_come_first_served algorithm diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 000000000000..92ebf3a7e8ba --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,27 @@ +Dealing with the onslaught of Hacktoberfest +* https://hacktoberfest.com + +Each year, October brings a swarm of new contributors participating in Hacktoberfest. This event has its pros and cons, but it presents a monumental workload for the few active maintainers of this repo. The maintainer workload is further impacted by a new version of CPython being released in the first week of each October. + +To help make our algorithms more valuable to visitors, our CONTRIBUTING.md file outlines several strict requirements, such as tests, type hints, descriptive names, functions, and/or classes. Maintainers reviewing pull requests should try to encourage improvements to meet these goals, but when the workload becomes overwhelming (esp. in October), pull requests that do not meet these goals should be closed. + +Below are a few [`gh`](https://cli.github.com) scripts that should close pull requests that do not match the definition of an acceptable algorithm as defined in CONTRIBUTING.md. I tend to run these scripts in the following order. + +* close_pull_requests_with_require_descriptive_names.sh +* close_pull_requests_with_require_tests.sh +* close_pull_requests_with_require_type_hints.sh +* close_pull_requests_with_failing_tests.sh +* close_pull_requests_with_awaiting_changes.sh +* find_git_conflicts.sh + +### Run on 14 Oct 2025: 107 of 541 (19.77%) pull requests closed. 
+
+Script run | Open pull requests | Pull requests closed
+--- | --- | ---
+None | 541 | 0
+require_descriptive_names | 515 | 26
+require_tests | 498 | 17
+require_type_hints | 496 | 2
+failing_tests | 438 | ___58___
+awaiting_changes | 434 | 4
+git_conflicts | [ broken ] | 0
diff --git a/searches/binary_search.py b/searches/binary_search.py
index 2e66b672d5b4..5125dc6bdb9a 100644
--- a/searches/binary_search.py
+++ b/searches/binary_search.py
@@ -243,6 +243,81 @@ def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
     return -1
 
 
+def binary_search_with_duplicates(sorted_collection: list[int], item: int) -> list[int]:
+    """Pure implementation of a binary search algorithm in Python that supports
+    duplicates.
+
+    Resources used:
+    https://stackoverflow.com/questions/13197552/using-binary-search-with-sorted-array-with-duplicates
+
+    The collection must be sorted in ascending order; a ValueError is raised
+    otherwise. If the target appears multiple times, this function returns a
+    list of all indexes where the target occurs. If the target is not found,
+    this function returns an empty list.
+
+    :param sorted_collection: some ascending sorted collection with comparable items
+    :param item: item value to search for
+    :return: a list of indexes where the item is found (empty list if not found)
+
+    Examples:
+    >>> binary_search_with_duplicates([0, 5, 7, 10, 15], 0)
+    [0]
+    >>> binary_search_with_duplicates([0, 5, 7, 10, 15], 15)
+    [4]
+    >>> binary_search_with_duplicates([1, 2, 2, 2, 3], 2)
+    [1, 2, 3]
+    >>> binary_search_with_duplicates([1, 2, 2, 2, 3], 4)
+    []
+    """
+    if list(sorted_collection) != sorted(sorted_collection):
+        raise ValueError("sorted_collection must be sorted in ascending order")
+
+    def lower_bound(sorted_collection: list[int], item: int) -> int:
+        """
+        Returns the index of the first element greater than or equal to the item.
+
+        :param sorted_collection: The sorted list to search.
+        :param item: The item to find the lower bound for.
+        :return: The index where the item can be inserted while maintaining order.
+        """
+        left = 0
+        right = len(sorted_collection)
+        while left < right:
+            midpoint = left + (right - left) // 2
+            current_item = sorted_collection[midpoint]
+            if current_item < item:
+                left = midpoint + 1
+            else:
+                right = midpoint
+        return left
+
+    def upper_bound(sorted_collection: list[int], item: int) -> int:
+        """
+        Returns the index of the first element strictly greater than the item.
+
+        :param sorted_collection: The sorted list to search.
+        :param item: The item to find the upper bound for.
+        :return: The index where the item can be inserted after all existing instances.
+ """ + left = 0 + right = len(sorted_collection) + while left < right: + midpoint = left + (right - left) // 2 + current_item = sorted_collection[midpoint] + if current_item <= item: + left = midpoint + 1 + else: + right = midpoint + return left + + left = lower_bound(sorted_collection, item) + right = upper_bound(sorted_collection, item) + + if left == len(sorted_collection) or sorted_collection[left] != item: + return [] + return list(range(left, right)) + + def binary_search_by_recursion( sorted_collection: list[int], item: int, left: int = 0, right: int = -1 ) -> int: diff --git a/sorts/binary_insertion_sort.py b/sorts/binary_insertion_sort.py index 50653a99e7ce..b928316a849d 100644 --- a/sorts/binary_insertion_sort.py +++ b/sorts/binary_insertion_sort.py @@ -56,7 +56,7 @@ def binary_insertion_sort(collection: list) -> list: return collection -if __name__ == "__main": +if __name__ == "__main__": user_input = input("Enter numbers separated by a comma:\n").strip() try: unsorted = [int(item) for item in user_input.split(",")] diff --git a/sorts/comb_sort.py b/sorts/comb_sort.py index 3c8b1e99a454..94ad8f533328 100644 --- a/sorts/comb_sort.py +++ b/sorts/comb_sort.py @@ -5,8 +5,7 @@ Comb sort improves on bubble sort algorithm. In bubble sort, distance (or gap) between two compared elements is always one. Comb sort improvement is that gap can be much more than 1, in order to prevent slowing -down by small values -at the end of a list. +down by small values at the end of a list. More info on: https://en.wikipedia.org/wiki/Comb_sort diff --git a/strings/edit_distance.py b/strings/edit_distance.py index e842c8555c8e..77ed23037937 100644 --- a/strings/edit_distance.py +++ b/strings/edit_distance.py @@ -14,6 +14,20 @@ def edit_distance(source: str, target: str) -> int: >>> edit_distance("GATTIC", "GALTIC") 1 + >>> edit_distance("NUM3", "HUM2") + 2 + >>> edit_distance("cap", "CAP") + 3 + >>> edit_distance("Cat", "") + 3 + >>> edit_distance("cat", "cat") + 0 + >>> edit_distance("", "123456789") + 9 + >>> edit_distance("Be@uty", "Beautyyyy!") + 5 + >>> edit_distance("lstring", "lsstring") + 1 """ if len(source) == 0: return len(target) diff --git a/web_programming/covid_stats_via_xpath.py b/web_programming/covid_stats_via_xpath.py index 9c016ba414ea..88a248610441 100644 --- a/web_programming/covid_stats_via_xpath.py +++ b/web_programming/covid_stats_via_xpath.py @@ -1,7 +1,8 @@ """ -This is to show simple COVID19 info fetching from worldometers archive site using lxml -* The main motivation to use lxml in place of bs4 is that it is faster and therefore -more convenient to use in Python web projects (e.g. Django or Flask-based) +This script demonstrates fetching simple COVID-19 statistics from the +Worldometers archive site using lxml. lxml is chosen over BeautifulSoup +for its speed and convenience in Python web projects (such as Django or +Flask). """ # /// script @@ -25,15 +26,34 @@ class CovidData(NamedTuple): def covid_stats( - url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/", + url: str = ( + "https://web.archive.org/web/20250825095350/" + "https://www.worldometers.info/coronavirus/" + ), ) -> CovidData: xpath_str = '//div[@class = "maincounter-number"]/span/text()' - return CovidData( - *html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str) + try: + response = httpx.get(url, timeout=10).raise_for_status() + except httpx.TimeoutException: + print( + "Request timed out. 
Please check your network connection " + "or try again later." + ) + return CovidData("N/A", "N/A", "N/A") + except httpx.HTTPStatusError as e: + print(f"HTTP error occurred: {e}") + return CovidData("N/A", "N/A", "N/A") + data = html.fromstring(response.content).xpath(xpath_str) + if len(data) != 3: + print("Unexpected data format. The page structure may have changed.") + data = "N/A", "N/A", "N/A" + return CovidData(*data) + + +if __name__ == "__main__": + fmt = ( + "Total COVID-19 cases in the world: {}\n" + "Total deaths due to COVID-19 in the world: {}\n" + "Total COVID-19 patients recovered in the world: {}" ) - - -fmt = """Total COVID-19 cases in the world: {} -Total deaths due to COVID-19 in the world: {} -Total COVID-19 patients recovered in the world: {}""" -print(fmt.format(*covid_stats())) + print(fmt.format(*covid_stats()))
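One detail worth calling out in the rewritten `covid_stats`: in httpx, unlike in `requests`, `Response.raise_for_status()` returns the response object, which is what lets the call chain directly onto `httpx.get(...)`. A minimal sketch of the same pattern, using a placeholder URL:

```python
import httpx


def fetch_html(url: str = "https://example.com") -> str:
    # raise_for_status() raises httpx.HTTPStatusError on 4xx/5xx responses
    # and returns the Response otherwise, which is why chaining works here.
    return httpx.get(url, timeout=10).raise_for_status().text
```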