From e7b11aac10ff91a0d33ce8954558bb13080b55af Mon Sep 17 00:00:00 2001 From: ranveersingh2718 <63253765+ranveersingh2718@users.noreply.github.com> Date: Sun, 14 Dec 2025 09:34:37 -0800 Subject: [PATCH] add md articles (#5126) * add kill-process.md article * add all-paths-from-source-lead-to-destination.md article * add web-crawler.md article * add number-of-islands-ii.md article --- ...l-paths-from-source-lead-to-destination.md | 222 +++++++++++ articles/kill-process.md | 349 +++++++++++++++++ articles/number-of-islands-ii.md | 360 ++++++++++++++++++ articles/web-crawler.md | 231 +++++++++++ 4 files changed, 1162 insertions(+) create mode 100644 articles/all-paths-from-source-lead-to-destination.md create mode 100644 articles/kill-process.md create mode 100644 articles/number-of-islands-ii.md create mode 100644 articles/web-crawler.md diff --git a/articles/all-paths-from-source-lead-to-destination.md b/articles/all-paths-from-source-lead-to-destination.md new file mode 100644 index 000000000..b69df6808 --- /dev/null +++ b/articles/all-paths-from-source-lead-to-destination.md @@ -0,0 +1,222 @@ +## 1. Depth First Search + +::tabs-start + +```python +class Solution: + + # We don't use the state WHITE as such anywhere. Instead, the "null" value in the states array below is a substitute for WHITE. + GRAY = 1 + BLACK = 2 + + def leadsToDestination(self, n: int, edges: List[List[int]], source: int, destination: int) -> bool: + graph = self.buildDigraph(n, edges) + return self.leadsToDest(graph, source, destination, [None] * n) + + def leadsToDest(self, graph, node, dest, states): + + # If the state is GRAY, this is a backward edge and hence, it creates a Loop. + if states[node] != None: + return states[node] == Solution.BLACK + + # If this is a leaf node, it should be equal to the destination. + if len(graph[node]) == 0: + return node == dest + + # Now, we are processing this node. So we mark it as GRAY. 
+ states[node] = Solution.GRAY + + for next_node in graph[node]: + + # If we get a `false` from any recursive call on the neighbors, we short circuit and return from there. + if not self.leadsToDest(graph, next_node, dest, states): + return False + + # Recursive processing done for the node. We mark it BLACK. + states[node] = Solution.BLACK + return True + + def buildDigraph(self, n, edges): + graph = [[] for _ in range(n)] + + for edge in edges: + graph[edge[0]].append(edge[1]) + + return graph +``` + +```java +class Solution { + + // We don't use the state WHITE as such anywhere. Instead, the "null" value in the states array below is a substitute for WHITE. + enum Color { GRAY, BLACK }; + + public boolean leadsToDestination(int n, int[][] edges, int source, int destination) { + + List[] graph = buildDigraph(n, edges); + return leadsToDest(graph, source, destination, new Color[n]); + } + + private boolean leadsToDest(List[] graph, int node, int dest, Color[] states) { + + // If the state is GRAY, this is a backward edge and hence, it creates a loop. + if (states[node] != null) { + return states[node] == Color.BLACK; + } + + // If this is a leaf node, it should be equal to the destination. + if (graph[node].isEmpty()) { + return node == dest; + } + + // Now, we are processing this node. So we mark it as GRAY + states[node] = Color.GRAY; + + for (int next : graph[node]) { + + // If we get a `false` from any recursive call on the neighbors, we short circuit and return from there. + if (!leadsToDest(graph, next, dest, states)) { + return false; + } + } + + // Recursive processing done for the node. 
We mark it BLACK + states[node] = Color.BLACK; + return true; + } + + private List[] buildDigraph(int n, int[][] edges) { + List[] graph = new List[n]; + for (int i = 0; i < n; i++) { + graph[i] = new ArrayList<>(); + } + + for (int[] edge : edges) { + graph[edge[0]].add(edge[1]); + } + + return graph; + } +} +``` + +```cpp +class Solution { +public: + static const int GRAY = 1; + static const int BLACK = 2; + + bool leadsToDestination(int n, vector>& edges, int source, int destination) { + vector> graph = buildDigraph(n, edges); + vector states(n, 0); + return leadsToDest(graph, source, destination, states); + } + +private: + bool leadsToDest(vector>& graph, int node, int dest, vector& states) { + if (states[node] != 0) { + return states[node] == BLACK; + } + if (graph[node].size() == 0) { + return node == dest; + } + states[node] = GRAY; + for (int next_node : graph[node]) { + if (!leadsToDest(graph, next_node, dest, states)) { + return false; + } + } + states[node] = BLACK; + return true; + } + + vector> buildDigraph(int n, vector>& edges) { + vector> graph(n); + for (auto& edge : edges) { + graph[edge[0]].push_back(edge[1]); + } + return graph; + } +}; +``` + +```javascript +class Solution { + static GRAY = 1; + static BLACK = 2; + + /** + * @param {number} n + * @param {number[][]} edges + * @param {number} source + * @param {number} destination + * @return {boolean} + */ + leadsToDestination(n, edges, source, destination) { + const graph = this.buildDigraph(n, edges); + const states = new Array(n).fill(null); + return this.leadsToDest(graph, source, destination, states); + } + + /** + * @param {number[][]} graph + * @param {number} node + * @param {number} dest + * @param {(number|null)[]} states + * @return {boolean} + */ + leadsToDest(graph, node, dest, states) { + if (states[node] !== null) { + return states[node] === Solution.BLACK; + } + if (graph[node].length === 0) { + return node === dest; + } + states[node] = Solution.GRAY; + for (const next_node of 
graph[node]) { + if (!this.leadsToDest(graph, next_node, dest, states)) { + return false; + } + } + states[node] = Solution.BLACK; + return true; + } + + /** + * @param {number} n + * @param {number[][]} edges + * @return {number[][]} + */ + buildDigraph(n, edges) { + const graph = Array.from({ length: n }, () => []); + for (const edge of edges) { + graph[edge[0]].push(edge[1]); + } + return graph; + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(V + E)$ + - A full DFS over an input graph takes $O(V + E)$, where $V$ represents the number of vertices in the graph and $E$ represents the number of edges in the graph. In the worst case $E$ can be $O(V^2)$, when each vertex is connected to every other vertex in the graph. In such a dense graph we usually end up discovering a cycle very early on and prune the recursion tree, so the search itself often stops after visiting only $O(V)$ nodes. That pruning does not improve the worst case, however: building the adjacency list already touches every edge, and a dense acyclic graph in which every path leads to the destination forces the DFS to examine each edge once. The overall time complexity is therefore $O(V + E)$, which is $O(V^2)$ when the $O(E)$ part dominates. + +- Space complexity: $O(V + E)$ + - Where $O(E)$ is occupied by the adjacency list and $O(V)$ is occupied by the recursion stack and the color states. + +> Where $V$ represents the number of vertices in the graph and $E$ represents the number of edges in the graph. + +--- + +### Why not Breadth-First Search? + +From this [Stack Overflow](https://stackoverflow.com/questions/2869647/why-dfs-and-not-bfs-for-finding-cycle-in-graphs) answer: + +> A BFS could be reasonable if the graph is undirected (be my guest at showing an efficient algorithm using BFS that would report the cycles in a directed graph!), where each cross edge defines a cycle (edge going from a node to an already visited node).
If the cross edge is `{v1, v2}`, and the root (in the BFS tree) that contains those nodes is `r`, then the cycle is `r ~ v1 - v2 ~ r` (~ is a path, - a single edge), which can be reported almost as easily as in DFS. +> +> The only reason to use a BFS would be if you know your (undirected) graph is going to have long paths and small path cover (in other words, deep and narrow). In that case, BFS would require proportionally less memory for its queue than DFS' stack (both still linear of course). +> +> In all other cases, DFS is clearly the winner. diff --git a/articles/kill-process.md b/articles/kill-process.md new file mode 100644 index 000000000..ddeffc7b7 --- /dev/null +++ b/articles/kill-process.md @@ -0,0 +1,349 @@ +## 1. Depth First Search + +::tabs-start + +```java +class Solution { + public List < Integer > killProcess(List < Integer > pid, List < Integer > ppid, int kill) { + List < Integer > l = new ArrayList < > (); + + if (kill == 0) + return l; + + l.add(kill); + + for (int i = 0; i < ppid.size(); i++) + if (ppid.get(i) == kill) + l.addAll(killProcess(pid, ppid, pid.get(i))); + + return l; + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(n^2)$ +- Space complexity: $O(n)$ + +> Where $n$ is the length of the `pid` and `ppid`. + +--- + +## 2. 
Tree Simulation + +::tabs-start + +```java +class Solution { + + class Node { + int val; + List < Node > children = new ArrayList < > (); + } + + public List < Integer > killProcess(List < Integer > pid, List < Integer > ppid, int kill) { + HashMap < Integer, Node > map = new HashMap < > (); + for (int id: pid) { + Node node = new Node(); + node.val = id; + map.put(id, node); + } + for (int i = 0; i < ppid.size(); i++) { + if (ppid.get(i) > 0) { + Node par = map.get(ppid.get(i)); + par.children.add(map.get(pid.get(i))); + } + } + List < Integer > l = new ArrayList < > (); + l.add(kill); + getAllChildren(map.get(kill), l); + return l; + } + + public void getAllChildren(Node pn, List < Integer > l) { + for (Node n: pn.children) { + l.add(n.val); + getAllChildren(n, l); + } + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(n)$ +- Space complexity: $O(n)$ + +> Where $n$ is the length of the `pid` and `ppid`. + +--- + +## 3. HashMap + Depth First Search + +::tabs-start + +```python +class Solution: + def killProcess(self, pid: List[int], ppid: List[int], kill: int) -> List[int]: + map_dict = {} + for i in range(len(ppid)): + if ppid[i] > 0: + if ppid[i] not in map_dict: + map_dict[ppid[i]] = [] + map_dict[ppid[i]].append(pid[i]) + + result = [kill] + self.getAllChildren(map_dict, result, kill) + return result + + def getAllChildren(self, map_dict, result, kill): + if kill in map_dict: + for child_id in map_dict[kill]: + result.append(child_id) + self.getAllChildren(map_dict, result, child_id) +``` + +```java +class Solution { + public List killProcess(List pid, List ppid, int kill) { + HashMap > map = new HashMap <> (); + + for (int i = 0; i < ppid.size(); i++) { + if (ppid.get(i) > 0) { + List l = map.getOrDefault(ppid.get(i), new ArrayList ()); + l.add(pid.get(i)); + map.put(ppid.get(i), l); + } + } + + List l = new ArrayList<> (); + l.add(kill); + getAllChildren(map, l, kill); + return l; + } + + public void getAllChildren(HashMap > 
map, List l, int kill) { + if (map.containsKey(kill)) + for (int id: map.get(kill)) { + l.add(id); + getAllChildren(map, l, id); + } + } +} +``` + +```cpp +class Solution { +public: + vector killProcess(vector& pid, vector& ppid, int kill) { + unordered_map> map; + for (int i = 0; i < ppid.size(); i++) { + if (ppid[i] > 0) { + map[ppid[i]].push_back(pid[i]); + } + } + + vector result; + result.push_back(kill); + getAllChildren(map, result, kill); + return result; + } + +private: + void getAllChildren(unordered_map>& map, vector& result, int kill) { + if (map.find(kill) != map.end()) { + for (int child_id : map[kill]) { + result.push_back(child_id); + getAllChildren(map, result, child_id); + } + } + } +}; +``` + +```javascript +class Solution { + /** + * @param {number[]} pid + * @param {number[]} ppid + * @param {number} kill + * @return {number[]} + */ + killProcess(pid, ppid, kill) { + const map = new Map(); + for (let i = 0; i < ppid.length; i++) { + if (ppid[i] > 0) { + if (!map.has(ppid[i])) { + map.set(ppid[i], []); + } + map.get(ppid[i]).push(pid[i]); + } + } + + const result = [kill]; + this.getAllChildren(map, result, kill); + return result; + } + + /** + * @param {Map} map + * @param {number[]} result + * @param {number} kill + * @return {void} + */ + getAllChildren(map, result, kill) { + if (map.has(kill)) { + for (const childId of map.get(kill)) { + result.push(childId); + this.getAllChildren(map, result, childId); + } + } + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(n)$ +- Space complexity: $O(n)$ + +> Where $n$ is the length of the `pid` and `ppid`. + +--- + +## 4. 
HashMap + Breadth First Search + +::tabs-start + +```python +class Solution: + def killProcess(self, pid: List[int], ppid: List[int], kill: int) -> List[int]: + map_dict = {} + for i in range(len(ppid)): + if ppid[i] > 0: + if ppid[i] not in map_dict: + map_dict[ppid[i]] = [] + map_dict[ppid[i]].append(pid[i]) + + queue = deque([kill]) + result = [] + while queue: + r = queue.popleft() + result.append(r) + if r in map_dict: + for child_id in map_dict[r]: + queue.append(child_id) + + return result +``` + +```java +class Solution { + + public List < Integer > killProcess(List < Integer > pid, List < Integer > ppid, int kill) { + HashMap < Integer, List < Integer >> map = new HashMap < > (); + + for (int i = 0; i < ppid.size(); i++) { + if (ppid.get(i) > 0) { + List < Integer > l = map.getOrDefault(ppid.get(i), new ArrayList < Integer > ()); + l.add(pid.get(i)); + map.put(ppid.get(i), l); + } + } + + Queue < Integer > queue = new LinkedList < > (); + List < Integer > l = new ArrayList < > (); + queue.add(kill); + while (!queue.isEmpty()) { + int r = queue.remove(); + l.add(r); + if (map.containsKey(r)) + for (int id: map.get(r)) + queue.add(id); + } + return l; + } +} +``` + +```cpp +class Solution { +public: + vector killProcess(vector& pid, vector& ppid, int kill) { + unordered_map> map; + for (int i = 0; i < ppid.size(); i++) { + if (ppid[i] > 0) { + map[ppid[i]].push_back(pid[i]); + } + } + + queue q; + vector result; + q.push(kill); + while (!q.empty()) { + int r = q.front(); + q.pop(); + result.push_back(r); + if (map.find(r) != map.end()) { + for (int child_id : map[r]) { + q.push(child_id); + } + } + } + + return result; + } +}; +``` + +```javascript +class Solution { + /** + * @param {number[]} pid + * @param {number[]} ppid + * @param {number} kill + * @return {number[]} + */ + killProcess(pid, ppid, kill) { + const map = new Map(); + for (let i = 0; i < ppid.length; i++) { + if (ppid[i] > 0) { + if (!map.has(ppid[i])) { + map.set(ppid[i], []); + } + 
map.get(ppid[i]).push(pid[i]); + } + } + + const queue = [kill]; + const result = []; + while (queue.length > 0) { + const r = queue.shift(); + result.push(r); + if (map.has(r)) { + for (const childId of map.get(r)) { + queue.push(childId); + } + } + } + + return result; + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(n)$ +- Space complexity: $O(n)$ + +> Where $n$ is the length of the `pid` and `ppid`. diff --git a/articles/number-of-islands-ii.md b/articles/number-of-islands-ii.md new file mode 100644 index 000000000..d9b530026 --- /dev/null +++ b/articles/number-of-islands-ii.md @@ -0,0 +1,360 @@ +## 1. Union Find + +::tabs-start + +```python +class UnionFind: + def __init__(self, size): + self.parent = [-1] * size + self.rank = [0] * size + self.count = 0 + + def add_land(self, x): + if self.parent[x] >= 0: + return + self.parent[x] = x + self.count += 1 + + def is_land(self, x): + if self.parent[x] >= 0: + return True + else: + return False + + def number_of_islands(self): + return self.count + + def find(self, x): + if self.parent[x] != x: + self.parent[x] = self.find(self.parent[x]) + return self.parent[x] + + def union(self, x, y): + xset = self.find(x) + yset = self.find(y) + + if xset == yset: + return + elif self.rank[xset] < self.rank[yset]: + self.parent[xset] = yset + elif self.rank[xset] > self.rank[yset]: + self.parent[yset] = xset + else: + self.parent[yset] = xset + self.rank[xset] += 1 + + self.count -= 1 + + +class Solution: + def numIslands2(self, m: int, n: int, positions: List[List[int]]) -> List[int]: + x = [-1, 1, 0, 0] + y = [0, 0, -1, 1] + dsu = UnionFind(m * n) + answer = [] + + for position in positions: + land_position = position[0] * n + position[1] + dsu.add_land(land_position) + + for i in range(4): + neighbor_x = position[0] + x[i] + neighbor_y = position[1] + y[i] + neighbor_position = neighbor_x * n + neighbor_y + + # If neighborX and neighborY correspond to a point in the grid and there is a + # 
land at that point, then merge it with the current land. + if neighbor_x >= 0 and neighbor_x < m and neighbor_y >= 0 and neighbor_y < n and dsu.is_land(neighbor_position): + dsu.union(land_position, neighbor_position) + + answer.append(dsu.number_of_islands()) + + return answer +``` + +```java +class UnionFind { + int[] parent; + int[] rank; + int count; + + public UnionFind(int size) { + parent = new int[size]; + rank = new int[size]; + for (int i = 0; i < size; i++) + parent[i] = -1; + count = 0; + } + + public void addLand(int x) { + if (parent[x] >= 0) + return; + parent[x] = x; + count++; + } + + public boolean isLand(int x) { + if (parent[x] >= 0) { + return true; + } else { + return false; + } + } + + int numberOfIslands() { + return count; + } + + public int find(int x) { + if (parent[x] != x) + parent[x] = find(parent[x]); + return parent[x]; + } + + public void union(int x, int y) { + int xset = find(x), yset = find(y); + if (xset == yset) { + return; + } else if (rank[xset] < rank[yset]) { + parent[xset] = yset; + } else if (rank[xset] > rank[yset]) { + parent[yset] = xset; + } else { + parent[yset] = xset; + rank[xset]++; + } + count--; + } +} + +class Solution { + public List numIslands2(int m, int n, int[][] positions) { + int x[] = { -1, 1, 0, 0 }; + int y[] = { 0, 0, -1, 1 }; + UnionFind dsu = new UnionFind(m * n); + List answer = new ArrayList<>(); + + for (int[] position : positions) { + int landPosition = position[0] * n + position[1]; + dsu.addLand(landPosition); + + for (int i = 0; i < 4; i++) { + int neighborX = position[0] + x[i]; + int neighborY = position[1] + y[i]; + int neighborPosition = neighborX * n + neighborY; + // If neighborX and neighborY correspond to a point in the grid and there is a + // land at that point, then merge it with the current land. 
+ if (neighborX >= 0 && neighborX < m && neighborY >= 0 && neighborY < n && + dsu.isLand(neighborPosition)) { + dsu.union(landPosition, neighborPosition); + } + } + + answer.add(dsu.numberOfIslands()); + } + return answer; + } +} +``` + +```cpp +class UnionFind { +private: + vector parent, rank; + int count; + +public: + UnionFind(int size) { + parent.resize(size, -1); + rank.resize(size, 0); + count = 0; + } + + void addLand(int x) { + if (parent[x] >= 0) return; + parent[x] = x; + count++; + } + + bool isLand(int x) { + if (parent[x] >= 0) { + return true; + } else { + return false; + } + } + + int numberOfIslands() { return count; } + + int find(int x) { + if (parent[x] != x) { + parent[x] = find(parent[x]); + } + return parent[x]; + } + + void union_set(int x, int y) { + int xset = find(x), yset = find(y); + if (xset == yset) { + return; + } else if (rank[xset] < rank[yset]) { + parent[xset] = yset; + } else if (rank[xset] > rank[yset]) { + parent[yset] = xset; + } else { + parent[yset] = xset; + rank[xset]++; + } + count--; + } +}; + +class Solution { +public: + vector numIslands2(int m, int n, vector>& positions) { + int x[] = {-1, 1, 0, 0}; + int y[] = {0, 0, -1, 1}; + UnionFind dsu(m * n); + vector answer; + + for (auto& position : positions) { + int landPosition = position[0] * n + position[1]; + dsu.addLand(landPosition); + + for (int i = 0; i < 4; i++) { + int neighborX = position[0] + x[i]; + int neighborY = position[1] + y[i]; + int neighborPosition = neighborX * n + neighborY; + // If neighborX and neighborY correspond to a point in the grid and there is a land + // at that point, then merge it with the current land. 
+ if (neighborX >= 0 && neighborX < m && neighborY >= 0 && neighborY < n && + dsu.isLand(neighborPosition)) { + dsu.union_set(landPosition, neighborPosition); + } + } + answer.push_back(dsu.numberOfIslands()); + } + return answer; + } +}; +``` + +```javascript +class UnionFind { + /** + * @param {number} size + */ + constructor(size) { + this.parent = new Array(size).fill(-1); + this.rank = new Array(size).fill(0); + this.count = 0; + } + + /** + * @param {number} x + * @return {void} + */ + addLand(x) { + if (this.parent[x] >= 0) return; + this.parent[x] = x; + this.count++; + } + + /** + * @param {number} x + * @return {boolean} + */ + isLand(x) { + if (this.parent[x] >= 0) { + return true; + } else { + return false; + } + } + + /** + * @return {number} + */ + numberOfIslands() { + return this.count; + } + + /** + * @param {number} x + * @return {number} + */ + find(x) { + if (this.parent[x] !== x) + this.parent[x] = this.find(this.parent[x]); + return this.parent[x]; + } + + /** + * @param {number} x + * @param {number} y + * @return {void} + */ + union(x, y) { + let xset = this.find(x); + let yset = this.find(y); + + if (xset === yset) { + return; + } else if (this.rank[xset] < this.rank[yset]) { + this.parent[xset] = yset; + } else if (this.rank[xset] > this.rank[yset]) { + this.parent[yset] = xset; + } else { + this.parent[yset] = xset; + this.rank[xset]++; + } + + this.count--; + } +} + +class Solution { + /** + * @param {number} m + * @param {number} n + * @param {number[][]} positions + * @return {number[]} + */ + numIslands2(m, n, positions) { + let x = [-1, 1, 0, 0]; + let y = [0, 0, -1, 1]; + let dsu = new UnionFind(m * n); + let answer = []; + + for (let position of positions) { + let landPosition = position[0] * n + position[1]; + dsu.addLand(landPosition); + + for (let i = 0; i < 4; i++) { + let neighborX = position[0] + x[i]; + let neighborY = position[1] + y[i]; + let neighborPosition = neighborX * n + neighborY; + + // If neighborX and neighborY 
correspond to a point in the grid and there is a + // land at that point, then merge it with the current land. + if (neighborX >= 0 && neighborX < m && neighborY >= 0 && neighborY < n && dsu.isLand(neighborPosition)) { + dsu.union(landPosition, neighborPosition); + } + } + + answer.push(dsu.numberOfIslands()); + } + + return answer; + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(m \cdot n + l)$ +- Space complexity: $O(m \cdot n)$ + +> Where $m$ and $n$ are the number of rows and columns in the given grid, and $l$ is the size of `positions`. diff --git a/articles/web-crawler.md b/articles/web-crawler.md new file mode 100644 index 000000000..cc59bf0b2 --- /dev/null +++ b/articles/web-crawler.md @@ -0,0 +1,231 @@ +## 1. Depth-first search + +::tabs-start + +```python +class Solution: + def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]: + def get_hostname(url): + # split url by slashes + # for instance, "http://example.org/foo/bar" will be split into + # "http:", "", "example.org", "foo", "bar" + # the hostname is the 2-nd (0-indexed) element + return url.split('/')[2] + + start_hostname = get_hostname(startUrl) + visited = set() + + def dfs(url, htmlParser): + visited.add(url) + for next_url in htmlParser.getUrls(url): + if get_hostname(next_url) == start_hostname and next_url not in visited: + dfs(next_url, htmlParser) + + dfs(startUrl, htmlParser) + return visited +``` + +```java +class Solution { + + private String startHostname; + private HashSet visited = new HashSet(); + + private String getHostname(String url) { + // split url by slashes + // for instance, "http://example.org/foo/bar" will be split into + // "http:", "", "example.org", "foo", "bar" + // the hostname is the 2-nd (0-indexed) element + return url.split("/")[2]; + } + + private void dfs(String url, HtmlParser htmlParser) { + visited.add(url); + for (String nextUrl : htmlParser.getUrls(url)) { + if (getHostname(nextUrl).equals(startHostname) && 
!visited.contains(nextUrl)) { + dfs(nextUrl, htmlParser); + } + } + } + + public List crawl(String startUrl, HtmlParser htmlParser) { + startHostname = getHostname(startUrl); + dfs(startUrl, htmlParser); + return new ArrayList<>(visited); + } +} +``` + +```cpp +class Solution { +public: + vector crawl(string startUrl, HtmlParser htmlParser) { + function getHostname = [](string url) -> string { + // find the next slash in the url after "http://", + // that is, searching from index 7 (inclusive) + // if there is no such slash, pos will be equal to url.size() + int pos = min(url.size(), url.find('/', 7)); + // return the substring that starts after "http://" and ends + // before the next slash or at the end of the string + return url.substr(7, pos - 7); + }; + + string startHostname = getHostname(startUrl); + unordered_set visited; + + function dfs = [&](string url) -> void { + visited.insert(url); + for (string nextUrl : htmlParser.getUrls(url)) { + if (getHostname(nextUrl) == startHostname && !visited.count(nextUrl)) { + dfs(nextUrl); + } + } + }; + + dfs(startUrl); + return vector(visited.begin(), visited.end()); + } +}; +``` + +```javascript +class Solution { + /** + * @param {string} startUrl + * @param {HtmlParser} htmlParser + * @return {string[]} + */ + crawl(startUrl, htmlParser) { + function getHostname(url) { + // split url by slashes + // for instance, "http://example.org/foo/bar" will be split into + // "http:", "", "example.org", "foo", "bar" + // the hostname is the 2nd (0-indexed) element + return url.split('/')[2]; + } + + const startHostname = getHostname(startUrl); + const visited = new Set(); + + function dfs(url) { + visited.add(url); + + for (const nextUrl of htmlParser.getUrls(url)) { + if (getHostname(nextUrl) === startHostname && !visited.has(nextUrl)) { + dfs(nextUrl); + } + } + } + + dfs(startUrl); + + return Array.from(visited); + } +} +``` + +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(m \cdot l)$ +- Space complexity: 
$O(m \cdot l)$ + +> Where $m$ is the number of edges in the graph, and $l$ is the maximum length of a URL (`urls[i].length`). + +--- + +## 2. Breadth-first search + +::tabs-start + +```python +class Solution: + def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]: + def get_hostname(url): + # split url by slashes + # for instance, "http://example.org/foo/bar" will be split into + # "http:", "", "example.org", "foo", "bar" + # the hostname is the 2-nd (0-indexed) element + return url.split('/')[2] + + start_hostname = get_hostname(startUrl) + q = collections.deque([startUrl]) + visited = set([startUrl]) + while q: + url = q.popleft() + for next_url in htmlParser.getUrls(url): + if get_hostname(next_url) == start_hostname and next_url not in visited: + q.append(next_url) + visited.add(next_url) + return visited +``` + +```java +class Solution { + private String getHostname(String url) { + // split url by slashes + // for instance, "http://example.org/foo/bar" will be split into + // "http:", "", "example.org", "foo", "bar" + // the hostname is the 2-nd (0-indexed) element + return url.split("/")[2]; + } + + public List crawl(String startUrl, HtmlParser htmlParser) { + String startHostname = getHostname(startUrl); + Queue q = new LinkedList(Arrays.asList(startUrl)); + HashSet visited = new HashSet(Arrays.asList(startUrl)); + while (!q.isEmpty()) { + String url = q.remove(); + for (String nextUrl : htmlParser.getUrls(url)) { + if (getHostname(nextUrl).equals(startHostname) && !visited.contains(nextUrl)) { + q.add(nextUrl); + visited.add(nextUrl); + } + } + } + return new ArrayList<>(visited); + } +} +``` + +```cpp +class Solution { +public: + vector crawl(string startUrl, HtmlParser htmlParser) { + function getHostname = [](string url) -> string { + // find the next slash in the url after "http://" + // that is after the 7-th position inclusively + // if there is no such slash, pos will be equal to url.size() + int pos = min(url.size(), url.find('/', 
7)); + // return the substring that starts after "http://" and ends + // before the next slash or at the end of the string + return url.substr(7, pos - 7); + }; + + queue q; + q.push(startUrl); + unordered_set visited{startUrl}; + string startHostname = getHostname(startUrl); + while (!q.empty()) { + string url = q.front(); + q.pop(); + for (string nextUrl : htmlParser.getUrls(url)) { + if (getHostname(nextUrl) == startHostname && !visited.count(nextUrl)) { + q.push(nextUrl); + visited.insert(nextUrl); + } + } + } + return vector(visited.begin(), visited.end()); + } +}; +``` +::tabs-end + +### Time & Space Complexity + +- Time complexity: $O(m \cdot l)$ +- Space complexity: $O(n \cdot l)$ + +> Where $m$ is the number of edges in the graph, $l$ is the maximum length of a URL (`urls[i].length`), and $n$ is the total number of URLs (`urls.length`).