@@ -1,6 +1,6 @@
 """

-Copyright (c) 2020-2021 Ayoub Malek and Vanessa Sochat
+Copyright (c) 2020-2022 Ayoub Malek and Vanessa Sochat

 This source code is licensed under the terms of the MIT license.
 For a copy, see <https://opensource.org/licenses/MIT>.
@@ -12,6 +12,7 @@
 import re
 import sys
 from urlchecker.core import fileproc
+from urlchecker.core.worker import Workers
 from urlchecker.core.urlproc import UrlCheckResult


@@ -41,6 +42,8 @@ def __init__(
         """
         # Initiate results object, and checks lookup (holds UrlCheck) for each file
         self.results = {"passed": set(), "failed": set(), "excluded": set()}
+
+        # Results organized by filename
         self.checks = {}

         # Save run parameters
@@ -123,12 +126,18 @@ def save_results(self, file_path, sep=",", header=None, relative_paths=True):
                 else:
                     file_name = os.path.relpath(file_name)

-                [writer.writerow([url, "failed", file_name]) for url in result.failed]
+                [
+                    writer.writerow([url, "failed", file_name])
+                    for url in result["failed"]
+                ]
                 [
                     writer.writerow([url, "excluded", file_name])
-                    for url in result.excluded
+                    for url in result["excluded"]
+                ]
+                [
+                    writer.writerow([url, "passed", file_name])
+                    for url in result["passed"]
                 ]
-                [writer.writerow([url, "passed", file_name]) for url in result.passed]

         return file_path

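With passed rows now written alongside failed and excluded ones, the saved CSV has one row per URL labeled with its status and source file. An illustrative output (the file name and URLs are made up, and the default URL/RESULT/FILENAME header is assumed):

URL,RESULT,FILENAME
https://example.com/ok,passed,docs/readme.md
https://example.com/missing,failed,docs/readme.md
http://localhost:8000,excluded,docs/readme.md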
|
@@ -161,27 +170,56 @@ def run(
         exclude_urls = exclude_urls or []
         exclude_patterns = exclude_patterns or []

-        # loop through files files
-        for file_name in file_paths:
-
-            # Instantiate a checker to extract urls
-            checker = UrlCheckResult(
-                file_name=file_name,
-                exclude_patterns=exclude_patterns,
-                exclude_urls=exclude_urls,
-                print_all=self.print_all,
-            )
-
-            # Check the urls
-            checker.check_urls(retry_count=retry_count, timeout=timeout)
+        # Run with multiprocessing
+        tasks = {}
+        funcs = {}
+        workers = Workers()

-            # Update flattened results
-            self.results["failed"].update(checker.failed)
-            self.results["passed"].update(checker.passed)
-            self.results["excluded"].update(checker.excluded)
+        # loop through files
+        for file_name in file_paths:

-            # Save the checker in the lookup
-            self.checks[file_name] = checker
+            # Export parameters and functions, use the same check task for all
+            tasks[file_name] = {
+                "file_name": file_name,
+                "exclude_patterns": exclude_patterns,
+                "exclude_urls": exclude_urls,
+                "print_all": self.print_all,
+                "retry_count": retry_count,
+                "timeout": timeout,
+            }
+            funcs[file_name] = check_task
+
+        results = workers.run(funcs, tasks)
+        for file_name, result in results.items():
+            self.checks[file_name] = result
+            self.results["failed"].update(result["failed"])
+            self.results["passed"].update(result["passed"])
+            self.results["excluded"].update(result["excluded"])

         # A flattened dict of passed and failed
         return self.results
+
+
+def check_task(*args, **kwargs):
+    """
+    A checking task, the default we use
+    """
+    # Instantiate a checker to extract urls
+    checker = UrlCheckResult(
+        file_name=kwargs["file_name"],
+        exclude_patterns=kwargs.get("exclude_patterns", []),
+        exclude_urls=kwargs.get("exclude_urls", []),
+        print_all=kwargs.get("print_all", True),
+    )
+
+    # Check the urls
+    checker.check_urls(
+        retry_count=kwargs.get("retry_count", 2), timeout=kwargs.get("timeout", 5)
+    )
+
+    # Update flattened results
+    return {
+        "failed": checker.failed,
+        "passed": checker.passed,
+        "excluded": checker.excluded,
+    }
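The `Workers` class imported from `urlchecker.core.worker` sits outside this diff. As a rough sketch of the contract that `run(funcs, tasks)` appears to rely on (matching keys in `funcs` and `tasks`, each function called with its task dict as keyword arguments), something like the following would satisfy it. This is an illustrative assumption, not the actual worker module:

# Hypothetical stand-in for urlchecker.core.worker.Workers, for illustration only.
import multiprocessing


class Workers:
    def __init__(self, workers=None):
        # Default to one process per available CPU
        self.workers = workers or multiprocessing.cpu_count()

    def run(self, funcs, tasks):
        # funcs and tasks share keys (file names above); each function is
        # called with its task dict as keyword arguments, and the return
        # values are collected under the same keys.
        with multiprocessing.Pool(processes=self.workers) as pool:
            handles = {
                name: pool.apply_async(funcs[name], kwds=task)
                for name, task in tasks.items()
            }
            return {name: handle.get() for name, handle in handles.items()}

Under this contract, `check_task` runs once per file in a separate process, and `run` hands back the per-file result dicts that the caller then flattens into `self.results`.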