Skip to content

Commit

Permalink
Fix share PDF download when redirect shows captcha (#868)
Browse files: browse the repository at this point in the history
* Fix share PDF download when redirect shows captcha

* Perform redirect and then check for captcha case before showing web view

* Simplify redirect and captcha check

* Use Result in RedirectWebViewCompletion

* Use a struct for RedirectWebViewHandler successful result
  • Loading branch information
mvasilak authored Mar 8, 2024
1 parent 5709d20 commit 5eca99c
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 92 deletions.
152 changes: 82 additions & 70 deletions ZShare/Controllers/RedirectWebViewHandler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,25 @@ import WebKit
import CocoaLumberjackSwift
import RxSwift

typealias RedirectWebViewCompletion = (URL?, String?, String?, String?) -> Void
typealias RedirectWebViewCompletion = RedirectWebViewHandler.Completion

final class RedirectWebViewHandler: NSObject {
typealias Completion = (Result<Redirect, Error>) -> Void

/// Payload delivered through `Completion` as `.success` when a PDF response has been detected.
struct Redirect {
/// URL of the navigation response whose MIME type was `application/pdf`.
let url: URL
/// `cookies` string returned by the extraction script; `nil` when extraction produced no data.
let cookies: String?
/// `userAgent` string returned by the extraction script; `nil` when extraction produced no data.
let userAgent: String?
/// `referrer` string returned by the extraction script; `nil` when extraction produced no data.
let referrer: String?
}

/// Failures delivered through `Completion` as `.failure`.
enum Error: Swift.Error {
/// The web view needed to perform the redirect was `nil`.
case webViewNil
/// A PDF navigation response was detected but it carried no URL.
case invalidURL
/// NOTE(review): not raised anywhere in the visible code — presumably reserved for
/// failures of the data-extraction script; confirm before relying on it.
case extractionFailed
/// The timer started for a navigation response fired before it was cancelled.
case timeout
}

private let initialUrl: URL
private let timeout: RxTimeInterval
private let timerScheduler: SerialDispatchQueueScheduler
Expand All @@ -24,86 +40,31 @@ final class RedirectWebViewHandler: NSObject {
private var disposeBag: DisposeBag?

init(url: URL, timeoutPerRedirect timeout: RxTimeInterval, webView: WKWebView) {
self.initialUrl = url
initialUrl = url
self.timeout = timeout
self.webView = webView
self.timerScheduler = SerialDispatchQueueScheduler(internalSerialQueueName: "org.zotero.RedirectWebViewHandler.TimerScheduler")
timerScheduler = SerialDispatchQueueScheduler(internalSerialQueueName: "org.zotero.RedirectWebViewHandler.TimerScheduler")

super.init()

webView.navigationDelegate = self
}

func getPdfUrl(completion: @escaping RedirectWebViewCompletion) {
guard let webView = self.webView else {
completion(nil, nil, nil, nil)
guard let webView else {
completion(.failure(.webViewNil))
return
}

self.completionHandler = completion
webView.load(URLRequest(url: self.initialUrl))
}

/// Arms a one-shot timeout for the current redirect step.
///
/// The timer subscription is owned by a fresh `DisposeBag` that replaces `disposeBag`.
/// When the timer fires, loading is stopped and the pending completion handler (if any)
/// is invoked with all-`nil` values and then cleared.
private func startTimer() {
    let timerBag = DisposeBag()
    disposeBag = timerBag

    // Runs on the main scheduler once the timeout elapses.
    let timedOut: (Int) -> Void = { [weak self] _ in
        guard let handler = self else { return }

        DDLogInfo("RedirectWebViewHandler: redirection timed out")

        handler.webView?.stopLoading()
        guard let completion = handler.completionHandler else { return }
        completion(nil, nil, nil, nil)
        handler.completionHandler = nil
    }

    Single<Int>
        .timer(timeout, scheduler: timerScheduler)
        .observe(on: MainScheduler.instance)
        .subscribe(onSuccess: timedOut)
        .disposed(by: timerBag)
}

private func extractData(from webView: WKWebView, completion: @escaping (String?, String?, String?) -> Void) {
guard let url = Bundle.main.url(forResource: "webview_extraction", withExtension: "js"),
let script = try? String(contentsOf: url) else {
DDLogError("RedirectWebViewHandler: can't load extraction javascript")
completion(nil, nil, nil)
return
}

DDLogInfo("RedirectWebViewHandler: call data extraction js")

let disposeBag = DisposeBag()
webView.call(javascript: script)
.observe(on: MainScheduler.instance)
.subscribe(with: self, onSuccess: { `self`, data in
self.disposeBag = nil

guard let payload = data as? [String: Any],
let cookies = payload["cookies"] as? String,
let userAgent = payload["userAgent"] as? String,
let referrer = payload["referrer"] as? String else {
DDLogError("RedirectWebViewHandler: extracted data missing response")
DDLogError("\(String(describing: data as? [String: Any]))")
completion(nil, nil, nil)
return
}

completion(cookies, userAgent, referrer)
}, onFailure: { `self`, _ in
self.disposeBag = nil
completion(nil, nil, nil)
})
.disposed(by: disposeBag)
self.disposeBag = disposeBag
completionHandler = completion
webView.load(URLRequest(url: initialUrl))
}
}

extension RedirectWebViewHandler: WKNavigationDelegate {
func webView(_ webView: WKWebView, decidePolicyFor navigationResponse: WKNavigationResponse, decisionHandler: @escaping (WKNavigationResponsePolicy) -> Void) {
guard let mimeType = navigationResponse.response.mimeType else {
self.startTimer()
startTimer()
decisionHandler(.allow)
return
}
Expand All @@ -112,24 +73,75 @@ extension RedirectWebViewHandler: WKNavigationDelegate {
case "application/pdf":
DDLogInfo("RedirectWebViewHandler: redirection detected pdf - \(navigationResponse.response.url?.absoluteString ?? "-")")
inMainThread { [weak self, weak webView] in
guard let self = self, let webView = webView else { return }
guard let self, let webView else { return }

// Cancel timer
self.disposeBag = nil
disposeBag = nil

// Extract webView data
self.extractData(from: webView) { cookies, userAgent, referrer in
// Return url
self.completionHandler?(navigationResponse.response.url, cookies, userAgent, referrer)
self.completionHandler = nil
extractData(from: webView) { [weak self] cookies, userAgent, referrer in
guard let self else { return }
if let url = navigationResponse.response.url {
// Return url
completionHandler?(.success(Redirect(url: url, cookies: cookies, userAgent: userAgent, referrer: referrer)))
return
}
completionHandler?(.failure(.invalidURL))
}
}
// Don't load web
decisionHandler(.cancel)

default:
self.startTimer()
startTimer()
decisionHandler(.allow)
}

/// Arms a one-shot timeout for the current redirect step.
///
/// The timer subscription is owned by a fresh `DisposeBag` that replaces `disposeBag`.
/// When the timer fires, the stored completion handler is called with `.failure(.timeout)`.
/// This function neither stops the web view nor clears the handler.
func startTimer() {
    let timerBag = DisposeBag()
    disposeBag = timerBag

    // Runs on the main scheduler once the timeout elapses; the log is emitted even if the
    // handler has already been deallocated.
    let reportTimeout: (Int) -> Void = { [weak self] _ in
        DDLogInfo("RedirectWebViewHandler: redirection timed out")
        guard let self else { return }
        completionHandler?(.failure(.timeout))
    }

    Single<Int>
        .timer(timeout, scheduler: timerScheduler)
        .observe(on: MainScheduler.instance)
        .subscribe(onSuccess: reportTimeout)
        .disposed(by: timerBag)
}

/// Runs the bundled `webview_extraction.js` script in `webView` and reports the values it returns.
///
/// - Parameters:
///   - webView: Web view in which the script is evaluated.
///   - completion: Called with `(cookies, userAgent, referrer)` when the script result is a
///     dictionary containing all three as strings. Called with three `nil`s when the script
///     file can't be loaded, the JavaScript call fails, or any of the values is missing.
func extractData(from webView: WKWebView, completion: @escaping (String?, String?, String?) -> Void) {
    guard
        let scriptUrl = Bundle.main.url(forResource: "webview_extraction", withExtension: "js"),
        let script = try? String(contentsOf: scriptUrl)
    else {
        DDLogError("RedirectWebViewHandler: can't load extraction javascript")
        completion(nil, nil, nil)
        return
    }

    DDLogInfo("RedirectWebViewHandler: call data extraction js")

    // The subscription is owned by a new bag that replaces the current `disposeBag`.
    let extractionBag = DisposeBag()
    disposeBag = extractionBag

    webView.call(javascript: script)
        .observe(on: MainScheduler.instance)
        .subscribe(
            onSuccess: { [weak self] data in
                self?.disposeBag = nil

                let payload = data as? [String: Any]
                if let cookies = payload?["cookies"] as? String,
                   let userAgent = payload?["userAgent"] as? String,
                   let referrer = payload?["referrer"] as? String {
                    completion(cookies, userAgent, referrer)
                    return
                }

                // At least one expected value is missing or not a string.
                DDLogError("RedirectWebViewHandler: extracted data missing response")
                DDLogError("\(String(describing: data as? [String: Any]))")
                completion(nil, nil, nil)
            },
            onFailure: { [weak self] _ in
                self?.disposeBag = nil
                completion(nil, nil, nil)
            }
        )
        .disposed(by: extractionBag)
}
}
}
75 changes: 53 additions & 22 deletions ZShare/ViewModels/ExtensionViewModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ final class ExtensionViewModel {
}

private func processDownload(of attachment: [String: Any], url: URL, file: File, item: ItemResponse, cookies: String?, userAgent: String?, referrer: String?) {
if self.fileStorage.isPdf(file: file) {
if fileStorage.isPdf(file: file) {
DDLogInfo("ExtensionViewModel: downloaded pdf")
var state = self.state
state.attachmentState = .processed
Expand All @@ -737,42 +737,73 @@ final class ExtensionViewModel {
DDLogInfo("ExtensionViewModel: downloaded unsupported attachment")

// Remove downloaded file, it won't be used anymore
try? self.fileStorage.remove(file)
try? fileStorage.remove(file)

guard (url.host ?? "").contains("sciencedirect") else {
self.state.attachmentState = .failed(.downloadedFileNotPdf)
state.attachmentState = .failed(.downloadedFileNotPdf)
return
}

// Try loading the url in webview to bypass redirects

DDLogInfo("ExtensionViewModel: detected sciencedirect, trying redirect")

self.state.attachmentState = .downloading(0)
self.state.retryCount += 1
state.attachmentState = .downloading(0)
state.retryCount += 1

getRedirectedPdfUrl(from: url) { [weak self] result in
guard let self else { return }
switch result {
case .success(let result):
if result.url != url && state.retryCount < 3 {
download(item: item, attachment: attachment, attachmentUrl: result.url, to: file, cookies: result.cookies, userAgent: result.userAgent, referrer: result.referrer)
webView?.isHidden = true
webView?.stopLoading()
return
}

self.getRedirectedPdfUrl(from: url) { [weak self] newUrl, newCookies, newUserAgent, newReferrer in
guard let self = self else { return }
case .failure(let error):
switch error {
case .timeout:
// Check if there is a captcha, in order to show the web view.
let captchaLocator = ".challenge-form"
let javascript = "document.querySelector('\(captchaLocator)') !== null"
webView?.call(javascript: javascript)
.observe(on: MainScheduler.instance)
.subscribe(onSuccess: { [weak self] result in
guard let self else { return }
if let result = result as? Bool, result == true {
webView?.isHidden = false
} else {
state.attachmentState = .failed(.downloadedFileNotPdf)
}
}, onFailure: { [weak self] _ in
guard let self else { return }
webView?.stopLoading()
state.attachmentState = .failed(.downloadedFileNotPdf)
})
.disposed(by: disposeBag)
return

if let newUrl = newUrl, newUrl != url && self.state.retryCount < 3 {
self.download(item: item, attachment: attachment, attachmentUrl: newUrl, to: file, cookies: newCookies, userAgent: newUserAgent, referrer: newReferrer)
return
default:
break
}
}

// Didn't help, report failed PDF download
self.state.attachmentState = .failed(.downloadedFileNotPdf)
webView?.isHidden = true
webView?.stopLoading()
state.attachmentState = .failed(.downloadedFileNotPdf)
}
}

private func getRedirectedPdfUrl(from url: URL, completion: @escaping (URL?, String?, String?, String?) -> Void) {
guard let webView = self.webView else {
completion(nil, nil, nil, nil)
return
}
func getRedirectedPdfUrl(from url: URL, completion: @escaping RedirectWebViewCompletion) {
guard let webView else {
completion(.failure(.webViewNil))
return
}

let handler = RedirectWebViewHandler(url: url, timeoutPerRedirect: .seconds(2), webView: webView)
handler.getPdfUrl(completion: completion)
self.redirectHandler = handler
let handler = RedirectWebViewHandler(url: url, timeoutPerRedirect: .seconds(2), webView: webView)
handler.getPdfUrl(completion: completion)
redirectHandler = handler
}
}

/// Tries to parse `ItemResponse` from data returned by translation server. It prioritizes items with attachments if there are multiple items.
Expand Down

0 comments on commit 5eca99c

Please sign in to comment.