From 18292154688f8fb0ef2eb8630e6ee37cbcf00d46 Mon Sep 17 00:00:00 2001 From: jvlflame Date: Sun, 11 Oct 2020 15:58:14 -0700 Subject: [PATCH] Fix Javbus url scraper for uncensored (#131) --- src/Javinizer/Public/Get-JavbusUrl.ps1 | 139 ++++++++++++++++++++----- 1 file changed, 112 insertions(+), 27 deletions(-) diff --git a/src/Javinizer/Public/Get-JavbusUrl.ps1 b/src/Javinizer/Public/Get-JavbusUrl.ps1 index 61e2249d..abcfe611 100644 --- a/src/Javinizer/Public/Get-JavbusUrl.ps1 +++ b/src/Javinizer/Public/Get-JavbusUrl.ps1 @@ -14,20 +14,6 @@ function Get-JavbusUrl { Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]" $webRequest = Invoke-RestMethod -Uri $searchUrl -Method Get -Verbose:$false } catch { - try { - $searchUrl = "https://www.javbus.com/uncensored/search/$Id&type=0&parent=uc" - Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]" - $webRequest = Invoke-RestMethod -Uri $searchUrl -Method Get -Verbose:$false - } catch { - try { - $searchUrl = "https://www.javbus.org/search/$Id&type=0&parent=uc" - Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]" - $webRequest = Invoke-RestMethod -Uri $searchUrl -Method Get -Verbose:$false - } catch { - Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Warning -Message "[$Id] [$($MyInvocation.MyCommand.Name)] not matched on JavBus" - return - } - } } $Tries = 5 @@ -35,7 +21,7 @@ function Get-JavbusUrl { try { $searchResults = (($webRequest | ForEach-Object { $_ -split '\n' } | Select-String '').Matches) | ForEach-Object { $_.Groups[1].Value } } catch { - return + $searchResults = $null } $numResults = $searchResults.Count @@ -52,9 +38,10 @@ function Get-JavbusUrl { Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$result]" $webRequest = Invoke-RestMethod -Uri $result -Method Get -Verbose:$false } catch { - Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$result]: $PSItem" -Action 'Continue' + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$result]" -Action 'Continue' } $resultId = Get-JavbusId -WebRequest $webRequest + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "Result [$count] is [$resultId]" if ($resultId -eq $Id) { $directUrlZh = "https://" + ($result -split '/')[-2] + "/" + ($result -split '/')[-1] $directUrlJa = "https://" + ($result -split '/')[-2] + "/ja/" + ($result -split '/')[-1] @@ -62,27 +49,125 @@ function Get-JavbusUrl { break } - Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "Result [$count] is [$resultId]" - if ($count -eq $Tries) { break } $count++ } + } + + if ($null -eq $directUrl) { + try { + $searchUrl = "https://www.javbus.com/uncensored/search/$Id&type=0&parent=uc" + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]" + $webRequest = Invoke-RestMethod -Uri $searchUrl -Method Get -Verbose:$false + } catch { + } + + $Tries = 5 + # Get the page search results + try { + $searchResults = (($webRequest | ForEach-Object { $_ -split '\n' } | Select-String '').Matches) | ForEach-Object { $_.Groups[1].Value } + } catch { + $searchResults = $null + } + $numResults = $searchResults.Count + + if ($Tries -gt $numResults) { + $Tries = $numResults + } - if ($null -eq $directUrl) { - Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Warning -Message "[$Id] [$($MyInvocation.MyCommand.Name)] not matched on JavBus" - return - } else { - $urlObject = [PSCustomObject]@{ - En = $directUrl - Ja = $directUrlJa - Zh = $directUrlZh + if ($numResults -ge 1) { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Searching [$Tries] of [$numResults] results for [$Id]" + + $count = 1 + foreach ($result in $searchResults) { + try { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$result]" + $webRequest = Invoke-RestMethod -Uri $result -Method Get -Verbose:$false + } catch { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$result]" -Action 'Continue' + } + $resultId = Get-JavbusId -WebRequest $webRequest + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "Result [$count] is [$resultId]" + if ($resultId -eq $Id) { + $directUrlZh = "https://" + ($result -split '/')[-2] + "/" + ($result -split '/')[-1] + $directUrlJa = "https://" + ($result -split '/')[-2] + "/ja/" + ($result -split '/')[-1] + $directUrl = "https://" + ($result -split '/')[-2] + "/en/" + ($result -split '/')[-1] + break + } + + if ($count -eq $Tries) { + break + } + + $count++ + } + } + } + + if ($null -eq $directUrl) { + try { + $searchUrl = "https://www.javbus.org/search/$Id&type=0&parent=uc" + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]" + $webRequest = Invoke-RestMethod -Uri $searchUrl -Method Get -Verbose:$false + } catch { + } + + $Tries = 5 + # Get the page search results + try { + $searchResults = (($webRequest | ForEach-Object { $_ -split '\n' } | Select-String '').Matches) | ForEach-Object { $_.Groups[1].Value } + } catch { + $searchResults = $null + } + $numResults = $searchResults.Count + + if ($Tries -gt $numResults) { + $Tries = $numResults + } + + if ($numResults -ge 1) { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Searching [$Tries] of [$numResults] results for [$Id]" + + $count = 1 + foreach ($result in $searchResults) { + try { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$result]" + $webRequest = Invoke-RestMethod -Uri $result -Method Get -Verbose:$false + } catch { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$Id] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$result]: $PSItem" -Action 'Continue' + } + $resultId = Get-JavbusId -WebRequest $webRequest + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "Result [$count] is [$resultId]" + if ($resultId -eq $Id) { + $directUrlZh = "https://" + ($result -split '/')[-2] + "/" + ($result -split '/')[-1] + $directUrlJa = "https://" + ($result -split '/')[-2] + "/ja/" + ($result -split '/')[-1] + $directUrl = "https://" + ($result -split '/')[-2] + "/en/" + ($result -split '/')[-1] + break + } + + if ($count -eq $Tries) { + break + } + + $count++ } + } + } - Write-Output $urlObject + if ($null -eq $directUrl) { + Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Warning -Message "[$Id] [$($MyInvocation.MyCommand.Name)] not matched on JavBus" + return + } else { + $urlObject = [PSCustomObject]@{ + En = $directUrl + Ja = $directUrlJa + Zh = $directUrlZh } + + Write-Output $urlObject } } }