Skip to content

Commit

Permalink
Add mgstage scraper functions (#151)
Browse files Browse the repository at this point in the history
  • Loading branch information
jvlflame committed Dec 13, 2020
1 parent a29c166 commit a0caea7
Show file tree
Hide file tree
Showing 3 changed files with 437 additions and 0 deletions.
303 changes: 303 additions & 0 deletions src/Javinizer/Private/Scraper.Mgstage.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
function Get-MgstageId {
    <#
    .SYNOPSIS
        Extracts the movie ID from the table cell that follows the 品番 header.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The ID text, or $null when the cell is empty or the header is absent.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        # Narrow the HTML step by step: text after the header, then the
        # part before the closing </td>, then whatever follows <td>.
        $afterHeader = ($Webrequest.Content -split '<th>品番:<\/th>')[1]
        $cellHtml = ($afterHeader -split '<\/td>')[0]
        $movieId = ($cellHtml -split '<td>')[1]

        # An empty cell is reported as $null rather than an empty string.
        if ($movieId -eq '') {
            $movieId = $null
        }

        Write-Output $movieId
    }
}

function Get-MgstageTitle {
    <#
    .SYNOPSIS
        Extracts the text between <title> and </title> from the page HTML.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The title after being passed through Convert-HtmlCharacter, or $null
        when the result is an empty string.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $afterOpenTag = ($Webrequest.Content -split '<title>')[1]
        $rawTitle = ($afterOpenTag -split '<\/title>')[0]

        # Convert-HtmlCharacter is defined elsewhere in the module
        # (presumably decodes HTML entities - confirm against its source).
        $pageTitle = Convert-HtmlCharacter -String $rawTitle

        if ($pageTitle -eq '') {
            $pageTitle = $null
        }

        Write-Output $pageTitle
    }
}

function Get-MgstageDescription {
    <#
    .SYNOPSIS
        Extracts the contents of the <p class="txt introduction"> paragraph.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The paragraph text passed through Convert-HtmlCharacter, or $null when
        the page has no such paragraph.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $introMarker = '<p class="txt introduction">'

        # Default to "no description"; only overwritten when the marker exists.
        $description = $null

        if ($Webrequest.Content -match $introMarker) {
            $afterMarker = ($Webrequest.Content -split $introMarker)[1]
            $rawDescription = ($afterMarker -split '<\/p>')[0]
            $description = Convert-HtmlCharacter -String $rawDescription
        }

        Write-Output $description
    }
}

function Get-MgstageReleaseDate {
    <#
    .SYNOPSIS
        Extracts the release date from the table cell that follows the
        配信開始日 header and formats it as yyyy-MM-dd.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The date as a yyyy-MM-dd string, or $null when the header is absent,
        the cell is blank, or the text cannot be parsed as a date.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $rawDate = (((($Webrequest.Content -split '<th>配信開始日:<\/th>')[1] -split '<\/td>')[0]) -split '<td>')[1]
        $releaseDate = $null

        # Only attempt the conversion when there is text to convert; passing
        # $null or blank text to Get-Date would raise a binding error.
        if (-not [string]::IsNullOrWhiteSpace($rawDate)) {
            try {
                $releaseDate = Get-Date -Date $rawDate.Trim() -Format 'yyyy-MM-dd' -ErrorAction Stop
            } catch {
                # Unparseable date text: report "no date" like the other
                # getters do for missing fields instead of surfacing an error.
                $releaseDate = $null
            }
        }

        Write-Output $releaseDate
    }
}

function Get-MgstageReleaseYear {
    <#
    .SYNOPSIS
        Returns the year portion of the release date.
    .DESCRIPTION
        Calls Get-MgstageReleaseDate and keeps the text before the first '-'.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The first '-'-separated segment of the release date string.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $formattedDate = Get-MgstageReleaseDate -WebRequest $Webrequest
        $dateSegments = $formattedDate -split '-'
        $releaseYear = $dateSegments[0]

        Write-Output $releaseYear
    }
}

function Get-MgstageRuntime {
    <#
    .SYNOPSIS
        Extracts the runtime from the table cell that follows the 収録時間
        header, with the 'min' suffix removed.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The runtime text without 'min' and surrounding whitespace, or $null
        when the header is absent or the cell is empty.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $rawLength = ((($Webrequest.Content -split '<th>収録時間:<\/th>')[1] -split '<\/td>')[0] -split '<td>')[1]
        $length = ($rawLength -replace 'min').Trim()

        # Normalize "nothing found" to $null so this getter agrees with the
        # Id/Title/Maker/Label/Series getters, which never return ''.
        if ($length -eq '') {
            $length = $null
        }

        Write-Output $length
    }
}

function Get-MgstageMaker {
    <#
    .SYNOPSIS
        Extracts the maker name from the table cell that follows the メーカー
        header.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The maker text passed through Convert-HtmlCharacter, or $null when the
        result is an empty string.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $afterHeader = ($Webrequest.Content -split '<th>メーカー:<\/th>')[1]
        $cellHtml = ($afterHeader -split '<\/td>')[0]

        # The third '>'-delimited piece is the text that follows the second
        # tag in the cell; cut it off where the closing </a begins.
        $afterSecondTag = ($cellHtml -split '>')[2]
        $rawMaker = ($afterSecondTag -split '<\/a')[0]

        $makerName = Convert-HtmlCharacter -String $rawMaker

        if ($makerName -eq '') {
            $makerName = $null
        }

        Write-Output $makerName
    }
}

function Get-MgstageLabel {
    <#
    .SYNOPSIS
        Extracts the label name from the table cell that follows the レーベル
        header.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .PARAMETER Replace
        Accepted but not used by this function's body.
    .OUTPUTS
        The label text passed through Convert-HtmlCharacter, or $null when the
        result is an empty string.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest,

        [Parameter()]
        [Object]$Replace
    )

    process {
        $afterHeader = ($Webrequest.Content -split '<th>レーベル:<\/th>')[1]
        $cellHtml = ($afterHeader -split '<\/td>')[0]

        # The third '>'-delimited piece is the text that follows the second
        # tag in the cell; cut it off where the closing </a begins.
        $afterSecondTag = ($cellHtml -split '>')[2]
        $rawLabel = ($afterSecondTag -split '<\/a')[0]

        $labelName = Convert-HtmlCharacter -String $rawLabel

        if ($labelName -eq '') {
            $labelName = $null
        }

        Write-Output $labelName
    }
}

function Get-MgstageSeries {
    <#
    .SYNOPSIS
        Extracts the series name from the table cell that follows the シリーズ
        header.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .PARAMETER Replace
        Accepted but not used by this function's body.
    .OUTPUTS
        The series text passed through Convert-HtmlCharacter, or $null when
        the result is an empty string.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest,

        [Parameter()]
        [Object]$Replace
    )

    process {
        $afterHeader = ($Webrequest.Content -split '<th>シリーズ:<\/th>')[1]
        $cellHtml = ($afterHeader -split '<\/td>')[0]

        # The third '>'-delimited piece is the text that follows the second
        # tag in the cell; cut it off where the closing </a begins.
        $afterSecondTag = ($cellHtml -split '>')[2]
        $rawSeries = ($afterSecondTag -split '<\/a')[0]

        $seriesName = Convert-HtmlCharacter -String $rawSeries

        if ($seriesName -eq '') {
            $seriesName = $null
        }

        Write-Output $seriesName
    }
}

function Get-MgstageRating {
    <#
    .SYNOPSIS
        Extracts the star rating and vote count from the page HTML.
    .DESCRIPTION
        The rating is the text following a <span class="star_..."></span>
        element; it is doubled and rounded to one decimal place. The vote
        count is the number inside "(N 件)".
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        A PSCustomObject with Rating and Votes string properties. Rating is
        $null when the doubled value is 0 or the rating text is not numeric.
        Outputs $null when the vote count is 0, and nothing at all when
        either the rating span or the vote count cannot be found.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        try {
            $rating = ($Webrequest.Content | Select-String -Pattern '<span class="star_.*"><\/span>(.*)').Matches.Groups[1].Value
        } catch {
            # No rating span on the page.
            return
        }

        # Multiply the rating value by 2 to conform to 1-10 rating standard.
        # A non-numeric capture is treated as "no rating" instead of raising
        # a cast error in the middle of the scrape.
        try {
            $newRating = [Math]::Round(([Decimal]$rating * 2), 1)
        } catch {
            $newRating = 0
        }

        if ($newRating -eq 0) {
            $rating = $null
        } else {
            $rating = $newRating.ToString()
        }

        try {
            # \d+ (not \d): vote counts of 10 or more must match as well,
            # otherwise the whole rating is silently dropped for them.
            $ratingCount = ($Webrequest.Content | Select-String -Pattern '\((\d+) 件\)').Matches.Groups[1].Value
        } catch {
            # No vote count on the page.
            return
        }

        if ($ratingCount -eq 0) {
            $ratingObject = $null
        } else {
            $ratingObject = [PSCustomObject]@{
                Rating = $rating
                Votes  = $ratingCount
            }
        }

        Write-Output $ratingObject
    }
}

function Get-MgstageGenre {
    <#
    .SYNOPSIS
        Extracts the genre names from the table cell that follows the ジャンル
        header.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The genre names, each passed through Convert-HtmlCharacter, or $null
        when none are found.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $genreLinkPattern = '<a href="\/search\/search\.php\?image_word_ids\[\]=.*">'
        $genreCell = (($Webrequest.Content -split '<th>ジャンル:<\/th>')[1] -split '<\/td>')[0]

        # Splitting on the opening anchor tags leaves one fragment per genre;
        # strip the remaining markup and drop fragments that end up empty.
        $genreNames = $genreCell -split $genreLinkPattern |
            ForEach-Object { ($_ -replace '<td>' -replace '<\/a>').Trim() } |
            Where-Object { $_ -ne '' }

        $genreArray = @()
        foreach ($genreName in $genreNames) {
            $convertedName = Convert-HtmlCharacter -String $genreName
            $genreArray += $convertedName
        }

        if ($genreArray.Count -eq 0) {
            $genreArray = $null
        }

        Write-Output $genreArray
    }
}

function Get-MgstageActress {
    <#
    .SYNOPSIS
        Extracts the performer names from the table cell that follows the
        出演 header.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        One PSCustomObject per name with LastName, FirstName, JapaneseName and
        ThumbUrl properties, or $null when no names are found. Names that
        contain Japanese characters fill JapaneseName only; other names are
        split on a space into FirstName (first part) and LastName (second).
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $japaneseCharPattern = '[\u3040-\u309f]|[\u30a0-\u30ff]|[\uff66-\uff9f]|[\u4e00-\u9faf]'

        # Isolate the cell, strip its markup, then keep one trimmed,
        # non-empty line per name.
        $actressCell = (($Webrequest.Content -split '<th>出演:<\/th>')[1] -split '<\/td>')[0]
        $cellText = $actressCell -replace '<td>' -replace '<\/a>' -replace '<a href="\/search\/search\.php\?image_word_ids\[\]=.*">'
        $actressNames = $cellText -split '\n' |
            ForEach-Object { ($_).Trim() } |
            Where-Object { $_ -ne '' }

        $movieActressObject = @()
        foreach ($name in $actressNames) {
            if ($name -match $japaneseCharPattern) {
                # Japanese name: stored whole, no first/last split.
                $lastName = $null
                $firstName = $null
                $japaneseName = $name
            } else {
                $nameParts = $name -split ' '
                $lastName = $nameParts[1] -replace '\\', ''
                $firstName = $nameParts[0] -replace '\\', ''
                $japaneseName = $null
            }

            $movieActressObject += [PSCustomObject]@{
                LastName     = $lastName
                FirstName    = $firstName
                JapaneseName = $japaneseName
                ThumbUrl     = $null
            }
        }

        if ($movieActressObject.Count -eq 0) {
            $movieActressObject = $null
        }

        Write-Output $movieActressObject
    }
}

function Get-MgstageCoverUrl {
    <#
    .SYNOPSIS
        Extracts the cover image URL from the href of the element marked
        class="link_magnify".
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        The captured .jpg URL, or nothing when the pattern does not match.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        $coverPattern = 'class="link_magnify" href="(.*\.jpg)"'

        try {
            $coverMatch = $Webrequest.Content | Select-String -Pattern $coverPattern
            # Indexing fails when nothing matched, which lands in the catch.
            $coverUrl = $coverMatch.Matches[0].Groups[1].Value
        } catch {
            return
        }

        Write-Output $coverUrl
    }
}

function Get-MgstageScreenshotUrl {
    <#
    .SYNOPSIS
        Extracts every screenshot URL from the href of elements marked
        class="sample_image".
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML.
    .OUTPUTS
        One .jpg URL string per matching element, or nothing when there are
        no matches.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        # [^"]* keeps each capture inside its own href value, so several
        # anchors on one line yield separate URLs instead of one merged
        # string; \. requires a literal dot before the jpg extension.
        $screenshotPattern = 'class="sample_image" href="([^"]*\.jpg)"'

        try {
            $screenshotMatches = $Webrequest.Content | Select-String -Pattern $screenshotPattern -AllMatches
            if ($null -eq $screenshotMatches) {
                return
            }

            $screenshotUrl = $screenshotMatches.Matches | ForEach-Object { $_.Groups[1].Value }
        } catch {
            return
        }

        Write-Output $screenshotUrl
    }
}

function Get-MgstageTrailerUrl {
    <#
    .SYNOPSIS
        Placeholder for trailer URL extraction; no extraction is implemented.
    .PARAMETER Webrequest
        Object whose Content property holds the page HTML. Currently unused.
    .OUTPUTS
        Always $null.
    #>
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
        [Object]$Webrequest
    )

    process {
        # Assign locally so a variable named $trailerUrl in a calling scope
        # cannot leak into the output through PowerShell's dynamic scoping.
        $trailerUrl = $null

        Write-Output $trailerUrl
    }
}
52 changes: 52 additions & 0 deletions src/Javinizer/Public/Get-MgstageData.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#Requires -PSEdition Core

function Get-MgstageData {
    <#
    .SYNOPSIS
        Downloads a page and assembles its scraped fields into one object.
    .DESCRIPTION
        Performs a GET on the URL using a web session that carries an
        'adc=1' cookie for the .mgstage.com domain (presumably an age
        confirmation flag - confirm), then runs each Get-Mgstage* scraper
        function against the response.
    .PARAMETER Url
        The page URL. Accepts pipeline input by value or by property name.
    .OUTPUTS
        A PSCustomObject with Source, Url and one property per scraped field.
        Nothing is output for a URL whose request fails; the failure is
        logged through Write-JVLog at Error level.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [String]$Url
    )

    begin {
        # One session, created once, is shared by every URL in the pipeline.
        $session = New-Object Microsoft.PowerShell.Commands.WebRequestSession
        $cookie = New-Object System.Net.Cookie
        $cookie.Name = 'adc'
        $cookie.Value = '1'
        $cookie.Domain = '.mgstage.com'
        $session.Cookies.Add($cookie)
    }

    process {
        # Reset per URL: without this, a failed request would leave the
        # previous pipeline item's response in place and its data would be
        # reported under the current URL.
        $webRequest = $null

        try {
            Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$Url]"
            $webRequest = Invoke-WebRequest -Uri $Url -Method Get -WebSession $session -Verbose:$false
        } catch {
            Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$($MyInvocation.MyCommand.Name)] Error [GET] on URL [$Url]: $PSItem" -Action 'Continue'
        }

        # The scraper functions declare Webrequest as mandatory, so calling
        # them without a response would only produce one binding error per
        # field. Skip this URL instead.
        if ($null -eq $webRequest) {
            return
        }

        $movieDataObject = [PSCustomObject]@{
            Source        = 'mgstageja'
            Url           = $Url
            Id            = Get-MgstageId -WebRequest $webRequest
            Title         = Get-MgstageTitle -WebRequest $webRequest
            Description   = Get-MgstageDescription -WebRequest $webRequest
            ReleaseDate   = Get-MgstageReleaseDate -WebRequest $webRequest
            ReleaseYear   = Get-MgstageReleaseYear -WebRequest $webRequest
            Runtime       = Get-MgstageRuntime -WebRequest $webRequest
            Maker         = Get-MgstageMaker -WebRequest $webRequest
            Label         = Get-MgstageLabel -WebRequest $webRequest
            Series        = Get-MgstageSeries -WebRequest $webRequest
            Rating        = Get-MgstageRating -WebRequest $webRequest
            Actress       = Get-MgstageActress -WebRequest $webRequest
            Genre         = Get-MgstageGenre -WebRequest $webRequest
            CoverUrl      = Get-MgstageCoverUrl -WebRequest $webRequest
            ScreenshotUrl = Get-MgstageScreenshotUrl -WebRequest $webRequest
            TrailerUrl    = Get-MgstageTrailerUrl -WebRequest $webRequest
        }

        Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$($MyInvocation.MyCommand.Name)] Mgstage data object: $($movieDataObject | ConvertTo-Json -Depth 32 -Compress)"
        Write-Output $movieDataObject
    }
}
Loading

0 comments on commit a0caea7

Please sign in to comment.