Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

original search fix #181

Merged
merged 2 commits into from
Jan 2, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 65 additions & 16 deletions imdb.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ class IMDB
* if you get no result, it will use the advanced method
*/
const IMDB_SEARCH_ORIGINAL = true;


/**
* Set this to true if you want to search for exact titles
* it falls back to false if theres no result
*/
const IMDB_EXACT_SEARCH = true;

/**
* Set the sensitivity for search results in percentage.
*/
Expand Down Expand Up @@ -86,7 +92,7 @@ class IMDB
const IMDB_RELEASE_DATE = '~href="/title/[t0-9]*/releaseinfo">(.*)<~Ui';
const IMDB_RUNTIME = '~<td[^>]*>\s*Runtime\s*</td>\s*<td>(.+)</td>~Ui';
const IMDB_SEARCH_ADV = '~text-primary">1[.]</span>\s*<a.href="\/title\/(tt\d{6,})\/(?:.*?)"(?:\s*)>(?:.*?)<\/a>~Ui';
const IMDB_SEARCH_ORG = '~href="\/title\/(tt\d{6,})\/(?:.*?)"\s>(?!<img)(.*?)<\/a>\s(\(([0-9]{4})\))?\s(?:<br/>aka\s<i>"(.*?)"</i>)?~';
const IMDB_SEARCH_ORG = '~find-title-result">(?:.*?)alt="(.*?)"(?:.*?)href="\/title\/(tt\d{6,})\/(?:.*?)">(.*?)<\/a>~';
const IMDB_SEASONS = '~episodes\?season=(?:\d+)">(\d+)<~Ui';
const IMDB_SOUND_MIX = '~<td[^>]*>\s*Sound\s*Mix\s*</td>\s*<td>(.+)</td>~Ui';
const IMDB_TAGLINE = '~<td[^>]*>\s*Taglines\s*</td>\s*<td>(.+)</td>~Ui';
Expand Down Expand Up @@ -197,6 +203,12 @@ public function __construct($sSearch, $iCache = null, $sSearchFor = 'all')
$this->iCache = (int) $iCache;
}

if (self::IMDB_EXACT_SEARCH) {
if ($this->fetchUrl($sSearch, self::IMDB_SEARCH_ORIGINAL, true)) {
return true;
}
}

if ($this->fetchUrl($sSearch, self::IMDB_SEARCH_ORIGINAL)) {
return true;
}
Expand All @@ -213,7 +225,7 @@ public function __construct($sSearch, $iCache = null, $sSearchFor = 'all')
*
* @return bool True on success, false on failure.
*/
private function fetchUrl($sSearch, $orgSearch = false)
private function fetchUrl($sSearch, $orgSearch = false, $exactSearch = false)
{
$sSearch = trim($sSearch);

Expand Down Expand Up @@ -278,7 +290,10 @@ private function fetchUrl($sSearch, $orgSearch = false)
$sSearch = $sTempSearch . ' (' . $sYear . ')';
}

$this->sUrl = 'https://www.imdb.com/find?q=' . rawurlencode(str_replace(' ', '+', $sSearch)) . $sParameters;
if ($exactSearch) {
$sParameters .= '&exact=true';
}
$this->sUrl = 'https://www.imdb.com/find/?q=' . rawurlencode(str_replace(' ', ' ', $sSearch)) . $sParameters;
}

$bSearch = true;
Expand Down Expand Up @@ -355,6 +370,7 @@ private function fetchUrl($sSearch, $orgSearch = false)
$aReturned = IMDBHelper::matchRegex($sSource, self::IMDB_SEARCH_ORG);

if ($aReturned) {
$rData = [];
$fTempPercent = 0.00;
$iTempId = "";
$sYear = 0;
Expand All @@ -368,32 +384,65 @@ private function fetchUrl($sSearch, $orgSearch = false)
}

foreach ($aReturned[1] as $i => $value) {
$sTitle = $aReturned[2][$i];
$sId = $aReturned[2][$i];
$sTitle = $aReturned[3][$i];
$perc = 0.00;
$year = 0;

if ($sYear === 0) {
$sim = similar_text($sSearch, $sTitle, $perc);
} else {
if ($sYear != $aReturned[4][$i]) {
$sMatch = IMDBHelper::matchRegex($aReturned[1][$i], '~\(?([0-9]{4})\)?~', 1);
if (false !== $sMatch) {
$year = $sMatch;
}

if ($sYear != $year) {
continue;
}

$sim = similar_text($sTempSearch, $sTitle, $perc);
}

if (round($perc, 3) > round($fTempPercent, 3)) {
$iTempId = $value;
$fTempPercent = $perc;
if (true === self::IMDB_DEBUG) {
echo '<pre><b>Set Result:</b> ' . $sSearch . ' => ' . $sTitle . ' (' . $perc . '%) </pre>';
}
}
$rData[] = [
'id' => $sId,
'title' => $sTitle,
'year' => $year,
'match' => floatval($perc)
];


}

if (sizeof($rData) === 0) {
return false;
}

if ((round($fTempPercent, 0) > self::IMDB_SENSITIVITY)) {
$sUrl = 'https://www.imdb.com/title/' . $iTempId . '/reference';
if (true === self::IMDB_DEBUG) {
foreach ($rData as $sArray) {
echo '<pre><b>Found results:</b> ' . $sArray['id'] . ' => ' . $sArray['title'] . ' (' . $sArray['match']. '%) </pre>';
}
}

//get highest match of search results
$matches = array_column($rData, 'match');
$maxv = max($matches);

$marray = array_filter($rData, function($item) use ($maxv) {
return $item['match'] == $maxv;
});

$marray = reset($marray);

if (sizeof($marray) > 0) {
if (!$exactSearch && round($marray['match'], 0) < self::IMDB_SENSITIVITY) {
echo '<pre><b>Bad sensitivity:</b> ' . $marray['id'] . ' => ' . $marray['title'] . ' (' . $marray['match']. '%) </pre>';
return false;
}

$sUrl = 'https://www.imdb.com/title/' . $marray['id'] . '/reference';
if (true === self::IMDB_DEBUG) {
echo '<pre><b>Percentage:</b> ' . $iTempId . ' => ' . $fTempPercent . '% </pre>';
echo '<pre><b>Get best result:</b> ' . $marray['title'] . ' ' . $marray['id'] . ' => ' . $marray['match'] . '% </pre>';
echo '<pre><b>New redirect saved:</b> ' . basename($sRedirectFile) . ' => ' . $sUrl . '</pre>';
}
file_put_contents($sRedirectFile, $sUrl);
Expand Down