Skip to content

Added Search API #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 11, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ build
composer.lock
docs
vendor
public

index.php
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#Changelog
All notable changes will be documented in this file

## 0.4 - June 11th, 2015

- [Feature] Added Search API
- [Feature] Added SearchInfo: apart from the Entities returned in a regular EntityIterator as usual, the Search API returns a SearchInfo object, too. See README.

## 0.3 - May 17th, 2015

### Internal changes
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,22 @@ $url = $crawl->buildUrl();
$url->call();
```

## Search API

The Search API is used to quickly search across data obtained through the Bulk or Crawl API.

```php
$diffbot = new Diffbot('my_token');
$search = $diffbot->search('author:"Miles Johnson" AND type:article')->call();


foreach ($search as $article) {
echo $article->getTitle();
}
```

Use the Search API's `setCol` method to target a specific collection only; otherwise, all of your token's collections are searched.

## Testing

Just run PHPUnit in the root folder of the cloned project.
Expand Down
2 changes: 1 addition & 1 deletion src/Abstracts/Api.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public function buildUrl()
{
$url = rtrim($this->apiUrl, '/').'?';

if (strcmp($url,'crawl') !== 0) {
if (strcmp($this->url,'crawl') !== 0) {
// Add Token
$url .= 'token=' . $this->diffbot->getToken();

Expand Down
157 changes: 157 additions & 0 deletions src/Api/Search.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
<?php

namespace Swader\Diffbot\Api;

use Swader\Diffbot\Abstracts\Api;
use Swader\Diffbot\Entity\SearchInfo;
use Swader\Diffbot\Traits\DiffbotAware;

/**
* Class Search
* @see https://www.diffbot.com/dev/docs/search/
* @package Swader\Diffbot\Api
*/
class Search extends Api
{
    use DiffbotAware;

    /** @var string API URL to which to send the request */
    protected $apiUrl = 'https://api.diffbot.com/v3/search';

    /**
     * @var string|null Not referenced anywhere in this class: setCol() keeps
     *                  the collection name in $otherOptions['col'] instead.
     */
    protected $col = null;

    /** @var string Search query to execute */
    protected $query = '';

    /** @var SearchInfo|null Info about the last executed query, set by call() */
    protected $info;

    /** Value accepted by setNum() to request every matching result */
    const SEARCH_ALL = 'all';

    /**
     * Stores the search query to execute.
     *
     * @see https://www.diffbot.com/dev/docs/search/#query
     * @param string $q
     */
    public function __construct($q)
    {
        $this->query = $q;
    }

    /**
     * Name of the collection (Crawlbot or Bulk API job name) to search.
     * By default the search will operate on all of your token's collections.
     *
     * Passing null removes a previously set collection again.
     *
     * @param null|string $col
     * @return $this
     */
    public function setCol($col = null)
    {
        if ($col !== null) {
            $this->otherOptions['col'] = $col;
        } else {
            unset($this->otherOptions['col']);
        }

        return $this;
    }

    /**
     * Number of results to return. Default is 20. To return all results in
     * the search, pass num=all (see self::SEARCH_ALL).
     *
     * @param int|string $num A numeric value or the string "all"
     * @return $this
     * @throws \InvalidArgumentException if $num is neither numeric nor "all"
     */
    public function setNum($num = 20)
    {
        if (!is_numeric($num) && $num !== self::SEARCH_ALL) {
            throw new \InvalidArgumentException(
                'Argument can only be numeric or "all" to return all results.'
            );
        }
        $this->otherOptions['num'] = $num;

        return $this;
    }

    /**
     * Ordinal position of first result to return. (First position is 0.)
     * Default is 0.
     *
     * @param int $start
     * @return $this
     * @throws \InvalidArgumentException if $start is not numeric
     */
    public function setStart($start = 0)
    {
        if (!is_numeric($start)) {
            throw new \InvalidArgumentException(
                'Argument can only be numeric.'
            );
        }
        $this->otherOptions['start'] = $start;

        return $this;
    }

    /**
     * Builds out the URL string that gets requested once `call()` is called.
     *
     * The result has the form
     * `<apiUrl>?token=<token>&query=<encoded query>[&<option>=<encoded value>...]`.
     *
     * @return string
     */
    public function buildUrl()
    {
        $url = rtrim($this->apiUrl, '/') . '?';

        // Add token
        $url .= 'token=' . $this->diffbot->getToken();

        // Add query
        $url .= '&query=' . urlencode($this->query);

        // Add other options. Values are URL-encoded just like the query, so
        // that e.g. a collection name containing a space, "&" or "#" cannot
        // break the query string apart.
        foreach ($this->otherOptions as $option => $value) {
            $url .= '&' . $option . '=' . urlencode((string)$value);
        }

        return $url;
    }

    /**
     * If you pass in `true`, you get back a SearchInfo object related to the
     * last call. Keep in mind that passing in true before calling a default
     * call() will implicitly call the call(), and then get the SearchInfo.
     *
     * So:
     *
     * $searchApi->call() // gets entities
     * $searchApi->call(true) // gets SearchInfo about the executed query
     *
     * @todo: remove error avoidance when issue 12 is fixed: https://github.com/Swader/diffbot-php-client/issues/12
     * @param bool $info
     * @return \Swader\Diffbot\Entity\EntityIterator|SearchInfo
     */
    public function call($info = false)
    {
        if (!$info) {
            $ei = parent::call();

            // Errors raised while decoding are deliberately muted (see the
            // @todo above). The previous handler must be put back on every
            // path, including when decoding throws, or the muting handler
            // would stay active for the rest of the caller's script.
            set_error_handler(function () { /* ignore errors */ });
            try {
                $arr = $ei->getResponse()->json(['big_int_strings' => true]);
            } catch (\Exception $e) {
                restore_error_handler();
                throw $e;
            }
            restore_error_handler();

            // Everything except the request echo and the result objects is
            // handed to SearchInfo as metadata about the executed query.
            unset($arr['request'], $arr['objects']);

            $this->info = new SearchInfo($arr);

            return $ei;
        }

        // SearchInfo requested before any search ran: run it once first.
        if (!$this->info) {
            $this->call();
        }

        return $this->info;
    }
}
16 changes: 16 additions & 0 deletions src/Diffbot.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Swader\Diffbot\Api\Crawl;
use Swader\Diffbot\Api\Custom;
use Swader\Diffbot\Api\Search;
use Swader\Diffbot\Exceptions\DiffbotException;
use Swader\Diffbot\Api\Product;
use Swader\Diffbot\Api\Image;
Expand Down Expand Up @@ -252,4 +253,19 @@ public function crawl($name = null, Api $api = null)
return $api->registerDiffbot($this);
}

/**
 * Creates a Search API instance for the given query and binds it to this
 * Diffbot instance.
 *
 * When no HTTP client is set yet, the default HTTP client and entity
 * factory are initialised first.
 *
 * @see https://www.diffbot.com/dev/docs/search/#query
 * @param string $q Search query
 * @return Search
 */
public function search($q)
{
    $hasClient = (bool)$this->getHttpClient();

    if ($hasClient === false) {
        $this->setHttpClient();
        $this->setEntityFactory();
    }

    $searchApi = new Search($q);

    return $searchApi->registerDiffbot($this);
}
}
130 changes: 130 additions & 0 deletions src/Entity/SearchInfo.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
<?php

namespace Swader\Diffbot\Entity;

use Swader\Diffbot\Abstracts\Entity;

class SearchInfo extends Entity
{

    /**
     * Always returns "searchInfo".
     *
     * The type is a fixed literal rather than a lookup in the data array:
     * the other getters of this class read flat top-level keys (hits,
     * queryInfo, ...), and a 'searchInfo' key is not among them.
     *
     * @return string
     */
    public function getType()
    {
        return 'searchInfo';
    }

    /**
     * Current UTC time as timestamp
     * @return int
     */
    public function getCurrentTimeUTC()
    {
        return (int)$this->data['currentTimeUTC'];
    }

    /**
     * Response time in milliseconds. Time it took to process the query on
     * Diffbot's end.
     * @return int
     */
    public function getResponseTimeMS()
    {
        return (int)$this->data['responseTimeMS'];
    }

    /**
     * Number of results skipped for any reason
     * @todo: find out why results might be omitted
     * @return int
     */
    public function getNumResultsOmitted()
    {
        return (int)$this->data['numResultsOmitted'];
    }

    /**
     * Number of skipped shards
     * @todo: find out what shards are
     * @return int
     */
    public function getNumShardsSkipped()
    {
        return (int)$this->data['numShardsSkipped'];
    }

    /**
     * Total number of shards
     * @todo: find out what shards are
     * @return int
     */
    public function getTotalShards()
    {
        return (int)$this->data['totalShards'];
    }

    /**
     * Total number of documents in collection.
     * Should resemble the total number you got on the crawl job.
     * @todo: find out why not identical
     * @return int
     */
    public function getDocsInCollection()
    {
        return (int)$this->data['docsInCollection'];
    }

    /**
     * Number of results that match - NOT the number of *returned* results!
     * @return int
     */
    public function getHits()
    {
        return (int)$this->data['hits'];
    }

    /**
     * Returns an assoc. array containing the following keys and example values:
     *
     *     "fullQuery" => "type:json AND (author:\"Miles Johnson\" AND type:article)",
     *     "queryLanguageAbbr" => "xx",
     *     "queryLanguage" => "Unknown",
     *     "terms" => [
     *         [
     *             "termNum" => 0,
     *             "termStr" => "Miles Johnson",
     *             "termFreq" => 2621376,
     *             "termHash48" => 224575481707228,
     *             "termHash64" => 4150001371756911641,
     *             "prefixHash64" => 3732660069076179349
     *         ],
     *         [
     *             "termNum" => 1,
     *             "termStr" => "type:json",
     *             "termFreq" => 2621664,
     *             "termHash48" => 272064464231140,
     *             "termHash64" => 9877301297136722857,
     *             "prefixHash64" => 7586288672657224048
     *         ],
     *         [
     *             "termNum" => 2,
     *             "termStr" => "type:article",
     *             "termFreq" => 524448,
     *             "termHash48" => 210861560163398,
     *             "termHash64" => 12449358332005671483,
     *             "prefixHash64" => 7586288672657224048
     *         ]
     *     ]
     *
     * @todo: find out what hashes are, and to what the freq is relative
     * @return array
     */
    public function getQueryInfo()
    {
        return (array)$this->data['queryInfo'];
    }

}
Loading