@@ -57,6 +57,13 @@ class Gwt_Client
57
57
*/
58
58
protected $ _website = null ;
59
59
60
+ /**
61
+ * List of sites on account with options
62
+ *
63
+ * @var array|null Map of site URL => options array; null until getSites() has loaded it
64
+ */
65
+ protected $ _sites = null ;
66
+
60
67
/**
61
68
* Tables to download
62
69
*
@@ -193,21 +200,21 @@ function ($item)
193
200
* @param DateTime $dateStart
194
201
* @param DateTime $dateEnd
195
202
* @param string $lang
196
- * @return string
203
+ * @return mixed
197
204
*/
198
205
public function getTableData ($ tableName )
199
206
{
200
207
switch ($ tableName ) {
201
208
case 'CRAWL_ERRORS ' :
202
- return $ this ->downloadCSV_CrawlErrors ($ this ->getWebsite ());
209
+ $ data = $ this ->downloadCSV_CrawlErrors ($ this ->getWebsite ());
203
210
break ;
204
211
case 'CONTENT_ERRORS ' :
205
212
case 'CONTENT_KEYWORDS ' :
206
213
case 'INTERNAL_LINKS ' :
207
214
case 'EXTERNAL_LINKS ' :
208
215
case 'SOCIAL_ACTIVITY ' :
209
216
case 'LATEST_BACKLINKS ' :
210
- return $ this ->downloadCSV_XTRA (
217
+ $ data = $ this ->downloadCSV_XTRA (
211
218
$ this ->getWebsite (),
212
219
$ tableName ,
213
220
$ this ->getDateStart (),
@@ -221,8 +228,14 @@ public function getTableData($tableName)
221
228
$ finalUrl ,
222
229
$ this ->getDateStart ()->format ('Ymd ' ), $ this ->getDateEnd ()->format ('Ymd ' )
223
230
);
224
- return $ this ->getData ($ finalUrl );
231
+ $ data = $ this ->getData ($ finalUrl );
225
232
}
233
+
234
+ foreach ($ this ->_processors as $ processor ) {
235
+ $ data = $ processor ->process ($ data , $ tableName );
236
+ }
237
+
238
+ return $ data ;
226
239
}
227
240
228
241
/**
@@ -335,11 +348,23 @@ public function getLanguage()
335
348
/**
336
349
* Set website value
337
350
*
351
+ * @throws Exception
338
352
* @param $website
339
353
* @return $this
340
354
*/
341
355
public function setWebsite ($ website )
342
356
{
357
+ $ sites = $ this ->getSites ();
358
+
359
+ // if wrong name is given, no requests could be made
360
+ if (!array_key_exists ($ website , $ sites )) {
361
+ throw new Exception ('Site ' . var_export ($ website , true ) . ' not in current account. ' );
362
+ }
363
+ // if site is not verified, no requests could be made
364
+ if (!$ sites [$ website ]['verified ' ]) {
365
+ throw new Exception ('Site ' . var_export ($ website , true ) . ' is not verified. ' );
366
+ }
367
+
343
368
$ this ->_website = $ website ;
344
369
345
370
return $ this ;
@@ -423,6 +448,20 @@ public function getDateEnd()
423
448
return $ this ->_dateEnd ;
424
449
}
425
450
451
/**
 * Registers a data processor on this client.
 *
 * The processor is first given a reference to this client through its
 * setClient() method and is then appended to the processor list.
 *
 * @param Gwt_Processor_ProcessorInterface $processor Processor to register
 * @return $this Fluent interface
 */
public function addProcessor(Gwt_Processor_ProcessorInterface $processor)
{
    // Hand the client to the processor before it is stored.
    $processor->setClient($this);

    // Appending keeps processors in the order they were registered.
    $this->_processors[] = $processor;

    return $this;
}
464
+
426
465
/**
427
466
* Returns array of downloaded filenames.
428
467
*
@@ -528,6 +567,7 @@ protected function getData($url)
528
567
if ($ info ['http_code ' ] != 200 ) {
529
568
throw new Exception (
530
569
'Bad response code: ' . var_export ($ info ['http_code ' ], true )
570
+ . ' for url ' . var_export ($ url , true )
531
571
);
532
572
}
533
573
@@ -538,26 +578,35 @@ protected function getData($url)
538
578
* Gets all available sites from Google Webmaster Tools account.
539
579
*
540
580
* @throws Exception
581
+ * @param bool $reload
541
582
* @return array Map of site URL => options array (currently only the boolean key 'verified') for every site registered in the GWT account
542
583
*/
543
public function getSites($reload = false)
{
    // Returns a map of site URL => options array, where the only option
    // currently set is the boolean 'verified'. The result is cached in
    // $this->_sites; pass $reload = true to fetch the feed again.
    if (null !== $this->_sites && !$reload) {
        return $this->_sites;
    }

    $feed = $this->getData(self::SERVICEURI . 'feeds/sites/');
    if ($feed === false || $feed === '') {
        throw new Exception('Got no feed data for sites.');
    }

    $doc = new DOMDocument();
    // loadXML() returns false on malformed input. Without this check an
    // unparsable feed would be cached as an empty site list.
    if (!$doc->loadXML($feed)) {
        throw new Exception('Could not parse feed data for sites.');
    }

    $namespace = 'http://schemas.google.com/webmasters/tools/2007';
    $sites     = array();
    foreach ($doc->getElementsByTagName('entry') as $entry) {
        // item(0) yields null when the element is absent, so check the
        // node before reading nodeValue.
        $titleNode = $entry->getElementsByTagName('title')->item(0);
        if (null === $titleNode) {
            // Without a title there is no site URL to use as the key.
            continue;
        }

        $verifiedNode = $entry->getElementsByTagNameNS($namespace, 'verified')->item(0);

        $sites[$titleNode->nodeValue] = array(
            // A missing <verified> element is treated as "not verified".
            'verified' => null !== $verifiedNode && 'true' === $verifiedNode->nodeValue,
        );
    }

    $this->_sites = $sites;

    return $this->_sites;
}
562
611
563
612
/**
@@ -615,7 +664,7 @@ private function downloadCSV_XTRA($site, $tableName, DateTime $dateStart, DateTi
615
664
private function downloadCSV_CrawlErrors ($ site , $ separated = false )
616
665
{
617
666
$ type_param = 'we ' ;
618
- $ filename = parse_url ($ site , PHP_URL_HOST ) . '- ' . date ('Ymd-His ' );
667
+ // $filename = parse_url($site, PHP_URL_HOST) . '-' . date('Ymd-His');
619
668
if ($ separated ) {
620
669
$ data = array ();
621
670
foreach ($ this ->getErrTablesSort () as $ sortid => $ sortname ) {
@@ -653,49 +702,6 @@ private function downloadCSV_CrawlErrors($site, $separated = false)
653
702
}
654
703
}
655
704
656
- /**
657
- * Downloads the file based on the given URL.
658
- *
659
- * @param string $site Site URL available in GWT Account.
660
- * @param string $savepath Optional path to save CSV to (no trailing slash!).
661
- * @return $this
662
- * @deprecated
663
- */
664
- public function downloadCSV ($ site , $ savepath = '. ' )
665
- {
666
- $ filename = parse_url ($ site , PHP_URL_HOST ) . '- ' . date ('Ymd-His ' );
667
- $ tables = $ this ->_tables ;
668
- foreach ($ tables as $ table ) {
669
- $ this ->saveData (
670
- $ this ->getTableData ($ table , $ site , $ this ->getDateStart (), $ this ->getDateEnd (), $ this ->getLanguage ()),
671
- "$ savepath/ $ table- $ filename.csv "
672
- );
673
- }
674
-
675
- return $ this ;
676
- }
677
-
678
- /**
679
- * Saves data to a CSV file based on the given URL.
680
- *
681
- * @param string $data Downloaded CSV data
682
- * @param string $finalName Filepointer to save location.
683
- * @return bool
684
- * @deprecated
685
- */
686
- private function saveData (&$ data , $ finalName )
687
- {
688
- if (strlen ($ data ) > 1 && file_put_contents ($ finalName , utf8_decode ($ data ))) {
689
- array_push ($ this ->_downloaded , realpath ($ finalName ));
690
-
691
- return true ;
692
- } else {
693
- array_push ($ this ->_skipped , $ finalName );
694
-
695
- return false ;
696
- }
697
- }
698
-
699
705
/**
700
706
* Regular Expression to find the Security Token for a download file.
701
707
*
@@ -708,6 +714,7 @@ private function getToken($uri, $delimiter, $dlUri='')
708
714
{
709
715
$ matches = array ();
710
716
$ tmp = $ this ->getData ($ uri );
717
+
711
718
preg_match_all ("# $ dlUri.*?46security_token(.*?) $ delimiter#si " , $ tmp , $ matches );
712
719
return isset ($ matches [1 ][0 ])
713
720
? substr ($ matches [1 ][0 ], 3 , -1 )
0 commit comments