@@ -123,6 +123,13 @@ export default class Interpreter extends EventEmitter {
123
123
this . isAborted = true ;
124
124
}
125
125
126
/**
 * Reports whether this interpreter has been flagged as aborted.
 *
 * @returns The current value of the `isAborted` field.
 */
public getIsAborted(): boolean {
  const { isAborted } = this;
  return isAborted;
}
132
+
126
133
private async applyAdBlocker ( page : Page ) : Promise < void > {
127
134
if ( this . blocker ) {
128
135
try {
@@ -610,6 +617,13 @@ export default class Interpreter extends EventEmitter {
610
617
611
618
if ( methodName === 'waitForLoadState' ) {
612
619
try {
620
// Attach an explicit 30 s timeout to waitForLoadState:
//  - a one-element array gets the timeout options appended,
//  - a non-array value is wrapped together with the timeout options,
//  - any other array is forwarded untouched.
const loadStateOptions = { timeout: 30000 };
let args = step.args;

if (Array.isArray(args) && args.length === 1) {
  args = [args[0], loadStateOptions];
} else if (!Array.isArray(args)) {
  args = [args, loadStateOptions];
}

// Pass the augmented `args`; forwarding `step.args` here would silently drop the timeout.
await executeAction(invokee, methodName, args);
614
628
} catch ( error ) {
615
629
await executeAction ( invokee , methodName , 'domcontentloaded' ) ;
@@ -670,7 +684,19 @@ export default class Interpreter extends EventEmitter {
670
684
return ;
671
685
}
672
686
673
// Run the in-page list scrape, but give up after 10 s so a hung page cannot stall the run.
const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
let evaluationTimer: ReturnType<typeof setTimeout> | undefined;
const timeoutPromise = new Promise<any[]>((_, reject) => {
  evaluationTimer = setTimeout(() => reject(new Error('Page evaluation timeout')), 10000);
});

let results;
try {
  results = await Promise.race([evaluationPromise, timeoutPromise]);
} catch (error) {
  // Narrow before reading `.message`; a thrown value is not guaranteed to be an Error.
  const reason = error instanceof Error ? error.message : String(error);
  debugLog(`Page evaluation failed: ${reason}`);
  return;
} finally {
  // Cancel the pending timer whichever promise settled first, so it does not linger.
  clearTimeout(evaluationTimer);
}
699
+
674
700
const newResults = results . filter ( item => {
675
701
const uniqueKey = JSON . stringify ( item ) ;
676
702
if ( scrapedItems . has ( uniqueKey ) ) return false ;
@@ -691,43 +717,94 @@ export default class Interpreter extends EventEmitter {
691
717
return false ;
692
718
} ;
693
719
720
// Heuristically decides whether a selector string should be handled as XPath
// instead of CSS, using its leading characters and a few XPath-only tokens.
const isXPathSelector = (selector: string): boolean => {
  // A leading '/' also matches selectors that start with '//'.
  const xpathPrefixes = ['/', './'];
  const xpathMarkers = ['contains(@', '[count(', '@class=', '@id=', ' and ', ' or '];

  return (
    xpathPrefixes.some((prefix) => selector.startsWith(prefix)) ||
    xpathMarkers.some((marker) => selector.includes(marker))
  );
};
732
+
733
// Waits for `selector` to be attached to the page and returns its element handle.
// Selectors recognised by isXPathSelector go through an `xpath=` locator; everything
// else goes through page.waitForSelector. `options.timeout` falls back to 10000 ms
// when it is falsy. Any error (including a timeout) yields null instead of throwing.
const waitForSelectorUniversal = async (selector: string, options: any = {}): Promise<ElementHandle | null> => {
  try {
    if (!isXPathSelector(selector)) {
      // CSS path
      return await page.waitForSelector(selector, {
        state: 'attached',
        timeout: options.timeout || 10000
      });
    }

    // XPath path
    const xpathLocator = page.locator(`xpath=${selector}`);
    await xpathLocator.waitFor({
      state: 'attached',
      timeout: options.timeout || 10000
    });
    return await xpathLocator.elementHandle();
  } catch {
    return null;
  }
};
755
+
694
756
// Enhanced button finder with retry mechanism
695
- const findWorkingButton = async ( selectors : string [ ] ) : Promise < {
696
- button : ElementHandle | null ,
757
+ const findWorkingButton = async ( selectors : string [ ] ) : Promise < {
758
+ button : ElementHandle | null ,
697
759
workingSelector : string | null ,
698
760
updatedSelectors : string [ ]
699
761
} > => {
700
- let updatedSelectors = [ ...selectors ] ;
701
-
762
+ const startTime = Date . now ( ) ;
763
+ const MAX_BUTTON_SEARCH_TIME = 15000 ;
764
+ let updatedSelectors = [ ...selectors ] ;
765
+
702
766
for ( let i = 0 ; i < selectors . length ; i ++ ) {
767
+ if ( Date . now ( ) - startTime > MAX_BUTTON_SEARCH_TIME ) {
768
+ debugLog ( `Button search timeout reached (${ MAX_BUTTON_SEARCH_TIME } ms), aborting` ) ;
769
+ break ;
770
+ }
703
771
const selector = selectors [ i ] ;
704
772
let retryCount = 0 ;
705
773
let selectorSuccess = false ;
706
774
707
775
while ( retryCount < MAX_RETRIES && ! selectorSuccess ) {
708
776
try {
709
- const button = await page . waitForSelector ( selector , {
710
- state : 'attached' ,
711
- timeout : 10000
712
- } ) ;
713
-
777
+ const button = await waitForSelectorUniversal ( selector , { timeout : 2000 } ) ;
778
+
714
779
if ( button ) {
715
780
debugLog ( 'Found working selector:' , selector ) ;
716
- return {
717
- button,
781
+ return {
782
+ button,
718
783
workingSelector : selector ,
719
- updatedSelectors
784
+ updatedSelectors
720
785
} ;
786
+ } else {
787
+ retryCount ++ ;
788
+ debugLog ( `Selector "${ selector } " not found: attempt ${ retryCount } /${ MAX_RETRIES } ` ) ;
789
+
790
+ if ( retryCount < MAX_RETRIES ) {
791
+ await page . waitForTimeout ( RETRY_DELAY ) ;
792
+ } else {
793
+ debugLog ( `Removing failed selector "${ selector } " after ${ MAX_RETRIES } attempts` ) ;
794
+ updatedSelectors = updatedSelectors . filter ( s => s !== selector ) ;
795
+ selectorSuccess = true ;
796
+ }
721
797
}
722
798
} catch ( error ) {
723
799
retryCount ++ ;
724
- debugLog ( `Selector "${ selector } " failed : attempt ${ retryCount } /${ MAX_RETRIES } ` ) ;
725
-
800
+ debugLog ( `Selector "${ selector } " error : attempt ${ retryCount } /${ MAX_RETRIES } - ${ error . message } ` ) ;
801
+
726
802
if ( retryCount < MAX_RETRIES ) {
727
803
await page . waitForTimeout ( RETRY_DELAY ) ;
728
804
} else {
729
805
debugLog ( `Removing failed selector "${ selector } " after ${ MAX_RETRIES } attempts` ) ;
730
806
updatedSelectors = updatedSelectors . filter ( s => s !== selector ) ;
807
+ selectorSuccess = true ;
731
808
}
732
809
}
733
810
}
@@ -1347,9 +1424,35 @@ export default class Interpreter extends EventEmitter {
1347
1424
}
1348
1425
1349
1426
/**
 * Checks whether the browser-side helper functions (`scrape`, `scrapeSchema`,
 * `scrapeList`, `scrapeListAuto`, `scrollDown`, `scrollUp`) exist on `window`,
 * and registers `browserSide/scraper.js` through `page.addInitScript` when they do not.
 *
 * The in-page check is abandoned after 3 seconds. A failed or timed-out check is
 * logged as a warning and the script is registered anyway. A failure to register
 * the script is logged as an error. This method logs instead of throwing.
 *
 * @param page - Page to inspect and, if needed, add the init script to.
 */
private async ensureScriptsLoaded(page: Page): Promise<void> {
  // Thrown values are not guaranteed to be Error instances, so narrow before reading `.message`.
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  let checkTimer: ReturnType<typeof setTimeout> | undefined;
  let isScriptLoaded = false;

  try {
    const evaluationPromise = page.evaluate(() =>
      typeof window.scrape === 'function' &&
      typeof window.scrapeSchema === 'function' &&
      typeof window.scrapeList === 'function' &&
      typeof window.scrapeListAuto === 'function' &&
      typeof window.scrollDown === 'function' &&
      typeof window.scrollUp === 'function'
    );

    const timeoutPromise = new Promise<boolean>((_, reject) => {
      checkTimer = setTimeout(() => reject(new Error('Script check timeout')), 3000);
    });

    isScriptLoaded = await Promise.race([evaluationPromise, timeoutPromise]);
  } catch (error) {
    // Treat an inconclusive check the same as "not loaded".
    this.log(`Script check failed, adding script anyway: ${describeError(error)}`, Level.WARN);
  } finally {
    // Cancel the pending timer whichever promise settled first, so it does not linger.
    clearTimeout(checkTimer);
  }

  if (isScriptLoaded) {
    return;
  }

  // Single registration attempt, so a failure here is reported as such
  // rather than being mislabelled as a failed script check.
  try {
    await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
  } catch (scriptError) {
    this.log(`Failed to add script: ${describeError(scriptError)}`, Level.ERROR);
  }
}
1355
1458
0 commit comments