Skip to content

Commit

Permalink
Fix google images selector + baidu images ads tag
Browse files Browse the repository at this point in the history
  • Loading branch information
karust committed May 11, 2024
1 parent fc49deb commit aeb0616
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 33 deletions.
32 changes: 13 additions & 19 deletions baidu/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,11 @@ type imageDataJson struct {
Height int
Width int
IsCopyright int

URL []struct {
AdType string `json:"adType"`
URL []struct {
SourcePage string `json:"FromURL"`
Original string `json:"ObjURL"`
} `json:"replaceUrl"`

// Versions []struct {
// Height int
// Width int
// ImgSourcePage string `json:"fromURL"`
// URL string `json:"objURL"`
// Type string
// } `json:"setList"`
}
}

Expand All @@ -63,18 +55,12 @@ func (baid *Baidu) GetRateLimiter() *rate.Limiter {

// isCaptcha reports whether an element matching "div.passMod_dialog-body"
// can be found on page within the selector timeout.
//
// Any error returned by Search is treated as "not present", so a failed
// lookup is indistinguishable from the element being absent.
// NOTE(review): the selector presumably matches Baidu's captcha dialog — confirm.
func (baid *Baidu) isCaptcha(page *rod.Page) bool {
	_, err := page.Timeout(baid.GetSelectorTimeout()).Search("div.passMod_dialog-body")
	return err == nil
}

// isTimeout reports whether an element matching "button.timeout-button"
// can be found on page within the selector timeout.
//
// Any error returned by Search is treated as "not present", so a failed
// lookup is indistinguishable from the element being absent.
// NOTE(review): the selector presumably matches the button on Baidu's
// timeout page — confirm.
func (baid *Baidu) isTimeout(page *rod.Page) bool {
	_, err := page.Timeout(baid.GetSelectorTimeout()).Search("button.timeout-button")
	return err == nil
}

func (baid *Baidu) Search(query core.Query) ([]core.SearchResult, error) {
Expand Down Expand Up @@ -226,7 +212,15 @@ func (baid *Baidu) SearchImage(query core.Query) ([]core.SearchResult, error) {
Rank: (searchPage * 30) + (i + 1),
URL: img.URL[0].Original,
Title: img.Title,
Description: fmt.Sprintf("%v,%v,%vx%x,copyright:%v", img.PictureDate, img.Type, img.Height, img.Width, img.IsCopyright)}
Description: fmt.Sprintf("%v,%v,%vx%x,copyright:%v", img.PictureDate, img.Type, img.Height, img.Width, img.IsCopyright),
Ad: func() bool {
if img.AdType != "0" {
return true
} else {
return false
}
}(),
}
searchResults = append(searchResults, res)
}

Expand Down
20 changes: 6 additions & 14 deletions google/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -319,21 +319,12 @@ func (gogl *Google) SearchImage(query core.Query) ([]core.SearchResult, error) {
page := gogl.Navigate(url)
defer gogl.close(page)

//// TODO: Case with cookie accept (appears with VPN)
// if page.MustInfo().URL != url {
// results, _ := page.Search("button[aria-label][jsaction]")
// if results != nil {
// //buttons, _ := results.All()
// //buttons[1].Click(proto.InputMouseButtonLeft, 1)
// }
// }

for len(searchResultsMap) < query.Limit {
page.WaitLoad()
page.Mouse.Scroll(0, 1000000, 1)
page.WaitLoad()

results, err := page.Timeout(gogl.Timeout).Search("div[data-hveid][data-ved][jsaction]")
results, err := page.Timeout(gogl.Timeout).Search("div[data-hveid][data-ved][jsaction][jsdata]")
if err != nil {
logrus.Errorf("Cannot parse search results: %s", err)
return *core.ConvertSearchResultsMap(searchResultsMap), core.ErrSearchTimeout
Expand Down Expand Up @@ -365,18 +356,19 @@ func (gogl *Google) SearchImage(query core.Query) ([]core.SearchResult, error) {
continue
}

dataID, err := r.Attribute("data-id")
dataVed, err := r.Attribute("data-ved")
if err != nil {
logrus.Error("Cannot find `data-ved` attr")
continue
}

// If already have image with this ID
if _, ok := searchResultsMap[*dataID]; ok {
if _, ok := searchResultsMap[*dataVed]; ok {
continue
}

// Get URLs
link, err := r.Element("a[tabindex][role]")
link, err := r.Element("a:not([ping])")
if err != nil {
continue
}
Expand Down Expand Up @@ -411,7 +403,7 @@ func (gogl *Google) SearchImage(query core.Query) ([]core.SearchResult, error) {
Title: title,
Description: fmt.Sprintf("Height:%v, Width:%v, Source Page: %v", imgSrc.Height, imgSrc.Width, imgSrc.PageURL),
}
searchResultsMap[*dataID] = gR
searchResultsMap[*dataVed] = gR

r.Remove()
}
Expand Down

0 comments on commit aeb0616

Please sign in to comment.