diff --git a/facebook_scraper/__init__.py b/facebook_scraper/__init__.py
index d512b939..43fbff37 100644
--- a/facebook_scraper/__init__.py
+++ b/facebook_scraper/__init__.py
@@ -497,6 +497,25 @@ def handle_pagination_url(url):
     output_file.close()
 
 
+def get_groups_by_search(
+    word: str,
+    **kwargs,
+):
+    """Search Facebook groups for `word`.
+
+    Returns a generator yielding the group info of each result found on
+    the first page of search results.
+
+    Keyword arguments consumed here: `timeout` (requests timeout, defaults
+    to DEFAULT_REQUESTS_TIMEOUT) and `cookies` (handed to set_cookies).
+    Any remaining keyword arguments are forwarded to the scraper.
+    """
+    _scraper.requests_kwargs['timeout'] = kwargs.pop('timeout', DEFAULT_REQUESTS_TIMEOUT)
+    cookies = kwargs.pop('cookies', None)
+    set_cookies(cookies)
+    return _scraper.get_groups_by_search(word, **kwargs)
+
+
 def enable_logging(level=logging.DEBUG):
     handler = logging.StreamHandler()
     handler.setLevel(level)
diff --git a/facebook_scraper/facebook_scraper.py b/facebook_scraper/facebook_scraper.py
index 84299486..539b0b0b 100755
--- a/facebook_scraper/facebook_scraper.py
+++ b/facebook_scraper/facebook_scraper.py
@@ -1098,3 +1098,53 @@ def _generic_get_posts(
                     if remove_source:
                         post.pop('source', None)
                     yield post
+
+    def get_groups_by_search(self, word: str, **kwargs):
+        """Search groups for `word` and yield info for each first-page result.
+
+        Results whose button has no id, whose group id cannot be located in
+        the page source, or whose group info cannot be extracted are skipped.
+        """
+        # Imported locally so this method does not depend on the module's
+        # import block; the term must be URL-encoded to survive inside `q`.
+        from urllib.parse import quote_plus
+
+        group_search_url = utils.urljoin(
+            FB_MOBILE_BASE_URL, f"search/groups/?q={quote_plus(word)}"
+        )
+        r = self.get(group_search_url)
+        for group_element in r.html.find('div[role="button"]'):
+            button_id = group_element.attrs.get("id")
+            if not button_id:
+                continue
+            try:
+                group_id = self.find_group_id(button_id, r.text)
+            except ValueError:
+                # No usable group id for this button; skip it.
+                continue
+            try:
+                yield self.get_group_info(group_id)
+            except AttributeError:
+                continue
+
+    @staticmethod
+    def find_group_id(button_id, raw_html):
+        """Return the integer group id associated with a result button.
+
+        Each group button has an id, which appears later in the script
+        tag followed by the group id (``result_id:<id>,``).
+
+        Raises ValueError if `button_id`, or a numeric ``result_id`` after
+        its last occurrence, cannot be found in `raw_html`.
+        """
+        marker = "result_id:"
+        button_pos = raw_html.rfind(button_id)
+        marker_pos = raw_html.find(marker, button_pos) if button_pos != -1 else -1
+        if marker_pos == -1:
+            raise ValueError(f"no result_id found after button id {button_id!r}")
+        value_start = marker_pos + len(marker)
+        value_end = raw_html.find(",", value_start)
+        if value_end == -1:
+            value_end = len(raw_html)
+        # int() raises ValueError itself when the extracted text is not numeric.
+        return int(raw_html[value_start:value_end])