Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions task_examples/find_company_mail/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Find Company Mail

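Find a company's email address by running a web search of the form `mail: {company}` and extracting email addresses from the search results. The input may be a company name, a domain, or a URL; it is lowercased, and a URL is reduced to its bare host (without a leading `www.`) before the query is built.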

## Usage

```bash
python task_examples/find_company_mail/find_company_mail.py --company "Linktera"
```

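Example output (values are illustrative; `source` is the link of the search result the email was found in):

```json
{
  "company": "Linktera",
  "email": "info@linktera.com",
  "source": "https://www.linktera.com/"
}
```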

## Notes

- Uses Serper (`SERPER_API_KEY` in your `.env`) to perform the query `mail: {company}`.
- Extracts emails from result titles, snippets, and links.

14 changes: 14 additions & 0 deletions task_examples/find_company_mail/email_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import re
from typing import List, Set


EMAIL_REGEX = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")


def extract_emails_from_text(text: str) -> List[str]:
    """Return the distinct email addresses found in *text*, sorted ascending.

    Matching is done with ``EMAIL_REGEX``. Duplicates are collapsed by exact
    (case-sensitive) string equality. A falsy *text* (``None`` or ``""``)
    yields an empty list.
    """
    if text:
        return sorted(set(EMAIL_REGEX.findall(text)))
    return []


103 changes: 103 additions & 0 deletions task_examples/find_company_mail/find_company_mail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import sys
import os
import argparse
from typing import Optional
from urllib.parse import urlparse

# Allow running this file directly as a script: append the directory three
# levels above this file to sys.path so that the absolute
# `task_examples.find_company_mail...` imports below can resolve.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from pydantic import BaseModel

try:
from task_examples.find_company_mail.serper_client import search_mail_query
from task_examples.find_company_mail.email_utils import extract_emails_from_text
except ImportError:
from serper_client import search_mail_query
from email_utils import extract_emails_from_text


class MailResponse(BaseModel):
    """Result of a company email lookup; the CLI prints it as JSON."""

    # The company name, domain, or URL exactly as the caller supplied it.
    company: str
    # The selected email address; None when the search failed or found none.
    email: Optional[str] = None
    # Link of the search result the email was extracted from; None when no
    # email was found or the result carried no link.
    source: Optional[str] = None


def _normalize_to_domain(company_or_url: str) -> str:
"""Accepts a domain (e.g., linktera.com) or URL (https://linktera.com/) and returns the domain."""
text = company_or_url.strip()
if not text:
return text
has_scheme = text.startswith("http://") or text.startswith("https://")
to_parse = text if has_scheme else f"http://{text}"
parsed = urlparse(to_parse)
host = parsed.netloc or parsed.path
host = host.strip().lower()
if host.startswith("www."):
host = host[4:]
# Remove trailing slashes if any leaked into host
host = host.split("/")[0]
return host


def _email_matches_company(email: str, domain: str) -> bool:
    """Return True when the host part of *email* belongs to *domain*."""
    host = email.lower().rpartition("@")[2]
    # Exact host match, or any subdomain of it (covers "www." and "mail.").
    if host == domain or host.endswith("." + domain):
        return True
    # A bare company name ("linktera") contains no dot, so compare it with
    # the individual labels of the host ("linktera.com" -> "linktera", "com").
    return "." not in domain and domain in host.split(".")


def find_company_mail(company: str) -> MailResponse:
    """Search the web for *company* and pick the most plausible email address.

    The input is normalized with ``_normalize_to_domain`` and searched as
    ``mail: {domain}``. Emails are extracted from the title, snippet and link
    of every entry under the ``"organic"`` key of the search response, in that
    order, keeping the first occurrence of each address.

    Args:
        company: Company name, domain, or URL.

    Returns:
        A ``MailResponse`` whose ``email`` is the first address matching the
        normalized domain (or company name), else the first address found,
        else ``None``. ``source`` is the link of the search result the chosen
        address came from. A failed search also yields ``email=None``.
    """
    import logging

    domain = _normalize_to_domain(company)
    query = f"mail: {domain}" if domain else f"mail: {company}"

    try:
        data = search_mail_query(query)
    except Exception:
        # Best-effort lookup: keep returning an empty result instead of
        # raising, but record why the search failed rather than hiding it.
        logging.getLogger(__name__).warning(
            "Search for %r failed", query, exc_info=True
        )
        return MailResponse(company=company, email=None, source=None)

    # Map each email to the link of the first result it appeared in; dicts
    # keep insertion order, so this de-duplicates while preserving order.
    found = {}
    for item in data.get("organic") or []:
        link = item.get("link")
        for text in (item.get("title"), item.get("snippet"), link):
            for email in extract_emails_from_text(text):
                found.setdefault(email, link)

    if not found:
        return MailResponse(company=company, email=None, source=None)

    # Prefer an address that belongs to the requested domain / company.
    if domain:
        for email, source in found.items():
            if _email_matches_company(email, domain):
                return MailResponse(company=company, email=email, source=source)

    # Otherwise fall back to the first address encountered.
    email, source = next(iter(found.items()))
    return MailResponse(company=company, email=email, source=source)


if __name__ == "__main__":
    # Command-line entry point: look up one company and print the result
    # as indented JSON on stdout.
    cli = argparse.ArgumentParser(
        description="Find a company's email via web search",
    )
    cli.add_argument(
        "--company",
        required=True,
        help="Company name",
    )
    options = cli.parse_args()

    print(find_company_mail(options.company).model_dump_json(indent=2))


19 changes: 19 additions & 0 deletions task_examples/find_company_mail/serper_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import os
import requests
from dotenv import load_dotenv

# Load variables from a local .env file so SERPER_API_KEY can be defined there.
load_dotenv()

# API key, read once at import time; None when the variable is not set.
SERPER_API_KEY = os.getenv("SERPER_API_KEY")
# Search endpoint that search_mail_query POSTs its query to.
SERPER_URL = "https://google.serper.dev/search"


def search_mail_query(query: str, timeout: float = 10.0) -> dict:
    """POST *query* to the Serper search endpoint and return the decoded JSON.

    Args:
        query: Search string sent as the ``q`` field of the JSON payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body parsed from JSON.

    Raises:
        ValueError: If ``SERPER_API_KEY`` is not set.
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (raised by ``raise_for_status``).
    """
    if not SERPER_API_KEY:
        raise ValueError("Missing SERPER_API_KEY in .env")
    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
    resp = requests.post(
        SERPER_URL,
        headers=headers,
        json={"q": query},
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()