Skip to content

Commit

Permalink
exclude transparent functionality
Browse files (browse the repository at this point in the history)
  • Loading branch information
YoongiKim committed May 23, 2023
1 parent 12c868a commit cc9f51f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@ Google, Naver multiprocess image crawler (High Quality & Speed & Customizable)
# Arguments
usage:
```
python3 main.py [--skip true] [--threads 4] [--google true] [--transparent false] [--naver true] [--full false] [--face false] [--no_gui auto] [--limit 0]
python3 main.py [--skip true] [--threads 4] [--google true] [--naver true] [--full false] [--face false] [--no_gui auto] [--limit 0]
```

```
Expand Down
17 changes: 5 additions & 12 deletions main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_face_url(code):


class AutoCrawler:
def __init__(self, skip_already_exist=True, n_threads=4, do_google=True, transparent=False, do_naver=True, download_path='download',
def __init__(self, skip_already_exist=True, n_threads=4, do_google=True, do_naver=True, download_path='download',
full_resolution=False, face=False, no_gui=False, limit=0, proxy_list=None):
"""
:param skip_already_exist: Skips keyword already downloaded before. This is needed when re-downloading.
Expand All @@ -71,7 +71,6 @@ def __init__(self, skip_already_exist=True, n_threads=4, do_google=True, transpa
self.skip = skip_already_exist
self.n_threads = n_threads
self.do_google = do_google
self.transparent = transparent
self.do_naver = do_naver
self.download_path = download_path
self.full_resolution = full_resolution
Expand Down Expand Up @@ -236,16 +235,12 @@ def download_from_site(self, keyword, site_code):
print('Collecting links... {} from {}'.format(keyword, site_name))

if site_code == Sites.GOOGLE:
if self.transparent:
add_url += '&tbs=ic:trans'
links = collect.google(keyword, add_url)

elif site_code == Sites.NAVER:
links = collect.naver(keyword, add_url)

elif site_code == Sites.GOOGLE_FULL:
if self.transparent:
add_url += '&tbs=ic:trans'
links = collect.google_full(keyword, add_url, self.limit)

elif site_code == Sites.NAVER_FULL:
Expand Down Expand Up @@ -361,7 +356,6 @@ def imbalance_check(self):
help='Skips keyword already downloaded before. This is needed when re-downloading.')
parser.add_argument('--threads', type=int, default=4, help='Number of threads to download.')
parser.add_argument('--google', type=str, default='true', help='Download from google.com (boolean)')
parser.add_argument('--transparent', type=str, default='false', help='Filter for transparent background images(for google)')
parser.add_argument('--naver', type=str, default='true', help='Download from naver.com (boolean)')
parser.add_argument('--full', type=str, default='false',
help='Download full resolution image instead of thumbnails (slow)')
Expand All @@ -370,7 +364,7 @@ def imbalance_check(self):
help='No GUI mode. Acceleration for full_resolution mode. '
'But unstable on thumbnail mode. '
'Default: "auto" - false if full=false, true if full=true')
parser.add_argument('--limit', type=int, default=100,
parser.add_argument('--limit', type=int, default=0,
help='Maximum count of images to download per site.')
parser.add_argument('--proxy-list', type=str, default='',
help='The comma separated proxy list like: "socks://127.0.0.1:1080,http://127.0.0.1:1081". '
Expand All @@ -380,7 +374,6 @@ def imbalance_check(self):
_skip = False if str(args.skip).lower() == 'false' else True
_threads = args.threads
_google = False if str(args.google).lower() == 'false' else True
_transparent = False if str(args.transparent).lower() == 'false' else True
_naver = False if str(args.naver).lower() == 'false' else True
_full = False if str(args.full).lower() == 'false' else True
_face = False if str(args.face).lower() == 'false' else True
Expand All @@ -396,10 +389,10 @@ def imbalance_check(self):
_no_gui = False

print(
'Options - skip:{}, threads:{}, google:{}, transparent:{}, naver:{}, full_resolution:{}, face:{}, no_gui:{}, limit:{}, _proxy_list:{}'
.format(_skip, _threads, _google, _transparent, _naver, _full, _face, _no_gui, _limit, _proxy_list))
'Options - skip:{}, threads:{}, google:{}, naver:{}, full_resolution:{}, face:{}, no_gui:{}, limit:{}, _proxy_list:{}'
.format(_skip, _threads, _google, _naver, _full, _face, _no_gui, _limit, _proxy_list))

crawler = AutoCrawler(skip_already_exist=_skip, n_threads=_threads,
do_google=_google, transparent=_transparent, do_naver=_naver, full_resolution=_full,
do_google=_google, do_naver=_naver, full_resolution=_full,
face=_face, no_gui=_no_gui, limit=_limit, proxy_list=_proxy_list)
crawler.do_crawling()

0 comments on commit cc9f51f

Please sign in to comment.