Skip to content

Commit

Permalink
feat: rename images
Browse files Browse the repository at this point in the history
  • Loading branch information
CatchZeng committed Mar 4, 2021
1 parent 3f29006 commit fc10c6f
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 35 deletions.
8 changes: 7 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
{
"python.linting.pylintEnabled": true,
"python.linting.enabled": true
"python.linting.enabled": true,
"cSpell.words": [
"adlt",
"imap",
"murl",
"posixpath"
]
}
70 changes: 44 additions & 26 deletions bing_images/bing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
try:
from util import file_name, make_image_dir, download_image
from util import file_name, rename, make_image_dir, download_image
except ImportError: # Python 3
from .util import file_name, make_image_dir, download_image
from .util import file_name, rename, make_image_dir, download_image
from typing import Counter, List
from multiprocessing.pool import ThreadPool
from time import time as timer
Expand Down Expand Up @@ -82,46 +82,64 @@ def download_images(
filters: str = '',
force_replace=False
):
start = timer()
image_dir = make_image_dir(output_dir, force_replace)

urls = fetch_image_urls(query, limit, adult, file_type, filters)
index = 1
print("Save path: {}".format(image_dir))
entries = []
for url in urls:
name = file_name(url, index, query)
path = os.path.join(image_dir, name)
entries.append((url, path, index))
index += 1

start = timer()
urls = fetch_image_urls(query, limit, adult, file_type, filters)
entries = get_image_entries(urls, image_dir)

failedIndices = []
ps = pool_size
if limit < pool_size:
ps = limit
results = ThreadPool(ps).imap_unordered(
download_image_with_thread, entries)
for (index, result) in results:
if result:
print("Image #{} Downloaded".format(index))
else:
failedIndices.append(index)
download_image_entries(entries, ps)

rename_images(image_dir, query)

print("Done")
if len(failedIndices) > 0:
print("Failed Indices: {}".format(failedIndices))
elapsed = timer() - start
print("Elapsed Time: %.2fs" % elapsed)

def rename_images(dir, prefix):
    """Give every downloaded image in ``dir`` its final, numbered name.

    Regular files whose names do not start with "." are renamed, in sorted
    name order, to whatever ``rename(name, n, prefix)`` returns, where ``n``
    counts up from 1. Hidden entries are reported and left alone;
    sub-directories are left alone silently.

    Args:
        dir: Directory that holds the downloaded files.
        prefix: Prefix handed to ``rename`` when building each new name.
    """
    index = 1
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # numbering reproducible from one run to the next.
    for f in sorted(os.listdir(dir)):
        if f.startswith("."):
            print("Escape {}".format(f))
            continue
        src = os.path.join(dir, f)
        if not os.path.isfile(src):
            # A sub-directory is not an image: renaming it to "<prefix>_<n>.jpg"
            # would also consume an index, so skip it.
            continue
        dst = os.path.join(dir, rename(f, index, prefix))
        # NOTE(review): if dst already exists (e.g. left over from an earlier
        # run), os.rename replaces it on POSIX and raises on Windows --
        # confirm whether callers always start from a fresh directory.
        os.rename(src, dst)
        print("Rename {} to {}".format(src, dst))
        index += 1

def download_image_entries(entries, pool_size):
    """Download every entry concurrently on a pool of worker threads.

    Each entry is passed to ``download_image_with_thread``; results are
    consumed in completion order and a progress line is printed for every
    result that reports success.

    Args:
        entries: Iterable of download entries (presumably ``(url, path)``
            tuples as built by ``get_image_entries`` -- verify against caller).
        pool_size: Requested number of worker threads. Values below 1 are
            raised to 1, and the pool never gets more workers than there are
            entries. ``None`` keeps ``ThreadPool``'s own default size.
    """
    entries = list(entries)
    if not entries:
        # Nothing to download; also avoids ThreadPool(0), which raises
        # ValueError when the caller derived pool_size from a zero limit.
        return

    workers = None
    if pool_size is not None:
        workers = max(1, min(pool_size, len(entries)))

    counter = 0
    # The context manager tears the pool down once every result has been
    # consumed, so worker threads are not leaked.
    with ThreadPool(workers) as pool:
        results = pool.imap_unordered(download_image_with_thread, entries)
        for url, result, path in results:
            if result:
                print("#{} {} Downloaded {}".format(counter, url, path))
                counter += 1

def get_image_entries(urls, dir):
    """Pair each URL with the temporary path it will be downloaded to.

    The file name for the URL at position ``n`` (counting from 0) comes from
    ``file_name(url, n, "#tmp#")`` and is joined onto ``dir``.

    Args:
        urls: Iterable of image URLs.
        dir: Directory the temporary files are placed in.

    Returns:
        A list of ``(url, path)`` tuples, in the same order as ``urls``.
    """
    return [
        (link, os.path.join(dir, file_name(link, position, "#tmp#")))
        for position, link in enumerate(urls)
    ]

def download_image_with_thread(entry):
url, path, index = entry
print("Downloading image #{} from {}".format(index, url))
url, path = entry
print("Downloading image from {}".format(url))
result = download_image(url, path)
return (index, result)
return (url, result, path)


if __name__ == '__main__':
download_images("cat", 20, output_dir="/Users/catchzeng/Desktop/cat", pool_size=10,
file_type="png", force_replace=True)
file_type="jpg", force_replace=True)
33 changes: 25 additions & 8 deletions bing_images/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import urllib
import os

DEFAULT_OUTPUT_DIR = "bing-images"

def download_image(url, path) -> bool:
try:
Expand All @@ -12,28 +13,44 @@ def download_image(url, path) -> bool:
with open(path, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
return True
return True
else:
print("[!] download image: {}\n[!] Err :: {}".format(url, r.content))
return False
except Exception as e:
print("[!] download image: {}\n[!] Err :: {}".format(url, e))
return False


def file_name(url, index, prefix='image') -> str:
try:
path = urllib.parse.urlsplit(url).path
filename = posixpath.basename(path).split('?')[0]
file_type = filename.split(".")[-1]
if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
file_type = "jpg"
result = "{}_{}.{}".format(prefix, str(index), file_type)
type, _ = file_data(filename)
result = "{}_{}.{}".format(prefix, index, type)
return result
except Exception as e:
print("[!] Issue getting: {}\n[!] Err :: {}".format(url, e))
return prefix

def rename(name, index, prefix='image') -> str:
    """Build the ``<prefix>_<index>.<extension>`` name for an existing file.

    The extension is the first element returned by ``file_data(name)``.

    Args:
        name: Current file name.
        index: Number placed after the prefix.
        prefix: Leading part of the new name.

    Returns:
        The new file name, or ``prefix`` alone if building it raised.
    """
    try:
        extension, _stem = file_data(name)
        return "{}_{}.{}".format(prefix, index, extension)
    except Exception as err:
        print("[!] Issue getting: {}\n[!] Err :: {}".format(name, err))
        return prefix

DEFAULT_OUTPUT_DIR = "bing-images"

# Extensions that are kept as-is; anything else is treated as "jpg".
_IMAGE_EXTENSIONS = frozenset((
    "jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg",
))


def file_data(name):
    """Split a file name into ``(extension, base)``.

    The extension is the text after the last "." and the base is the text
    before the first ".". An extension that is not a known image type
    (compared case-insensitively) is replaced by "jpg"; a known one is
    returned with its original casing.

    Args:
        name: File name to inspect.

    Returns:
        A ``(extension, base)`` tuple. If ``name`` cannot be split (for
        example it is not a string), the problem is printed and
        ``("jpg", name)`` is returned so the tuple order is the same on
        every path.
    """
    try:
        parts = name.split(".")
        ext = parts[-1]
        base = parts[0]
        if ext.lower() not in _IMAGE_EXTENSIONS:
            ext = "jpg"
        return (ext, base)
    except Exception as e:
        # Deliberately best-effort: report the problem and fall back to "jpg".
        print("[!] Issue getting: {}\n[!] Err :: {}".format(name, e))
        # Extension first, exactly like the success path; callers unpack
        # the result as ``ext, _ = file_data(...)``.
        return ("jpg", name)

def make_image_dir(output_dir, force_replace=False) -> str:
image_dir = output_dir
Expand Down

0 comments on commit fc10c6f

Please sign in to comment.