Skip to content

Commit

Permalink
use a separate flask server for searching similar images using faiss
Browse files Browse the repository at this point in the history
  • Loading branch information
Hooram Nam committed Mar 27, 2019
1 parent e40bc6c commit 26a4b61
Show file tree
Hide file tree
Showing 21 changed files with 427 additions and 267 deletions.
116 changes: 59 additions & 57 deletions api/autoalbum.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,88 +12,83 @@

import ipdb

# from api.flags import \
# is_auto_albums_being_processed, \
# is_photos_being_added, \
# set_auto_album_processing_flag_on, \
# set_auto_album_processing_flag_off
from django_rq import job

from tqdm import tqdm
import rq
from api.util import logger
import pytz

@job
def regenerate_event_titles(user):
job_id = rq.get_current_job().id

# def is_auto_albums_being_processed():
# global FLAG_IS_AUTO_ALBUMS_BEING_PROCESSED
# return {"status":FLAG_IS_AUTO_ALBUMS_BEING_PROCESSED}

# # check if there are auto albums being generated right now
# if AlbumAuto.objects.count() > 0:
# last_album_auto_created_on = AlbumAuto.objects.order_by('-created_on')[0].created_on
# now = datetime.utcnow().replace(tzinfo=last_album_auto_created_on.tzinfo)
# td = (now-last_album_auto_created_on).total_seconds()
# if abs(td) < 10:
# status = True
# else:
# status = False
# else:
# status = False
# return {"status":status}

# go through all photos
if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUM_TITLES)
lrj.save()


@job
def regenerate_event_titles(user):
lrj = LongRunningJob(
started_by=user,
job_id=rq.get_current_job().id,
started_at=datetime.now(),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUM_TITLES)
lrj.save()

try:

aus = AlbumAuto.objects.filter(owner=user).prefetch_related('photos')
for au in tqdm(aus):
target_count = len(aus)
for idx,au in enumerate(aus):
logger.info('job {}: {}'.format(job_id,idx))
au._autotitle()
au.save()

lrj.result = {
'progress': {
"current": idx + 1,
"target": target_count
}
}
lrj.save()

status = True
message = 'success'
res = {'status': status, 'message': message}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.result = res
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
logger.info('job {}: updated lrj entry to db'.format(job_id))

except:
status = False
res = {'status': status, 'message': 'failed'}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.failed = True
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()

return True
return 1


@job
def generate_event_albums(user):
job_id = rq.get_current_job().id
lrj = LongRunningJob(
started_by=user,
job_id=job_id,
started_at=datetime.now(),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUMS)
lrj.save()

if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_GENERATE_AUTO_ALBUMS)
lrj.save()


try:
Expand All @@ -109,7 +104,7 @@ def group(photos_with_timestamp, dt=timedelta(hours=6)):
photos_with_timestamp = sorted(
photos_with_timestamp, key=lambda x: x[0])
groups = []
for photo in photos_with_timestamp:
for idx,photo in enumerate(photos_with_timestamp):
if len(groups) == 0:
groups.append([])
groups[-1].append(photo[1])
Expand All @@ -119,14 +114,18 @@ def group(photos_with_timestamp, dt=timedelta(hours=6)):
else:
groups.append([])
groups[-1].append(photo[1])
logger.info('job {}: {}'.format(job_id,idx))
return groups

groups = group(photos_with_timestamp, dt=timedelta(days=1, hours=12))
logger.info('job {}: made groups'.format(job_id))

album_locations = []

target_count = len(groups)

date_format = "%Y:%m:%d %H:%M:%S"
for group in groups:
for idx, group in enumerate(groups):
key = group[0].exif_timestamp
logger.info('job {}: processing auto album with date: '.format(job_id) + key.strftime(date_format))
items = group
Expand All @@ -153,24 +152,27 @@ def group(photos_with_timestamp, dt=timedelta(hours=6)):
album._autotitle()
album.save()
logger.info('job {}: generated auto album {}'.format(job_id,album.id))

lrj.result = {
'progress': {
"current": idx + 1,
"target": target_count
}
}
lrj.save()

status = True
message = 'success'
res = {'status': status, 'message': message}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.result = res
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()

except:
status = False
res = {'status': status, 'message': 'failed'}

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.failed = True
lrj.finished = True
lrj.finished_at = datetime.now()
lrj.finished_at = datetime.now().replace(tzinfo=pytz.utc)
lrj.save()

return 1
27 changes: 18 additions & 9 deletions api/directory_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
from config import image_dirs

import api.util as util
from api.image_similarity import build_image_similarity_index

import ipdb
from django_rq import job
import time
import numpy as np
import rq

from api.vector_bank import im2vec_bank

from django.db.models import Q
import json
Expand Down Expand Up @@ -141,7 +141,6 @@ def handle_new_image(user, image_path, job_id):

start = datetime.datetime.now()
photo._im2vec()
im2vec_bank.add_photo_to_index(photo)
elapsed = (datetime.datetime.now() - start).total_seconds()
elapsed_times['im2vec'] = elapsed
# util.logger.info('im2vec took %.2f' % elapsed)
Expand All @@ -165,12 +164,22 @@ def handle_new_image(user, image_path, job_id):
@job
def scan_photos(user):
job_id = rq.get_current_job().id
lrj = LongRunningJob(
started_by=user,
job_id=rq.get_current_job().id,
started_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_SCAN_PHOTOS)
lrj.save()

if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_SCAN_PHOTOS)
lrj.save()




added_photo_count = 0
already_existing_photo = 0
Expand Down Expand Up @@ -214,6 +223,7 @@ def scan_photos(user):
'''

util.logger.info("Added {} photos".format(len(image_paths_to_add)))
build_image_similarity_index(user)

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
Expand All @@ -225,7 +235,6 @@ def scan_photos(user):
lrj.save()
except Exception as e:
util.logger.error(str(e))
util.logger.error(str(traceback.format_exc()))
lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.failed = True
Expand Down
42 changes: 30 additions & 12 deletions api/face_classify.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from api.models import Face
from api.models import Person
from api.models import LongRunningJob
from api.util import logger

import base64
import pickle
Expand All @@ -22,6 +23,7 @@
import seaborn as sns
from django_rq import job
import rq
import pytz

import datetime

Expand Down Expand Up @@ -68,12 +70,20 @@ def cluster_faces(user):

@job
def train_faces(user):
lrj = LongRunningJob(
started_by=user,
job_id=rq.get_current_job().id,
started_at=datetime.datetime.now(),
job_type=LongRunningJob.JOB_TRAIN_FACES)
lrj.save()
job_id = rq.get_current_job().id

if LongRunningJob.objects.filter(job_id=job_id).exists():
lrj = LongRunningJob.objects.get(job_id=job_id)
lrj.started_at = datetime.datetime.now().replace(tzinfo=pytz.utc)
lrj.save()
else:
lrj = LongRunningJob.objects.create(
started_by=user,
job_id=job_id,
queued_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
started_at=datetime.datetime.now().replace(tzinfo=pytz.utc),
job_type=LongRunningJob.JOB_TRAIN_FACES)
lrj.save()

try:

Expand Down Expand Up @@ -137,30 +147,38 @@ def train_faces(user):
face_ids_unknown = [f['id'] for f in id2face_unknown.values()]
pred = clf.predict(face_encodings_unknown)
probs = np.max(clf.predict_proba(face_encodings_unknown), 1)
for face_id, person_name, probability in zip(face_ids_unknown, pred,
probs):

target_count = len(face_ids_unknown)

for idx, (face_id, person_name, probability) in enumerate(zip(face_ids_unknown, pred, probs)):
person = Person.objects.get(name=person_name)
face = Face.objects.get(id=face_id)
face.person = person
face.person_label_is_inferred = True
face.person_label_probability = probability
face.save()

lrj.result = {
'progress': {
"current": idx + 1,
"target": target_count
}
}
lrj.save()

# res = cluster_faces()
# print(res)

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.finished = True
lrj.failed = False
lrj.finished_at = datetime.datetime.now()
lrj.result = {}
lrj.save()
return True

except:
except BaseException as e:
logger.error(str(e))
res = []

lrj = LongRunningJob.objects.get(job_id=rq.get_current_job().id)
lrj.failed = True
lrj.finished = True
lrj.finished_at = datetime.datetime.now()
Expand Down
46 changes: 46 additions & 0 deletions api/image_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from api.models import Photo, User
from api.util import logger
import requests
import numpy as np
from ownphotos.settings import IMAGE_SIMILARITY_SERVER

def search_similar_image(user, photo):
    """Query the external image-similarity Flask server for photos similar to *photo*.

    Parameters
    ----------
    user : User | int
        The owner, given either as a ``User`` instance or a raw user id.
    photo : Photo
        Photo whose ``encoding`` (hex-encoded embedding bytes) is searched for.

    Returns
    -------
    dict | list
        Parsed JSON payload from the similarity server on success, or an
        empty list when the server responds with a non-200 status.
    """
    # Accept either a User instance or a plain integer id.
    if isinstance(user, int):
        user_id = user
    else:
        user_id = user.id

    # NOTE(review): np.frombuffer defaults to float64 and the values are then
    # cast to float32 — confirm this matches how Photo.encoding was serialized.
    image_embedding = np.array(
        np.frombuffer(bytes.fromhex(photo.encoding)), dtype=np.float32)
    post_data = {
        "user_id": user_id,
        "image_embedding": image_embedding.tolist()
    }
    res = requests.post(IMAGE_SIMILARITY_SERVER + '/search/', json=post_data)
    if res.status_code == 200:
        return res.json()
    # Bug fix: the original logged ``user.username`` here, which raises
    # AttributeError when ``user`` was passed as an int (the branch above
    # explicitly supports that); log the normalized id instead.
    logger.error(
        'error retrieving similar photos to {} belonging to user {}'.format(
            photo.image_hash, user_id))
    return []

def build_image_similarity_index(user):
    """(Re)build the faiss index for *user* on the external similarity server.

    Collects the stored embedding of every photo the user owns that has one,
    and POSTs the whole batch to the similarity server's ``/build/`` endpoint.

    Parameters
    ----------
    user : User
        Owner whose photos are indexed.

    Returns
    -------
    dict
        Parsed JSON response from the similarity server.
    """
    # Typo fix: was 'builing'.
    logger.info('building similarity index for user {}'.format(user.username))
    # ``image_hash`` is read in the loop below, so request it explicitly.
    # only() always retains the pk, but if image_hash is not the pk the
    # original only('encoding') would trigger one extra deferred-field
    # query per photo.
    photos = Photo.objects.filter(owner=user).exclude(
        encoding=None).only('image_hash', 'encoding')

    image_hashes = []
    image_embeddings = []
    for photo in photos:
        image_hashes.append(photo.image_hash)
        # NOTE(review): frombuffer defaults to float64, then cast to float32
        # — confirm this matches how Photo.encoding was serialized.
        image_embedding = np.array(
            np.frombuffer(bytes.fromhex(photo.encoding)), dtype=np.float32)
        image_embeddings.append(image_embedding.tolist())

    post_data = {
        "user_id": user.id,
        "image_hashes": image_hashes,
        "image_embeddings": image_embeddings
    }
    res = requests.post(IMAGE_SIMILARITY_SERVER + '/build/', json=post_data)
    return res.json()

Empty file added api/management/__init__.py
Empty file.
Loading

0 comments on commit 26a4b61

Please sign in to comment.