added FAISS Searcher

This commit is contained in:
Paolo Bolettieri 2022-07-08 18:19:41 +02:00
parent 647a8778ba
commit 2761ccbe95
26 changed files with 772 additions and 466 deletions

View File

@ -7,7 +7,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
git \
wget \
nano \
unzip
unzip \
sqlite3 \
libsqlite3-dev
RUN pip install numpy tornado flask-restful pillow matplotlib tqdm scikit-learn h5py requests faiss-cpu==1.7.2
ADD . /workspace

2
run.sh
View File

@ -1 +1 @@
docker run --net=host -p 8190:8190 -v /media/data2/data/swoads/data:/workspace/data -it image-recognition:swoads python3 /workspace/src/beniculturali.py /workspace/data/conf/img_rec_conf.json
docker run --net=host -p 8190:8190 -v /media/data2/data/swoads/data:/workspace/data -it image-recognition:swoads python3 /workspace/src/ImageRecognitionService.py /workspace/data/conf/img_rec_conf.json

View File

@ -8,7 +8,7 @@ import LFUtilities
import BEBLIDParameters as params
detector = cv2.ORB_create(params.KEYPOINTS)
descriptor = cv2.xfeatures2d.BEBLID_create(0.75)
descriptor = cv2.xfeatures2d.BEBLID_create(0.75, 101)
def extract(img_path):

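For context, the extract() that follows the new BEBLID_create call typically pairs ORB detection with BEBLID description. A minimal hedged sketch (grayscale loading and the omitted resize-to-IMG_SIZE step are assumptions; in OpenCV's xfeatures2d the second BEBLID_create argument selects the descriptor size, where 101 corresponds to the 256-bit variant):

import cv2
import BEBLIDParameters as params

detector = cv2.ORB_create(params.KEYPOINTS)
descriptor = cv2.xfeatures2d.BEBLID_create(0.75, 101)  # 0.75 is the scale suggested for ORB keypoints

def extract(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # assumption: grayscale input
    kps = detector.detect(img, None)
    kps, des = descriptor.compute(img, kps)
    return kps, des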
View File

@ -1,5 +1,5 @@
NN_MATCH_RATIO = 0.8
MIN_GOOD_MATCHES = 12
MIN_INLIERS = 10
KEYPOINTS = 500
MIN_GOOD_MATCHES = 22
MIN_INLIERS = 15
KEYPOINTS = 800
IMG_SIZE = 500

View File

@ -3,7 +3,7 @@ import numpy as np
import LFUtilities
import BEBLIDParameters
import beniculturaliSettings as settings
import ImageRecognitionSettings as settings
class BEBLIDRescorer:
@ -15,38 +15,44 @@ class BEBLIDRescorer:
self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
def rescore_by_id(self, query_id, resultset):
query_idx = self.ids.index(query_id)
query = LFUtilities.load_img_lf(settings.DATASET_BEBLID, query_id)
#query_idx = self.ids.index(query_id)
query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
return self.rescore_by_img(query, resultset)
def rescore_by_img(self, query, resultset):
max_inliers = -1
res = []
counter = 0
for data_id, _ in resultset:
try:
data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
if len(query[0]) > 0:
for data_id, _ in resultset:
try:
data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
if len(data_el[1]) > 0:
nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
matches_mask = mask.ravel().tolist()
# print(len(good))
inliers = np.count_nonzero(matches_mask)
# print(inliers)
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, round(inliers/len(good), 3)))
print(f'candidate n. {counter}')
except:
print('rescore error evaluating ' + data_id)
pass
counter += 1
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
matches_mask = mask.ravel().tolist()
# print(len(good))
inliers = np.count_nonzero(matches_mask)
# print(inliers)
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, round(inliers/len(good), 3)))
print(data_id)
print(f'candidate n. {counter}')
#to get just the first candidate
break
except Exception as e:
print('rescore error evaluating ' + data_id)
print(e)
pass
counter += 1
if res:
res.sort(key=lambda result: result[1], reverse=True)

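The loop above is Lowe's ratio test followed by RANSAC geometric verification, and the tightened BEBLIDParameters values gate each stage. A condensed, self-contained sketch of that core step (thresholds inlined from the new parameter values; a sketch, not the project's exact code):

import cv2
import numpy as np

def verify(query_kps, query_des, data_kps, data_des,
           ratio=0.8, min_good=22, min_inliers=15):
    # ratio test on 2-NN Hamming matches, then homography inlier count
    bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
    nn_matches = bf.knnMatch(query_des, data_des, 2)
    good = [m for m, n in nn_matches if m.distance < ratio * n.distance]
    if len(good) <= min_good:
        return None
    src = np.float32([query_kps[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst = np.float32([data_kps[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M, mask = cv2.findHomography(src, dst, cv2.RANSAC, 3.0)
    if mask is None:
        return None
    inliers = int(np.count_nonzero(mask.ravel()))
    return inliers / len(good) if inliers >= min_inliers else None  # confidence score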
81
src/BEBLIDRescorerDB.py Normal file
View File

@ -0,0 +1,81 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings
from LFDB import LFDB
class BEBLIDRescorerDB:
def __init__(self):
#self.lf = LFUtilities.load(settings.DATASET_BEBLID)
#self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
#self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
self.lf_db = LFDB(settings.DB_LF)
def rescore_by_id(self, query_id, resultset):
#query_idx = self.ids.index(query_id)
query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
return self.rescore_by_img(query, resultset)
def rescore_by_img(self, query, resultset):
max_inliers = -1
res = []
counter = 0
if len(query[0]) > 0:
for data_id, _ in resultset:
try:
blob = self.lf_db.get(data_id)
serialized_obj = LFUtilities.deserialize_object(blob)
data_el = LFUtilities.unpickle_keypoints(serialized_obj)
if len(data_el[1]) > 0:
nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 3.0)
matches_mask = mask.ravel().tolist()
# print(len(good))
inliers = np.count_nonzero(matches_mask)
# print(inliers)
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, round(inliers/len(good), 3)))
print(data_id)
print(f'candidate n. {counter}')
#to get just the first candidate
break
except Exception as e:
print('rescore error evaluating ' + data_id)
print(e)
pass
counter += 1
if res:
res.sort(key=lambda result: result[1], reverse=True)
return res
def add(self, lf):
self.lf.append(lf)
def remove(self, idx):
self.descs = np.delete(self.descs, idx, axis=0)
def save(self, is_backup=False):
lf_save_file = settings.DATASET_LF
ids_file = settings.DATASET_IDS_LF
if lf_save_file != "None":
if is_backup:
lf_save_file += '.bak'
ids_file += '.bak'
LFUtilities.save(lf_save_file, self.lf)
np.savetxt(ids_file, self.ids, fmt='%s')

75
src/BEBLIDRescorerGPU.py Normal file
View File

@ -0,0 +1,75 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings
class BEBLIDRescorerGPU:
def __init__(self):
#self.lf = LFUtilities.load(settings.DATASET_BEBLID)
#self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
#self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
#self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
self.bf = cv2.cuda.DescriptorMatcher_createBFMatcher(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
def rescore_by_id(self, query_id, resultset):
#query_idx = self.ids.index(query_id)
query = LFUtilities.load_img_lf_GPU(settings.DATASET_LF_FOLDER, query_id)
return self.rescore_by_img(query, resultset)
def rescore_by_img(self, query, resultset):
max_inliers = -1
res = []
counter = 0
for data_id, _ in resultset:
try:
data_el = LFUtilities.load_img_lf_GPU(settings.DATASET_LF_FOLDER, data_id)
nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
matches_mask = mask.ravel().tolist()
# print(len(good))
inliers = np.count_nonzero(matches_mask)
# print(inliers)
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, round(inliers/len(good), 3)))
print(data_id)
print(f'candidate n. {counter}')
#to get just the first candidate
break
except Exception as e:
print('rescore error evaluating ' + data_id)
print(e)
pass
counter += 1
if res:
res.sort(key=lambda result: result[1], reverse=True)
return res
def add(self, lf):
self.lf.append(lf)
def remove(self, idx):
self.descs = np.delete(self.descs, idx, axis=0)
def save(self, is_backup=False):
lf_save_file = settings.DATASET_LF
ids_file = settings.DATASET_IDS_LF
if lf_save_file != "None":
if is_backup:
lf_save_file += '.bak'
ids_file += '.bak'
LFUtilities.save(lf_save_file, self.lf)
np.savetxt(ids_file, self.ids, fmt='%s')

76
src/BEBLIDRescorerV2.py Normal file
View File

@ -0,0 +1,76 @@
import cv2
import numpy as np
import LFUtilities
import BEBLIDParameters
import ImageRecognitionSettings as settings
class BEBLIDRescorer:
def __init__(self):
#self.lf = LFUtilities.load(settings.DATASET_BEBLID)
#self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
#self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
def rescore_by_id(self, query_id, resultset):
#query_idx = self.ids.index(query_id)
query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
return self.rescore_by_img(query, resultset)
def rescore_by_img(self, query, resultset):
max_inliers = -1
res = []
counter = 0
if len(query[0]) > 0:
for data_id, _ in resultset:
try:
data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
if len(data_el[1]) > 0:
nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
matches_mask = mask.ravel().tolist()
# print(len(good))
inliers = np.count_nonzero(matches_mask)
# print(inliers)
if (inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, round(inliers/len(good), 3)))
print(data_id)
print(f'candidate n. {counter}')
#to get just the first candidate
break
except Exception as e:
print('rescore error evaluating ' + data_id)
print(e)
pass
counter += 1
if res:
res.sort(key=lambda result: result[1], reverse=True)
return res
def add(self, lf):
self.lf.append(lf)
def remove(self, idx):
self.descs = np.delete(self.descs, idx, axis=0)
def save(self, is_backup=False):
lf_save_file = settings.DATASET_LF
ids_file = settings.DATASET_IDS_LF
if lf_save_file != "None":
if is_backup:
lf_save_file += '.bak'
ids_file += '.bak'
LFUtilities.save(lf_save_file, self.lf)
np.savetxt(ids_file, self.ids, fmt='%s')

View File

@ -1,66 +0,0 @@
import cv2
import numpy as np
import LFUtilities
import beniculturaliSettings as settings
class BeniCulturaliRescorer:
def __init__(self):
self.lf = LFUtilities.load(settings.DATASET_LF)
self.ids = np.loadtxt(settings.DATASET_IDS_LF, dtype=str).tolist()
self.orb = cv2.ORB_create()
self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
def rescore_by_id(self, query_id, resultset):
query_idx = self.ids.index(query_id)
return self.rescore_by_img(self.lf[query_idx], resultset)
def rescore_by_img(self, query, resultset):
max_inliers = -1
res = []
for data_id, _ in resultset:
data_idx = self.ids.index(data_id)
try:
data_el = self.lf[data_idx]
matches = self.bf.match(query[1], data_el[1])
good = [m for m in matches if m.distance <= LFUtilities.THRESHOLD]
if len(good) > LFUtilities.MIN_GOOD_MATCHES:
src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
matches_mask = mask.ravel().tolist()
# print(len(good))
inliers = np.count_nonzero(matches_mask)
# print(inliers)
if (inliers >= LFUtilities.MIN_INLIERS and inliers > max_inliers):
max_inliers = inliers
res.append((data_id, inliers))
except:
print('rescore error evaluating ' + data_id)
pass
if res:
res.sort(key=lambda result: result[1], reverse=True)
return res
def add(self, lf):
self.lf.append(lf)
def remove(self, idx):
self.descs = np.delete(self.descs, idx, axis=0)
def save(self, is_backup=False):
lf_save_file = settings.DATASET_LF
ids_file = settings.DATASET_IDS_LF
if lf_save_file != "None":
if is_backup:
lf_save_file += '.bak'
ids_file += '.bak'
LFUtilities.save(lf_save_file, self.lf)
np.savetxt(ids_file, self.ids, fmt='%s')

View File

@ -1,60 +0,0 @@
import numpy as np
import beniculturaliSettings as settings
class BeniCulturaliSearchEngine:
def __init__(self):
#self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
#np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
self.descs = np.load(settings.DATASET)
#self.desc1 = np.load(settings.DATASET1)
#self.desc2 = np.load(settings.DATASET2)
#self.descs = (self.desc1 + self.desc2) / 2
#self.descs /= np.linalg.norm(self.descs, axis=1, keepdims=True)
self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
def get_id(self, idx):
return self.ids[idx]
def add(self, desc, id):
self.ids.append(id)
self.descs = np.vstack((self.descs, desc))
self.save()
def remove(self, id):
idx = self.ids.index(id)
del self.ids[idx]
self.descs = np.delete(self.descs, idx, axis=0)
def search_by_id(self, query_id, k=10):
query_idx = self.ids.index(query_id)
return self.search_by_img(self.descs[query_idx], k)
def search_by_img(self, query, k=10):
print('----------query features-------')
print(query)
dot_product = np.dot(self.descs, query)
idx = dot_product.argsort()[::-1][:k]
res = []
for i in idx:
res.append((self.ids[i], round(float(dot_product[i]), 3)))
return res
def save(self, is_backup=False):
descs_file = settings.DATASET
ids_file = settings.DATASET_IDS
if is_backup:
descs_file += '.bak'
ids_file += '.bak'
np.save(descs_file, self.descs)
np.savetxt(ids_file, self.ids, fmt='%s')

View File

@ -1,68 +0,0 @@
import cv2
import numpy as np
import pickle as pickle
import LFUtilities
import beniculturaliSettings as settings
from BeniCulturaliRescorer import BeniCulturaliRescorer
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import FeatureExtractor as fe
#import ORBExtractor as lf
class BeniCulturaliSearcher:
K_REORDERING = 15
def __init__(self):
# self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
# np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
self.search_engine = BeniCulturaliSearchEngine()
#self.rescorer = BeniCulturaliRescorer()
def get_id(self, idx):
return self.search_engine.get_id(idx)
def add(self, img_file, id):
self.save(True)
desc = fe.extract(img_file)
#orb = lf.extract(img_file)
self.search_engine.add(desc, id)
#self.rescorer.add(orb)
self.save()
print('added ' + id)
def remove(self, id):
self.save(True)
self.search_engine.remove(id)
#self.rescorer.remove(idx)
self.save()
print('removed ' + id)
def search_by_id(self, query_id, k=10, rescorer=False):
kq = k
if rescorer:
kq = self.K_REORDERING
res = self.search_engine.search_by_id(query_id, kq)
# if rescorer:
# res_lf = self.rescorer.rescore_by_id(query_id, res)
# res = res_lf if res_lf else res[:k]
return res
def search_by_img(self, query_img, k=10, rescorer=False):
kq = k
if rescorer:
kq = self.K_REORDERING
query_desc = fe.extract(query_img)
res = self.search_engine.search_by_img(query_desc, kq)
#if rescorer:
# query_lf = lf.extract(query_img)
# res_lf = self.rescorer.rescore_by_img(query_lf, res)
# res = res_lf if res_lf else res[:k]
return res
def save(self, is_backup=False):
self.search_engine.save(is_backup)
#self.rescorer.save(is_backup)

38
src/BulkSearch.py Normal file
View File

@ -0,0 +1,38 @@
import requests
from pathlib import Path
import tqdm
import argparse
import os
IMG_REC_SERVICE = 'http://localhost:8290/bcir/'
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Img Recognition Bulk Analysis')
parser.add_argument('src', type=str, help='img src folder path')
parser.add_argument('dest', type=str, help='dest file path')
args = parser.parse_args()
src = args.src
dest = args.dest
paths = Path(src).rglob('*.*')
paths_list = list(paths)
print('Analyzing images...')
with open(dest, 'w', encoding='UTF8') as f:
for path in tqdm.tqdm(paths_list):
try:
img_file = {'image': (
'query', open(os.path.join(path.parent, path.name), 'rb'))}
r = requests.post(IMG_REC_SERVICE + 'searchByImg', files=img_file)
res = r.json()
tmp = ';'.join([str(i) for x in res for i in x])
row = path.name + ";" + tmp
f.write(row + '\n')
except Exception as e:
print("cannot process '%s'" % path)
print(e)
pass

View File

@ -1,5 +1,5 @@
import numpy as np
import beniculturaliSettings as settings
import ImageRecognitionSettings as settings
import faiss

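This hunk only swaps the settings import; the FAISSSearchEngine body is not shown. Replacing the removed BeniCulturaliSearchEngine's exhaustive np.dot scan with FAISS usually reduces to a flat inner-product index over L2-normalized descriptors. A hedged sketch of that idea (index type and file paths are assumptions, not the actual implementation):

import faiss
import numpy as np

descs = np.load('dataset.npy').astype('float32')  # assumed: settings.DATASET
faiss.normalize_L2(descs)                         # after this, inner product == cosine
index = faiss.IndexFlatIP(descs.shape[1])
index.add(descs)

def search_by_img(query_desc, k=10):
    q = query_desc.astype('float32').reshape(1, -1)
    faiss.normalize_L2(q)
    scores, idxs = index.search(q, k)
    return [(int(i), round(float(s), 3)) for i, s in zip(idxs[0], scores[0])]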
View File

@ -1,5 +1,5 @@
import numpy as np
import beniculturaliSettings as settings
import ImageRecognitionSettings as settings
import requests

View File

@ -0,0 +1,63 @@
import requests
from pathlib import Path
import tqdm
import argparse
import os
IMG_REC_SERVICE = 'http://localhost:8290/bcir/'
groundtruth_file = '/media/ssd2/data/swoads/workdir/data/groundtruth_no_ext.txt'
precision_at = [0] * 10
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Img Recognition Bulk Analysis')
parser.add_argument('src', type=str, help='img src folder path')
#parser.add_argument('dest', type=str, help='dest file path')
args = parser.parse_args()
src = args.src
#dest = args.dest
groundtruth = {}
with open(groundtruth_file, 'r') as f:
for line in f:
line = line.rstrip() # removes trailing whitespace and '\n' chars
if "," not in line: continue # skips blanks and comments w/o =
if line.startswith("#"): continue # skips comments which contain =
k, v = line.split(",", 1)
groundtruth[k] = v
paths = Path(src).rglob('*.*')
paths_list = list(paths)
print('Analyzing images...')
for path in tqdm.tqdm(paths_list):
key = path.name
expected_id = groundtruth[key]
print(expected_id)
try:
img_file = {'image': (
'query', open(os.path.join(path.parent, path.name), 'rb'))}
params = {'rescorer':'true'}
r = requests.post(IMG_REC_SERVICE + 'searchByImg', data=params, files=img_file)
res = r.json()
for i in range(len(res)):
print(res[i][0])
if res[i][0] in expected_id:
precision_at[i] = precision_at[i] + 1
except Exception as e:
print("cannot process '%s'" % path)
print(e)
pass
print(precision_at)

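Note that precision_at collects, per rank, how many queries had their ground-truth id returned at exactly that position; turning it into cumulative precision@k still requires summing and dividing by the query count. A small post-processing sketch (names are illustrative):

def precision_at_k(hits, num_queries):
    # hits[i] = queries whose correct id appeared at rank i (the precision_at list above)
    cumulative, out = 0, []
    for h in hits:
        cumulative += h
        out.append(round(cumulative / num_queries, 3))
    return out  # out[k-1] = fraction of queries answered within the top k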
View File

@ -1,200 +1,210 @@
from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory, abort
from random import randint
import cv2
import io
import numpy as np
import json
import urllib
#from BeniCulturaliSearcher import BeniCulturaliSearcher
from Searcher import Searcher
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import beniculturaliSettings as settings
import uuid
import os, os.path
import tornado.wsgi
import tornado.httpserver
import argparse
app = Flask(__name__)
@app.route('/bcir/')
def api_root():
print('index_with_randoms.html')
random_ids = []
for i in range(0, 15):
random_ids.append(searcher.get_id(randint(0, 600)))
return render_template('index_with_randoms.html', random_ids=random_ids)
def url_to_file(url):
dest_file = uuid.uuid4().hex + ".png"
dest_path = settings.logs + "/" + dest_file
req = urllib.request.Request(
url,
data=None,
headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
}
)
resp = urllib.request.urlopen(req)
image = np.asarray(bytearray(resp.read()), dtype="uint8")
decoded = cv2.imdecode(image, cv2.IMREAD_COLOR)
cv2.imwrite(dest_path, decoded)
#im = Image.fromarray(image)
#im.save(dest_path)
return dest_path
def post_to_file(image):
dest_file = uuid.uuid4().hex + ".png"
dest_path = settings.logs + "/" + dest_file
image.save(dest_path)
return dest_path
def get_res(results, query_url=None):
if query_url is not None:
return render_template('search.html', results=results, query_url=query_url)
json_res = json.dumps(results)
return json_res
@app.route('/bcir/searchById')
def search_by_id():
id = request.args.get('id')
rescorer = False
if request.args.get("rescorer") == 'true':
rescorer = True
results = searcher.search_by_id(id, settings.k, rescorer)
query_url = None
if request.args.get("tohtml") is not None:
query_url = id + ".jpg"
return get_res(results, query_url)
@app.route('/bcir/searchByImg', methods=['POST'])
def search_by_img():
if 'image' not in request.files:
flash('No file part')
return redirect(request.url)
file = request.files['image']
img_file = post_to_file(file)
rescorer = False
if request.form.get("rescorer") == 'true':
rescorer = True
#dest_file = uuid.uuid4().hex + ".jpg"
#dest_path = settings.logs + "/" + dest_file
#file.save(dest_path)
#files = {'image': (dest_file, open(dest_path, 'rb'))}
#r = requests.post(settings.rmac_service, files=files)
#results = search_engine.search_by_img(np.array(r.json()), settings.k)
results = searcher.search_by_img(img_file, settings.k, rescorer)
query_url = None
if request.form.get("tohtml") is not None:
query_url = ""
return get_res(results, query_url)
@app.route('/bcir/searchByURL')
def search_by_url():
url = request.args.get('url')
rescorer = False
if request.args.get("rescorer") == 'true':
rescorer = True
img_file = url_to_file(url)
# query = cv2.imdecode(image, cv2.IMREAD_COLOR)
# dest_file = uuid.uuid4().hex + ".jpg"
# dest_path = settings.logs + "/" + dest_file
# cv2.imwrite(dest_path, query)
# files = {'image': open(dest_path, 'rb')}
# r = requests.post(settings.rmac_service, files=files)
# results = search_engine.search_by_img(np.array(r.json()), settings.k)
results = searcher.search_by_img(img_file, settings.k, rescorer)
query_url = None
if request.args.get("tohtml") is not None:
query_url = url
return get_res(results, query_url)
@app.route('/bcir/addImg', methods=['POST'])
def add_img():
if 'image' not in request.files:
flash('No file part')
return redirect(request.url)
try:
file = request.files['image']
id = request.files['image'].filename
id, _ = os.path.splitext(id)
img_file = post_to_file(file)
searcher.add(img_file, id)
json_res = json.dumps("done")
return json_res
except:
abort(500)
@app.route('/bcir/rmImg')
def remove_img():
try:
id = request.args.get('id')
searcher.remove(id)
json_res = json.dumps("done")
return json_res
except:
abort(500)
@app.route('/bcir/<path:filename>')
def download_file(filename):
print(filename)
values = filename.split('/')
print(values)
return send_from_directory(settings.img_folder, filename, as_attachment=False)
"""
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Reading configuration file')
parser.add_argument('conf', type=str, help='Configuration file path')
args = parser.parse_args()
settings.load_setting(args.conf)
global searcher
searcher = BeniCulturaliSearcher()
#app.run(host='0.0.0.0', port=8090, ssl_context='adhoc')
app.run(host='0.0.0.0', port=settings.port)
# app.run(host='0.0.0.0', port=settings.port)
"""
def start_tornado(app, port=8190):
http_server = tornado.httpserver.HTTPServer(tornado.wsgi.WSGIContainer(app))
http_server.listen(port)
app.logger.info("Tornado server starting on port {}".format(port))
tornado.ioloop.IOLoop.instance().start()
def start_from_terminal(app):
parser = argparse.ArgumentParser(description='Reading configuration file')
parser.add_argument('conf', type=str, help='Configuration file path')
args = parser.parse_args()
settings.load_setting(args.conf)
global searcher
searcher = Searcher()
#if args.debug:
# app.run(debug=True, host='0.0.0.0', port=settings.port)
# else:
#start_tornado(app, settings.port)
app.run(debug=False, host='0.0.0.0', port=settings.port)
if __name__ == '__main__':
start_from_terminal(app)
from flask import Flask, request, redirect, url_for, flash, render_template, send_from_directory, abort
from random import randint
import cv2
import io
import numpy as np
import json
import urllib
#from BeniCulturaliSearcher import BeniCulturaliSearcher
from Searcher import Searcher
import ImageRecognitionSettings as settings
import uuid
import os, os.path
import tornado.wsgi
import tornado.httpserver
import argparse
app = Flask(__name__)
@app.route('/bcir/')
def api_root():
print('index_with_randoms.html')
random_ids = []
for i in range(0, 15):
random_ids.append(searcher.get_id(randint(0, 600)))
return render_template('index_with_randoms.html', random_ids=random_ids)
def url_to_file(url):
dest_file = uuid.uuid4().hex + ".png"
dest_path = settings.logs + "/" + dest_file
req = urllib.request.Request(
url,
data=None,
headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
}
)
resp = urllib.request.urlopen(req)
image = np.asarray(bytearray(resp.read()), dtype="uint8")
decoded = cv2.imdecode(image, cv2.IMREAD_COLOR)
cv2.imwrite(dest_path, decoded)
#im = Image.fromarray(image)
#im.save(dest_path)
return dest_path
def post_to_file(image):
dest_file = uuid.uuid4().hex + ".png"
dest_path = settings.logs + "/" + dest_file
image.save(dest_path)
return dest_path
def get_res(results, query_url=None):
if query_url is not None:
return render_template('search.html', results=results, query_url=query_url)
json_res = json.dumps(results)
return json_res
@app.route('/bcir/searchById')
def search_by_id():
id = request.args.get('id')
rescorer = True
if request.args.get("rescorer") == 'false':
rescorer = False
results = searcher.search_by_id(id, settings.k, rescorer)
query_url = None
if request.args.get("tohtml") is not None:
query_url = id + ".jpg"
return get_res(results, query_url)
@app.route('/bcir/searchByImg', methods=['POST'])
def search_by_img():
if 'image' not in request.files:
flash('No file part')
return redirect(request.url)
file = request.files['image']
img_file = post_to_file(file)
rescorer = True
if request.form.get("rescorer") == 'false':
rescorer = False
#dest_file = uuid.uuid4().hex + ".jpg"
#dest_path = settings.logs + "/" + dest_file
#file.save(dest_path)
#files = {'image': (dest_file, open(dest_path, 'rb'))}
#r = requests.post(settings.rmac_service, files=files)
#results = search_engine.search_by_img(np.array(r.json()), settings.k)
results = searcher.search_by_img(img_file, settings.k, rescorer)
query_url = None
if request.form.get("tohtml") is not None:
query_url = ""
return get_res(results, query_url)
@app.route('/bcir/searchByURL')
def search_by_url():
url = request.args.get('url')
rescorer = True
if request.args.get("rescorer") == 'false':
rescorer = False
img_file = url_to_file(url)
# query = cv2.imdecode(image, cv2.IMREAD_COLOR)
# dest_file = uuid.uuid4().hex + ".jpg"
# dest_path = settings.logs + "/" + dest_file
# cv2.imwrite(dest_path, query)
# files = {'image': open(dest_path, 'rb')}
# r = requests.post(settings.rmac_service, files=files)
# results = search_engine.search_by_img(np.array(r.json()), settings.k)
results = searcher.search_by_img(img_file, settings.k, rescorer)
query_url = None
if request.args.get("tohtml") is not None:
query_url = url
return get_res(results, query_url)
@app.route('/bcir/addImg', methods=['POST'])
def add_img():
if 'image' not in request.files:
flash('No file part')
return redirect(request.url)
try:
file = request.files['image']
id = request.files['image'].filename
id, _ = os.path.splitext(id)
img_file = post_to_file(file)
searcher.add(img_file, id)
json_res = json.dumps("done")
return json_res
except:
abort(500)
@app.route('/bcir/rmImg')
def remove_img():
try:
id = request.args.get('id')
searcher.remove(id)
json_res = json.dumps("done")
return json_res
except:
abort(500)
@app.route('/bcir/<path:filename>')
def download_file(filename):
print(filename)
values = filename.split('/')
print(values)
return send_from_directory(settings.img_folder, filename, as_attachment=False)
@app.route('/bcir/queries/<path:filename>')
def queries(filename):
print(filename)
values = filename.split('/')
folder = values[0]
name = values[1]
print(folder)
print(name)
return send_from_directory(settings.working_folder + '/' + folder, name, as_attachment=False)
"""
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Reading configuration file')
parser.add_argument('conf', type=str, help='Configuration file path')
args = parser.parse_args()
settings.load_setting(args.conf)
global searcher
searcher = BeniCulturaliSearcher()
#app.run(host='0.0.0.0', port=8090, ssl_context='adhoc')
app.run(host='0.0.0.0', port=settings.port)
# app.run(host='0.0.0.0', port=settings.port)
"""
def start_tornado(app, port=8190):
http_server = tornado.httpserver.HTTPServer(tornado.wsgi.WSGIContainer(app))
http_server.listen(port)
app.logger.info("Tornado server starting on port {}".format(port))
tornado.ioloop.IOLoop.instance().start()
def start_from_terminal(app):
parser = argparse.ArgumentParser(description='Reading configuration file')
parser.add_argument('conf', type=str, help='Configuration file path')
args = parser.parse_args()
settings.load_setting(args.conf)
global searcher
searcher = Searcher()
#if args.debug:
# app.run(debug=True, host='0.0.0.0', port=settings.port)
# else:
#start_tornado(app, settings.port)
app.run(debug=False, host='0.0.0.0', port=settings.port)
if __name__ == '__main__':
start_from_terminal(app)

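To exercise the endpoints above from a client, the pattern in BulkSearch.py applies directly; a minimal hedged example (host and port are assumptions, the port taken from run.sh):

import requests

BASE = 'http://localhost:8190/bcir/'  # adjust to your conf file
with open('query.jpg', 'rb') as f:
    r = requests.post(BASE + 'searchByImg',
                      data={'rescorer': 'true'},
                      files={'image': ('query', f)})
print(r.json())  # list of (id, score) pairs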
View File

@ -1,35 +1,33 @@
import json
import os
def load_setting(conf_file):
global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF_FOLDER, DATASET_IDS, DATASET_IDS_LF
with open(conf_file) as settings_file:
settings = json.load(settings_file)
port = settings['port']
feature_extractor = settings['fe_service']
k = settings['k']
working_folder = settings['working_folder']
data_folder = os.path.join(working_folder, settings['data_folder'])
if not os.path.isdir(data_folder):
os.mkdir(data_folder)
DATASET = os.path.join(data_folder, 'dataset.npy')
#DATASET1 = os.path.join(data_folder, 'dataset_resized.npy')
#DATASET2 = os.path.join(data_folder, 'dataset_bw.npy')
DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
#DATASET_IDS_LF = os.path.join(data_folder, 'dataset_lf.ids')
img_folder = settings['img_folder']
logs = os.path.join(working_folder, settings['log_folder'])
if not os.path.isdir(logs):
os.mkdir(logs)
import json
import os
def load_setting(conf_file):
global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET_LF_FOLDER, DATASET_IDS, DB_LF
with open(conf_file) as settings_file:
settings = json.load(settings_file)
port = settings['port']
feature_extractor = settings['fe_service']
k = settings['k']
working_folder = settings['working_folder']
data_folder = os.path.join(working_folder, settings['data_folder'])
if not os.path.isdir(data_folder):
os.mkdir(data_folder)
DATASET = os.path.join(data_folder, 'dataset.npy')
DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
DB_LF = os.path.join(data_folder, 'sqlite_lf/lf.db')
img_folder = settings['img_folder']
logs = os.path.join(working_folder, settings['log_folder'])
if not os.path.isdir(logs):
os.mkdir(logs)

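For reference, a minimal configuration file satisfying every key that load_setting reads; all values are placeholders, not the project's real deployment settings:

{
  "port": 8190,
  "fe_service": "http://localhost:8090/extract",
  "k": 10,
  "working_folder": "/workspace/data",
  "data_folder": "data",
  "img_folder": "/workspace/data/img",
  "log_folder": "logs"
}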
View File

@ -0,0 +1,40 @@
from pathlib import Path
import tqdm
import LFUtilities
import BEBLIDExtractor as lf
import argparse
import os
from LFDB import LFDB
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='LF bulk extraction')
parser.add_argument('src', type=str, help='img src folder path')
parser.add_argument('dest', type=str, help='LF DB file')
args = parser.parse_args()
src = args.src
dest = args.dest
lf_db = LFDB(dest)
paths = Path(src).rglob('*.*')
paths_list = list(paths)
print('Extracting lf...')
for path in tqdm.tqdm(paths_list):
try:
kp, des = lf.extract(os.path.join(path.parent, path.name))
features = LFUtilities.pickle_keypoints(kp, des)
blob = LFUtilities.serialize_object(features)
filename = os.path.splitext(path.name)[0]
lf_db.put(filename, blob)
except Exception as e:
print("cannot process '%s'" % path)
print(e)
pass
lf_db.commit()
lf_db.close()
print('lf extracted.')

55
src/LFDB.py Normal file
View File

@ -0,0 +1,55 @@
import os
import sqlite3
from sqlite3 import Error
from werkzeug.datastructures import FileStorage
class LFDB:
def __init__(self, db_path):
# self.lf = LFUtilities.load(settings.DATASET_BEBLID)
# self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
# self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
self.conn = sqlite3.connect(db_path, check_same_thread=False)
def close(self):
if self.conn:
self.conn.close()
def put(self, docId, features):
try:
self.conn.text_factory = str
#print("[INFO] : Successful connection!")
cur = self.conn.cursor()
insert_file = '''INSERT INTO lf(docId, features) VALUES(?, ?)'''
cur.execute(insert_file, (docId, features,))
#print("[INFO] : The blob for ", docId, " is in the database.")
except Error as e:
print(e)
def commit(self):
try:
if self.conn:
self.conn.commit()
print("committing...")
except Error as e:
print(e)
def get(self, docId):
blob = None # avoids UnboundLocalError when no row matches docId
try:
self.conn.text_factory = str
cur = self.conn.cursor()
# print("[INFO] : Connected to SQLite to read_blob_data")
sql_fetch_blob_query = """SELECT * from lf where docId = ?"""
cur.execute(sql_fetch_blob_query, (docId,))
record = cur.fetchall()
for row in record:
converted_file_name = row[1]
blob = row[2]
# assumed row layout: (id, docId, features)
cur.close()
except sqlite3.Error as error:
print("[INFO] : Failed to read blob data from sqlite table", error)
return blob

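A hedged round-trip showing how LFBulkExtractionToDB and BEBLIDRescorerDB use this class together with the LFUtilities pickling helpers. The commit does not include the CREATE TABLE statement; the schema below is inferred from get() reading the blob from the third column:

import sqlite3
import LFUtilities
from LFDB import LFDB

# assumed schema: a leading id column, then (docId, features)
conn = sqlite3.connect('lf.db')
conn.execute('CREATE TABLE IF NOT EXISTS lf (id INTEGER PRIMARY KEY, docId TEXT, features BLOB)')
conn.commit()
conn.close()

db = LFDB('lf.db')
kp, des = [], []  # in practice: BEBLIDExtractor.extract(img_path)
blob = LFUtilities.serialize_object(LFUtilities.pickle_keypoints(kp, des))
db.put('some_image_id', blob)
db.commit()
restored = LFUtilities.unpickle_keypoints(
    LFUtilities.deserialize_object(db.get('some_image_id')))
db.close()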
View File

@ -3,6 +3,7 @@ import numpy as np
import pickle as pickle
import os
def resize(max_side, img):
if img.shape[1] > img.shape[0]:
r = max_side / img.shape[1]
@ -27,6 +28,14 @@ def pickle_keypoints(keypoints, descriptors):
return temp_array
def serialize_object(obj):
return pickle.dumps(obj)
def deserialize_object(serialized_obj):
return pickle.loads(serialized_obj)
def unpickle_keypoints(array):
keypoints = []
descriptors = []
@ -74,3 +83,20 @@ def load_img_lf(lf_path, id):
data = pickle.load(open(dest_path, "rb"))
kp, desc = unpickle_keypoints(data)
return (kp, desc)
def load_img_lf_GPU(lf_path, id):
dest_folder_name = id[0:3]
filename = id + '.dat'
dest_folder_path = os.path.join(lf_path, dest_folder_name)
dest_path = os.path.join(dest_folder_path, filename)
data = pickle.load(open(dest_path, "rb"))
kp, desc = unpickle_keypoints(data)
data_gpu_mat = cv2.cuda_GpuMat(np.zeros((1500,), dtype=int))
if len(desc) > 0:
data_gpu_mat = cv2.cuda_GpuMat(desc)
desc = data_gpu_mat
return (kp, desc)

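The serialize/deserialize helpers exist because cv2.KeyPoint objects are not directly picklable, so each keypoint is flattened to plain fields alongside its descriptor row. A sketch of the pack/unpack pair consistent with the calls shown above (the field order is an assumption based on the usual form of this pattern, not the project's exact code):

import cv2
import numpy as np

def pickle_keypoints(keypoints, descriptors):
    # plain tuples, since cv2.KeyPoint instances cannot be pickled
    return [(kp.pt, kp.size, kp.angle, kp.response, kp.octave, kp.class_id, desc)
            for kp, desc in zip(keypoints, descriptors)]

def unpickle_keypoints(array):
    keypoints, descriptors = [], []
    for pt, size, angle, response, octave, class_id, desc in array:
        keypoints.append(cv2.KeyPoint(pt[0], pt[1], size, angle,
                                      response, octave, class_id))
        descriptors.append(desc)
    return keypoints, np.array(descriptors)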
View File

@ -3,8 +3,9 @@ import numpy as np
import pickle as pickle
import LFUtilities
import beniculturaliSettings as settings
from BEBLIDRescorer import BEBLIDRescorer
import ImageRecognitionSettings as settings
from BEBLIDRescorerDB import BEBLIDRescorerDB
#from BEBLIDRescorerGPU import BEBLIDRescorerGPU
from FAISSSearchEngine import FAISSSearchEngine
import FeatureExtractor as fe
import BEBLIDExtractor as lf
@ -18,7 +19,7 @@ class Searcher:
# np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
self.search_engine = FAISSSearchEngine()
self.rescorer = BEBLIDRescorer()
self.rescorer = BEBLIDRescorerDB()
def get_id(self, idx):
return self.search_engine.get_id(idx)

View File

@ -9,7 +9,7 @@ import urllib
from BeniCulturaliSearchEngine import BeniCulturaliSearchEngine
import beniculturaliSettings as settings
import ImageRecognitionSettings as settings
import uuid
import requests

14
src/extract_lf.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
IMG_FOLDER=/workspace/workdir
DATA_FOLDER=/workspace/workdir/data/lf
mkdir -p $DATA_FOLDER
#if [[ $2 = '-o' ]]; then
# echo "deleting existing features"
python3 /workspace/src/LFBulkExtraction4File.py $IMG_FOLDER/$1 $DATA_FOLDER
chmod 777 $DATA_FOLDER/*
echo "Done"

12
src/extract_lf_db.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/bash
IMG_FOLDER=/workspace/workdir
DB_PATH=/workspace/workdir/data/sqlite_lf/lf.db
#if [[ $2 = '-o' ]]; then
# echo "deleting existing features"
python3 /workspace/src/LFBulkExtractionToDB.py $IMG_FOLDER/$1 $DB_PATH
chmod 777 $DB_PATH
echo "Done"

View File

@ -31,7 +31,7 @@
<td valign="top">
<input type="hidden" value="" name="" id="objId">
<input type="hidden" value="true" name="tohtml">
<input type="text" value="true" name="rescorer">
<input type="hidden" value="true" name="rescorer">
<input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
<input id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">

View File

@ -36,6 +36,8 @@
<td valign="top">
<input type="hidden" value="" name="" id="objId">
<input type="hidden" value="true" name="tohtml">
<input type="hidden" value="true" name="rescorer">
<input style="display: none;" id="urlToUpload" name="url" type="text" size="49" onclick="" onchange="document.getElementById('queryImage').value=''">
<input id="imageToUpload" name="image" type="file" size="38" onclick="" onchange="document.getElementById('queryImage').value=''">