diff --git a/src/BEBLIDBulkExtraction.py b/src/BEBLIDBulkExtraction.py
new file mode 100644
index 0000000..16e0ded
--- /dev/null
+++ b/src/BEBLIDBulkExtraction.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+import tqdm
+
+import LFUtilities
+import BEBLIDExtractor as lf
+import argparse
+import os
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='BEBLID bulk extraction')
+    parser.add_argument('src', type=str, help='src file containing a list of img paths')
+    parser.add_argument('dest', type=str, help='BEBLID dest file')
+
+    args = parser.parse_args()
+    src = args.src
+    dest = args.dest
+
+    with open(src, 'r') as src_file:
+        dataset = []
+
+        print('Extracting lf...')
+        for line in src_file:
+            try:
+                kp, des = lf.extract(line.strip())
+                dataset.append((kp, des))
+            except Exception:
+                print("cannot process '%s'" % line)
+                pass
+
+        LFUtilities.save(dataset, dest)
+
+    print('lf extracted.')
diff --git a/src/BEBLIDBulkExtractionFromFileList.py b/src/BEBLIDBulkExtractionFromFileList.py
new file mode 100644
index 0000000..44f7062
--- /dev/null
+++ b/src/BEBLIDBulkExtractionFromFileList.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+import tqdm
+
+import LFUtilities
+import BEBLIDExtractor as lf
+import argparse
+import os
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='BEBLID bulk extraction')
+    parser.add_argument('src', type=str, help='text file containing a list of img paths')
+    parser.add_argument('dest', type=str, help='BEBLID dest file')
+
+    args = parser.parse_args()
+    src = args.src
+    dest = args.dest
+
+    with open(src, 'r') as src_file:
+        dataset = []
+
+        print('Extracting lf...')
+        for line in src_file:
+            try:
+                kp, des = lf.extract(line.strip())
+                dataset.append((kp, des))
+            except Exception:
+                print("cannot process '%s'" % line)
+                pass
+
+        LFUtilities.save(dataset, dest)
+
+    print('lf extracted.')
diff --git a/src/BEBLIDExtractor.py b/src/BEBLIDExtractor.py
new file mode 100644
index 0000000..a48d128
--- /dev/null
+++ b/src/BEBLIDExtractor.py
@@ -0,0 +1,19 @@
+import cv2
+from pathlib import Path
+import tqdm
+import pickle
+import os
+import LFUtilities
+
+import BEBLIDParameters as params
+
+detector = cv2.ORB_create(params.KEYPOINTS)
+descriptor = cv2.xfeatures2d.BEBLID_create(0.75)
+
+
+def extract(img_path):
+    img = LFUtilities.resize(params.IMG_SIZE, cv2.imread(img_path))
+    kp = detector.detect(img, None)
+    kp, des = descriptor.compute(img, kp)
+    return (kp, des)
+
diff --git a/src/BEBLIDParameters.py b/src/BEBLIDParameters.py
new file mode 100644
index 0000000..21c1a17
--- /dev/null
+++ b/src/BEBLIDParameters.py
@@ -0,0 +1,5 @@
+NN_MATCH_RATIO = 0.8
+MIN_GOOD_MATCHES = 12
+MIN_INLIERS = 10
+KEYPOINTS = 500
+IMG_SIZE = 500
\ No newline at end of file
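Taken together, the new extractor and its parameters reduce to the following standalone sketch. This is a minimal illustration, assuming opencv-contrib-python (which provides cv2.xfeatures2d) is installed; the image path is hypothetical. The 0.75 passed to BEBLID_create is the sampling-window scale commonly recommended for ORB keypoints.

```python
import cv2

# Mirrors BEBLIDExtractor: ORB proposes keypoints, BEBLID describes them.
detector = cv2.ORB_create(500)                    # KEYPOINTS = 500
descriptor = cv2.xfeatures2d.BEBLID_create(0.75)  # scale suited to ORB keypoints

img = cv2.imread('query.jpg', cv2.IMREAD_GRAYSCALE)  # hypothetical input image
kp = detector.detect(img, None)
kp, des = descriptor.compute(img, kp)  # des: one binary descriptor row per keypoint
print(len(kp), 'keypoints, descriptors:', None if des is None else des.shape)
```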
diff --git a/src/BEBLIDRescorer.py b/src/BEBLIDRescorer.py
new file mode 100644
index 0000000..c60eaa4
--- /dev/null
+++ b/src/BEBLIDRescorer.py
@@ -0,0 +1,67 @@
+import cv2
+import numpy as np
+
+import LFUtilities
+import BEBLIDParameters
+import beniculturaliSettings as settings
+
+
+class BEBLIDRescorer:
+
+    def __init__(self):
+        #self.lf = LFUtilities.load(settings.DATASET_BEBLID)
+        #self.ids = np.loadtxt(settings.DATASET_IDS, dtype=str).tolist()
+        #self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+        self.bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
+
+    def rescore_by_id(self, query_id, resultset):
+        query = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, query_id)
+        return self.rescore_by_img(query, resultset)
+
+    def rescore_by_img(self, query, resultset):
+        max_inliers = -1
+        res = []
+        counter = 0
+        for data_id, _ in resultset:
+            try:
+                data_el = LFUtilities.load_img_lf(settings.DATASET_LF_FOLDER, data_id)
+
+                nn_matches = self.bf.knnMatch(query[1], data_el[1], 2)
+                good = [m for m, n in nn_matches if m.distance < BEBLIDParameters.NN_MATCH_RATIO * n.distance]
+
+                if len(good) > BEBLIDParameters.MIN_GOOD_MATCHES:
+                    src_pts = np.float32([query[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
+                    dst_pts = np.float32([data_el[0][m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
+
+                    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 1.0)
+                    matches_mask = mask.ravel().tolist()
+                    inliers = np.count_nonzero(matches_mask)
+                    if inliers >= BEBLIDParameters.MIN_INLIERS and inliers > max_inliers:
+                        max_inliers = inliers
+                        res.append((data_id, round(inliers / len(good), 3)))
+                print(f'candidate n. {counter}')
+            except Exception:
+                print('rescore error evaluating ' + data_id)
+                pass
+            counter += 1
+
+        if res:
+            res.sort(key=lambda result: result[1], reverse=True)
+        return res
+
+    def add(self, lf):
+        self.lf.append(lf)
+
+    def remove(self, idx):
+        self.descs = np.delete(self.descs, idx, axis=0)
+
+    def save(self, is_backup=False):
+        lf_save_file = settings.DATASET_LF
+        ids_file = settings.DATASET_IDS_LF
+        if lf_save_file != "None":
+            if is_backup:
+                lf_save_file += '.bak'
+                ids_file += '.bak'
+
+            LFUtilities.save(self.lf, lf_save_file)
+            np.savetxt(ids_file, self.ids, fmt='%s')
diff --git a/src/FeatureExtractor.py b/src/FeatureExtractor.py
index c561ff7..6a60b85 100644
--- a/src/FeatureExtractor.py
+++ b/src/FeatureExtractor.py
@@ -7,4 +7,4 @@ def extract(img_path):
     files = {'image': ('img', open(img_path, 'rb'))}
     data = {'resize': 'true', 'bw': 'true'}
     r = requests.post(settings.feature_extractor, data=data, files=files)
-    return np.array(r.json())
+    return np.array(r.json(), dtype='f')
diff --git a/src/LFBulkExtraction.py b/src/LFBulkExtraction.py
index 510d19c..34bb8f5 100644
--- a/src/LFBulkExtraction.py
+++ b/src/LFBulkExtraction.py
@@ -2,7 +2,7 @@ from pathlib import Path
 import tqdm
 
 import LFUtilities
-import ORBExtractor as lf
+import BEBLIDExtractor as lf
 import argparse
 import os
 
diff --git a/src/LFBulkExtraction4File.py b/src/LFBulkExtraction4File.py
new file mode 100644
index 0000000..556ecf5
--- /dev/null
+++ b/src/LFBulkExtraction4File.py
@@ -0,0 +1,32 @@
+from pathlib import Path
+import tqdm
+
+import LFUtilities
+import BEBLIDExtractor as lf
+import argparse
+import os
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='LF bulk extraction')
+    parser.add_argument('src', type=str, help='img src folder path')
+    parser.add_argument('dest', type=str, help='lf dest folder')
+
+    args = parser.parse_args()
+    src = args.src
+    dest = args.dest
+
+    paths = Path(src).rglob('*.*')
+    paths_list = list(paths)
+
+    print('Extracting lf...')
+    for path in tqdm.tqdm(paths_list):
+        try:
+            kp, des = lf.extract(os.path.join(path.parent, path.name))
+            filename = os.path.splitext(path.name)[0]
+            LFUtilities.save_img_lf(dest, filename, kp, des)
+        except Exception:
+            print("cannot process '%s'" % path)
+            pass
+
+    print('lf extracted.')
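The heart of rescore_by_img above is a standard two-step geometric verification: Lowe's ratio test on the Hamming-space matches, then a RANSAC homography whose inlier count becomes the score. A minimal sketch of just that step, using the thresholds from BEBLIDParameters (the function name verify_pair is hypothetical; the kp/des pairs come from the extractor):

```python
import cv2
import numpy as np

def verify_pair(kp1, des1, kp2, des2):
    """Return an inlier-ratio score, or None when verification fails."""
    bf = cv2.DescriptorMatcher_create(cv2.DescriptorMatcher_BRUTEFORCE_HAMMING)
    nn_matches = bf.knnMatch(des1, des2, 2)
    # Lowe's ratio test, NN_MATCH_RATIO = 0.8
    good = [p[0] for p in nn_matches if len(p) == 2 and p[0].distance < 0.8 * p[1].distance]
    if len(good) <= 12:  # MIN_GOOD_MATCHES
        return None
    src = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M, mask = cv2.findHomography(src, dst, cv2.RANSAC, 1.0)
    if mask is None:
        return None
    inliers = int(np.count_nonzero(mask))
    if inliers < 10:  # MIN_INLIERS
        return None
    return round(inliers / len(good), 3)
```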
diff --git a/src/LFUtilities.py b/src/LFUtilities.py
index fd79ccc..0e1fffa 100644
--- a/src/LFUtilities.py
+++ b/src/LFUtilities.py
@@ -3,14 +3,6 @@ import numpy as np
 import pickle as pickle
 import os
 
-
-THRESHOLD = 35
-MIN_GOOD_MATCHES = 12
-MIN_INLIERS = 6
-KEYPOINTS = 128
-IMG_SIZE = 500
-
-
 def resize(max_side, img):
     if img.shape[1] > img.shape[0]:
         r = max_side / img.shape[1]
@@ -39,7 +31,7 @@ def unpickle_keypoints(array):
     keypoints = []
     descriptors = []
     for point in array:
-        temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1],_size=point[1], _angle=point[2], _response=point[3], _octave=point[4], _class_id=point[5])
+        temp_feature = cv2.KeyPoint(x=point[0][0],y=point[0][1], size=point[1], angle=point[2], response=point[3], octave=point[4], class_id=point[5])
         temp_descriptor = point[6]
         keypoints.append(temp_feature)
         descriptors.append(temp_descriptor)
@@ -47,7 +39,7 @@ def unpickle_keypoints(array):
 
 
 def load(lf_path):
-    print('loading LF dataset')
+    print('loading LF dataset ' + lf_path)
     ser_dataset = pickle.load(open(lf_path, "rb"))
     lf_dataset = []
     for item in ser_dataset:
@@ -63,5 +55,22 @@ def save(lf_data, lf_path):
     pickle.dump(data, open(lf_path, 'wb'))
 
 
+def save_img_lf(dest, id, keypoints, descriptors):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(dest, dest_folder_name)
+    if not os.path.exists(dest_folder_path):
+        os.mkdir(dest_folder_path)
+    dest_path = os.path.join(dest_folder_path, filename)
+    data = pickle_keypoints(keypoints, descriptors)
+    pickle.dump(data, open(dest_path, 'wb'))
+
+
+def load_img_lf(lf_path, id):
+    dest_folder_name = id[0:3]
+    filename = id + '.dat'
+    dest_folder_path = os.path.join(lf_path, dest_folder_name)
+    dest_path = os.path.join(dest_folder_path, filename)
+    data = pickle.load(open(dest_path, "rb"))
+    kp, desc = unpickle_keypoints(data)
+    return (kp, desc)
diff --git a/src/ORBExtractor.py b/src/ORBExtractor.py
deleted file mode 100644
index 636b2ce..0000000
--- a/src/ORBExtractor.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import cv2
-from pathlib import Path
-import tqdm
-import pickle
-import os
-
-import LFUtilities as lf
-
-orb = cv2.ORB.create(lf.KEYPOINTS)
-
-
-def extract(img_path):
-    img = lf.resize(lf.IMG_SIZE, cv2.imread(img_path))
-    kp, des = orb.detectAndCompute(img, mask=None)
-    return (kp, des)
-
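The save_img_lf/load_img_lf helpers added to LFUtilities replace the single pickled dataset with a per-image store, sharded by the first three characters of the image id (id 'abc123' lands in <root>/abc/abc123.dat). A round-trip usage sketch with hypothetical paths; note that only the shard subfolder is created, so the root folder must already exist:

```python
import LFUtilities
import BEBLIDExtractor as lf

kp, des = lf.extract('imgs/abc123.jpg')                 # hypothetical image path
LFUtilities.save_img_lf('data/lf', 'abc123', kp, des)   # writes data/lf/abc/abc123.dat
kp2, des2 = LFUtilities.load_img_lf('data/lf', 'abc123')
assert len(kp2) == len(kp)
```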
diff --git a/src/Searcher.py b/src/Searcher.py
index 7c1ce3b..c323d04 100644
--- a/src/Searcher.py
+++ b/src/Searcher.py
@@ -4,21 +4,21 @@ import pickle as pickle
 
 import LFUtilities
 import beniculturaliSettings as settings
-from BeniCulturaliRescorer import BeniCulturaliRescorer
+from BEBLIDRescorer import BEBLIDRescorer
 from FAISSSearchEngine import FAISSSearchEngine
 import FeatureExtractor as fe
-import ORBExtractor as lf
+import BEBLIDExtractor as lf
 
 
 class Searcher:
-    K_REORDERING = 15
+    K_REORDERING = 1000
 
     def __init__(self):
         # self.dataset = h5py.File(settings.dataset_file, 'r')['rmac'][...]
         # np.save('/media/Data/data/beni_culturali/deploy/dataset', self.dataset)
 
         self.search_engine = FAISSSearchEngine()
-        #self.rescorer = BeniCulturaliRescorer()
+        self.rescorer = BEBLIDRescorer()
 
     def get_id(self, idx):
         return self.search_engine.get_id(idx)
@@ -46,9 +46,9 @@ class Searcher:
         if rescorer:
             kq = self.K_REORDERING
         res = self.search_engine.search_by_id(query_id, kq)
-        # if rescorer:
-        #     res_lf = self.rescorer.rescore_by_id(query_id, res)
-        #     res = res_lf if res_lf else res[:k]
+        if rescorer:
+            res_lf = self.rescorer.rescore_by_id(query_id, res)
+            res = res_lf if res_lf else res[:k]
         return res
 
     def search_by_img(self, query_img, k=10, rescorer=False):
@@ -57,10 +57,10 @@ class Searcher:
             kq = self.K_REORDERING
         query_desc = fe.extract(query_img)
         res = self.search_engine.search_by_img(query_desc, kq)
-        #if rescorer:
-        #    query_lf = lf.extract(query_img)
-        #    res_lf = self.rescorer.rescore_by_img(query_lf, res)
-        #    res = res_lf if res_lf else res[:k]
+        if rescorer:
+            query_lf = lf.extract(query_img)
+            res_lf = self.rescorer.rescore_by_img(query_lf, res)
+            res = res_lf if res_lf else res[:k]
         return res
 
     def save(self, is_backup=False):
diff --git a/src/beniculturali.py b/src/beniculturali.py
index 0f46164..e0ec24e 100644
--- a/src/beniculturali.py
+++ b/src/beniculturali.py
@@ -26,7 +26,7 @@ def api_root():
     print('index_with_randoms.html')
     random_ids = []
     for i in range(0, 15):
-        random_ids.append(searcher.get_id(randint(0, 30)))
+        random_ids.append(searcher.get_id(randint(0, 600)))
     return render_template('index_with_randoms.html', random_ids=random_ids)
diff --git a/src/beniculturaliSettings.py b/src/beniculturaliSettings.py
index 81c3815..9dd4c58 100644
--- a/src/beniculturaliSettings.py
+++ b/src/beniculturaliSettings.py
@@ -2,7 +2,7 @@ import json
 import os
 
 
 def load_setting(conf_file):
-    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF, DATASET_IDS, DATASET_IDS_LF
+    global port, feature_extractor, k, img_folder, logs, working_folder, data_folder, DATASET, DATASET1, DATASET2, DATASET_LF_FOLDER, DATASET_IDS, DATASET_IDS_LF
 
     with open(conf_file) as settings_file:
@@ -22,7 +22,7 @@ def load_setting(conf_file):
     DATASET = os.path.join(data_folder, 'dataset.npy')
     #DATASET1 = os.path.join(data_folder, 'dataset_resized.npy')
     #DATASET2 = os.path.join(data_folder, 'dataset_bw.npy')
-    DATASET_LF = os.path.join(data_folder, 'dataset_lf.dat')
+    DATASET_LF_FOLDER = os.path.join(data_folder, 'lf')
     DATASET_IDS = os.path.join(data_folder, 'dataset.ids')
     #DATASET_IDS_LF = os.path.join(data_folder, 'dataset_lf.ids')
diff --git a/src/templates/index_with_randoms.html b/src/templates/index_with_randoms.html
index 90734a9..e2afe72 100644
--- a/src/templates/index_with_randoms.html
+++ b/src/templates/index_with_randoms.html
@@ -31,7 +31,7 @@
-
+
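With these changes both search paths run two stages: FAISS returns K_REORDERING = 1000 candidates, the BEBLID rescorer reorders them by inlier ratio, and the plain top-k FAISS result is kept when verification finds nothing. A usage sketch under stated assumptions: the config file name is hypothetical, and the settings must point at an extracted lf folder.

```python
import beniculturaliSettings as settings
from Searcher import Searcher

settings.load_setting('conf.json')  # hypothetical config file
searcher = Searcher()

# rescorer=True triggers the BEBLID verification stage on the FAISS shortlist.
results = searcher.search_by_img('query.jpg', k=10, rescorer=True)
for img_id, score in results[:10]:
    print(img_id, score)
```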