-
Notifications
You must be signed in to change notification settings - Fork 0
/
pysift.py
426 lines (384 loc) · 24.2 KB
/
pysift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
from numpy import all, any, array, arctan2, cos, sin, exp, dot, log, logical_and, roll, sqrt, stack, trace, unravel_index, pi, deg2rad, rad2deg, where, zeros, floor, full, nan, isnan, round, float32
from numpy.linalg import det, lstsq, norm
from cv2 import resize, GaussianBlur, subtract, KeyPoint, INTER_LINEAR, INTER_NEAREST
from functools import cmp_to_key
import logging
####################
# Global variables #
####################
logger = logging.getLogger(__name__)
float_tolerance = 1e-7
#################
# Main function #
#################
def computeKeypointsAndDescriptors(image, sigma=1.6, num_intervals=3, assumed_blur=0.5, image_border_width=5):
"""Compute SIFT keypoints and descriptors for an input image
"""
image = image.astype('float32')
base_image = generateBaseImage(image, sigma, assumed_blur)
num_octaves = computeNumberOfOctaves(base_image.shape)
gaussian_kernels = generateGaussianKernels(sigma, num_intervals)
gaussian_images = generateGaussianImages(base_image, num_octaves, gaussian_kernels)
dog_images = generateDoGImages(gaussian_images)
keypoints = findScaleSpaceExtrema(gaussian_images, dog_images, num_intervals, sigma, image_border_width)
keypoints = removeDuplicateKeypoints(keypoints)
keypoints = convertKeypointsToInputImageSize(keypoints)
descriptors = generateDescriptors(keypoints, gaussian_images)
return keypoints, descriptors
#########################
# Image pyramid related #
#########################
def generateBaseImage(image, sigma, assumed_blur):
"""Generate base image from input image by upsampling by 2 in both directions and blurring
"""
logger.debug('Generating base image...')
image = resize(image, (0, 0), fx=2, fy=2, interpolation=INTER_LINEAR)
sigma_diff = sqrt(max((sigma ** 2) - ((2 * assumed_blur) ** 2), 0.01))
return GaussianBlur(image, (0, 0), sigmaX=sigma_diff, sigmaY=sigma_diff) # the image blur is now sigma instead of assumed_blur
def computeNumberOfOctaves(image_shape):
"""Compute number of octaves in image pyramid as function of base image shape (OpenCV default)
"""
return int(round(log(min(image_shape)) / log(2) - 1))
def generateGaussianKernels(sigma, num_intervals):
"""Generate list of gaussian kernels at which to blur the input image. Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper.
"""
logger.debug('Generating scales...')
num_images_per_octave = num_intervals + 3
k = 2 ** (1. / num_intervals)
gaussian_kernels = zeros(num_images_per_octave) # scale of gaussian blur necessary to go from one blur scale to the next within an octave
gaussian_kernels[0] = sigma
for image_index in range(1, num_images_per_octave):
sigma_previous = (k ** (image_index - 1)) * sigma
sigma_total = k * sigma_previous
gaussian_kernels[image_index] = sqrt(sigma_total ** 2 - sigma_previous ** 2)
return gaussian_kernels
def generateGaussianImages(image, num_octaves, gaussian_kernels):
"""Generate scale-space pyramid of Gaussian images
"""
logger.debug('Generating Gaussian images...')
gaussian_images = []
for octave_index in range(num_octaves):
gaussian_images_in_octave = []
gaussian_images_in_octave.append(image) # first image in octave already has the correct blur
for gaussian_kernel in gaussian_kernels[1:]:
image = GaussianBlur(image, (0, 0), sigmaX=gaussian_kernel, sigmaY=gaussian_kernel)
gaussian_images_in_octave.append(image)
gaussian_images.append(gaussian_images_in_octave)
octave_base = gaussian_images_in_octave[-3]
image = resize(octave_base, (int(octave_base.shape[1] / 2), int(octave_base.shape[0] / 2)), interpolation=INTER_NEAREST)
return array(gaussian_images)
def generateDoGImages(gaussian_images):
"""Generate Difference-of-Gaussians image pyramid
"""
logger.debug('Generating Difference-of-Gaussian images...')
dog_images = []
for gaussian_images_in_octave in gaussian_images:
dog_images_in_octave = []
for first_image, second_image in zip(gaussian_images_in_octave, gaussian_images_in_octave[1:]):
dog_images_in_octave.append(subtract(second_image, first_image)) # ordinary subtraction will not work because the images are unsigned integers
dog_images.append(dog_images_in_octave)
return array(dog_images)
###############################
# Scale-space extrema related #
###############################
def findScaleSpaceExtrema(gaussian_images, dog_images, num_intervals, sigma, image_border_width, contrast_threshold=0.04):
"""Find pixel positions of all scale-space extrema in the image pyramid
"""
logger.debug('Finding scale-space extrema...')
threshold = floor(0.5 * contrast_threshold / num_intervals * 255) # from OpenCV implementation
keypoints = []
for octave_index, dog_images_in_octave in enumerate(dog_images):
for image_index, (first_image, second_image, third_image) in enumerate(zip(dog_images_in_octave, dog_images_in_octave[1:], dog_images_in_octave[2:])):
# (i, j) is the center of the 3x3 array
for i in range(image_border_width, first_image.shape[0] - image_border_width):
for j in range(image_border_width, first_image.shape[1] - image_border_width):
if isPixelAnExtremum(first_image[i-1:i+2, j-1:j+2], second_image[i-1:i+2, j-1:j+2], third_image[i-1:i+2, j-1:j+2], threshold):
localization_result = localizeExtremumViaQuadraticFit(i, j, image_index + 1, octave_index, num_intervals, dog_images_in_octave, sigma, contrast_threshold, image_border_width)
if localization_result is not None:
keypoint, localized_image_index = localization_result
keypoints_with_orientations = computeKeypointsWithOrientations(keypoint, octave_index, gaussian_images[octave_index][localized_image_index])
for keypoint_with_orientation in keypoints_with_orientations:
keypoints.append(keypoint_with_orientation)
return keypoints
def isPixelAnExtremum(first_subimage, second_subimage, third_subimage, threshold):
"""Return True if the center element of the 3x3x3 input array is strictly greater than or less than all its neighbors, False otherwise
"""
center_pixel_value = second_subimage[1, 1]
if abs(center_pixel_value) > threshold:
if center_pixel_value > 0:
return all(center_pixel_value >= first_subimage) and \
all(center_pixel_value >= third_subimage) and \
all(center_pixel_value >= second_subimage[0, :]) and \
all(center_pixel_value >= second_subimage[2, :]) and \
center_pixel_value >= second_subimage[1, 0] and \
center_pixel_value >= second_subimage[1, 2]
elif center_pixel_value < 0:
return all(center_pixel_value <= first_subimage) and \
all(center_pixel_value <= third_subimage) and \
all(center_pixel_value <= second_subimage[0, :]) and \
all(center_pixel_value <= second_subimage[2, :]) and \
center_pixel_value <= second_subimage[1, 0] and \
center_pixel_value <= second_subimage[1, 2]
return False
def localizeExtremumViaQuadraticFit(i, j, image_index, octave_index, num_intervals, dog_images_in_octave, sigma, contrast_threshold, image_border_width, eigenvalue_ratio=10, num_attempts_until_convergence=5):
"""Iteratively refine pixel positions of scale-space extrema via quadratic fit around each extremum's neighbors
"""
logger.debug('Localizing scale-space extrema...')
extremum_is_outside_image = False
image_shape = dog_images_in_octave[0].shape
for attempt_index in range(num_attempts_until_convergence):
# need to convert from uint8 to float32 to compute derivatives and need to rescale pixel values to [0, 1] to apply Lowe's thresholds
first_image, second_image, third_image = dog_images_in_octave[image_index-1:image_index+2]
pixel_cube = stack([first_image[i-1:i+2, j-1:j+2],
second_image[i-1:i+2, j-1:j+2],
third_image[i-1:i+2, j-1:j+2]]).astype('float32') / 255.
gradient = computeGradientAtCenterPixel(pixel_cube)
hessian = computeHessianAtCenterPixel(pixel_cube)
extremum_update = -lstsq(hessian, gradient, rcond=None)[0]
if abs(extremum_update[0]) < 0.5 and abs(extremum_update[1]) < 0.5 and abs(extremum_update[2]) < 0.5:
break
j += int(round(extremum_update[0]))
i += int(round(extremum_update[1]))
image_index += int(round(extremum_update[2]))
# make sure the new pixel_cube will lie entirely within the image
if i < image_border_width or i >= image_shape[0] - image_border_width or j < image_border_width or j >= image_shape[1] - image_border_width or image_index < 1 or image_index > num_intervals:
extremum_is_outside_image = True
break
if extremum_is_outside_image:
logger.debug('Updated extremum moved outside of image before reaching convergence. Skipping...')
return None
if attempt_index >= num_attempts_until_convergence - 1:
logger.debug('Exceeded maximum number of attempts without reaching convergence for this extremum. Skipping...')
return None
functionValueAtUpdatedExtremum = pixel_cube[1, 1, 1] + 0.5 * dot(gradient, extremum_update)
if abs(functionValueAtUpdatedExtremum) * num_intervals >= contrast_threshold:
xy_hessian = hessian[:2, :2]
xy_hessian_trace = trace(xy_hessian)
xy_hessian_det = det(xy_hessian)
if xy_hessian_det > 0 and eigenvalue_ratio * (xy_hessian_trace ** 2) < ((eigenvalue_ratio + 1) ** 2) * xy_hessian_det:
# Contrast check passed -- construct and return OpenCV KeyPoint object
keypoint = KeyPoint()
keypoint.pt = ((j + extremum_update[0]) * (2 ** octave_index), (i + extremum_update[1]) * (2 ** octave_index))
keypoint.octave = octave_index + image_index * (2 ** 8) + int(round((extremum_update[2] + 0.5) * 255)) * (2 ** 16)
keypoint.size = sigma * (2 ** ((image_index + extremum_update[2]) / float32(num_intervals))) * (2 ** (octave_index + 1)) # octave_index + 1 because the input image was doubled
keypoint.response = abs(functionValueAtUpdatedExtremum)
return keypoint, image_index
return None
def computeGradientAtCenterPixel(pixel_array):
"""Approximate gradient at center pixel [1, 1, 1] of 3x3x3 array using central difference formula of order O(h^2), where h is the step size
"""
# With step size h, the central difference formula of order O(h^2) for f'(x) is (f(x + h) - f(x - h)) / (2 * h)
# Here h = 1, so the formula simplifies to f'(x) = (f(x + 1) - f(x - 1)) / 2
# NOTE: x corresponds to second array axis, y corresponds to first array axis, and s (scale) corresponds to third array axis
dx = 0.5 * (pixel_array[1, 1, 2] - pixel_array[1, 1, 0])
dy = 0.5 * (pixel_array[1, 2, 1] - pixel_array[1, 0, 1])
ds = 0.5 * (pixel_array[2, 1, 1] - pixel_array[0, 1, 1])
return array([dx, dy, ds])
def computeHessianAtCenterPixel(pixel_array):
"""Approximate Hessian at center pixel [1, 1, 1] of 3x3x3 array using central difference formula of order O(h^2), where h is the step size
"""
# With step size h, the central difference formula of order O(h^2) for f''(x) is (f(x + h) - 2 * f(x) + f(x - h)) / (h ^ 2)
# Here h = 1, so the formula simplifies to f''(x) = f(x + 1) - 2 * f(x) + f(x - 1)
# With step size h, the central difference formula of order O(h^2) for (d^2) f(x, y) / (dx dy) = (f(x + h, y + h) - f(x + h, y - h) - f(x - h, y + h) + f(x - h, y - h)) / (4 * h ^ 2)
# Here h = 1, so the formula simplifies to (d^2) f(x, y) / (dx dy) = (f(x + 1, y + 1) - f(x + 1, y - 1) - f(x - 1, y + 1) + f(x - 1, y - 1)) / 4
# NOTE: x corresponds to second array axis, y corresponds to first array axis, and s (scale) corresponds to third array axis
center_pixel_value = pixel_array[1, 1, 1]
dxx = pixel_array[1, 1, 2] - 2 * center_pixel_value + pixel_array[1, 1, 0]
dyy = pixel_array[1, 2, 1] - 2 * center_pixel_value + pixel_array[1, 0, 1]
dss = pixel_array[2, 1, 1] - 2 * center_pixel_value + pixel_array[0, 1, 1]
dxy = 0.25 * (pixel_array[1, 2, 2] - pixel_array[1, 2, 0] - pixel_array[1, 0, 2] + pixel_array[1, 0, 0])
dxs = 0.25 * (pixel_array[2, 1, 2] - pixel_array[2, 1, 0] - pixel_array[0, 1, 2] + pixel_array[0, 1, 0])
dys = 0.25 * (pixel_array[2, 2, 1] - pixel_array[2, 0, 1] - pixel_array[0, 2, 1] + pixel_array[0, 0, 1])
return array([[dxx, dxy, dxs],
[dxy, dyy, dys],
[dxs, dys, dss]])
#########################
# Keypoint orientations #
#########################
def computeKeypointsWithOrientations(keypoint, octave_index, gaussian_image, radius_factor=3, num_bins=36, peak_ratio=0.8, scale_factor=1.5):
"""Compute orientations for each keypoint
"""
logger.debug('Computing keypoint orientations...')
keypoints_with_orientations = []
image_shape = gaussian_image.shape
scale = scale_factor * keypoint.size / float32(2 ** (octave_index + 1)) # compare with keypoint.size computation in localizeExtremumViaQuadraticFit()
radius = int(round(radius_factor * scale))
weight_factor = -0.5 / (scale ** 2)
raw_histogram = zeros(num_bins)
smooth_histogram = zeros(num_bins)
for i in range(-radius, radius + 1):
region_y = int(round(keypoint.pt[1] / float32(2 ** octave_index))) + i
if region_y > 0 and region_y < image_shape[0] - 1:
for j in range(-radius, radius + 1):
region_x = int(round(keypoint.pt[0] / float32(2 ** octave_index))) + j
if region_x > 0 and region_x < image_shape[1] - 1:
dx = gaussian_image[region_y, region_x + 1] - gaussian_image[region_y, region_x - 1]
dy = gaussian_image[region_y - 1, region_x] - gaussian_image[region_y + 1, region_x]
gradient_magnitude = sqrt(dx * dx + dy * dy)
gradient_orientation = rad2deg(arctan2(dy, dx))
weight = exp(weight_factor * (i ** 2 + j ** 2)) # constant in front of exponential can be dropped because we will find peaks later
histogram_index = int(round(gradient_orientation * num_bins / 360.))
raw_histogram[histogram_index % num_bins] += weight * gradient_magnitude
for n in range(num_bins):
smooth_histogram[n] = (6 * raw_histogram[n] + 4 * (raw_histogram[n - 1] + raw_histogram[(n + 1) % num_bins]) + raw_histogram[n - 2] + raw_histogram[(n + 2) % num_bins]) / 16.
orientation_max = max(smooth_histogram)
orientation_peaks = where(logical_and(smooth_histogram > roll(smooth_histogram, 1), smooth_histogram > roll(smooth_histogram, -1)))[0]
for peak_index in orientation_peaks:
peak_value = smooth_histogram[peak_index]
if peak_value >= peak_ratio * orientation_max:
# Quadratic peak interpolation
# The interpolation update is given by equation (6.30) in https://ccrma.stanford.edu/~jos/sasp/Quadratic_Interpolation_Spectral_Peaks.html
left_value = smooth_histogram[(peak_index - 1) % num_bins]
right_value = smooth_histogram[(peak_index + 1) % num_bins]
interpolated_peak_index = (peak_index + 0.5 * (left_value - right_value) / (left_value - 2 * peak_value + right_value)) % num_bins
orientation = 360. - interpolated_peak_index * 360. / num_bins
if abs(orientation - 360.) < float_tolerance:
orientation = 0
new_keypoint = KeyPoint(*keypoint.pt, keypoint.size, orientation, keypoint.response, keypoint.octave)
keypoints_with_orientations.append(new_keypoint)
return keypoints_with_orientations
##############################
# Duplicate keypoint removal #
##############################
def compareKeypoints(keypoint1, keypoint2):
"""Return True if keypoint1 is less than keypoint2
"""
if keypoint1.pt[0] != keypoint2.pt[0]:
return keypoint1.pt[0] - keypoint2.pt[0]
if keypoint1.pt[1] != keypoint2.pt[1]:
return keypoint1.pt[1] - keypoint2.pt[1]
if keypoint1.size != keypoint2.size:
return keypoint2.size - keypoint1.size
if keypoint1.angle != keypoint2.angle:
return keypoint1.angle - keypoint2.angle
if keypoint1.response != keypoint2.response:
return keypoint2.response - keypoint1.response
if keypoint1.octave != keypoint2.octave:
return keypoint2.octave - keypoint1.octave
return keypoint2.class_id - keypoint1.class_id
def removeDuplicateKeypoints(keypoints):
"""Sort keypoints and remove duplicate keypoints
"""
if len(keypoints) < 2:
return keypoints
keypoints.sort(key=cmp_to_key(compareKeypoints))
unique_keypoints = [keypoints[0]]
for next_keypoint in keypoints[1:]:
last_unique_keypoint = unique_keypoints[-1]
if last_unique_keypoint.pt[0] != next_keypoint.pt[0] or \
last_unique_keypoint.pt[1] != next_keypoint.pt[1] or \
last_unique_keypoint.size != next_keypoint.size or \
last_unique_keypoint.angle != next_keypoint.angle:
unique_keypoints.append(next_keypoint)
return unique_keypoints
#############################
# Keypoint scale conversion #
#############################
def convertKeypointsToInputImageSize(keypoints):
"""Convert keypoint point, size, and octave to input image size
"""
converted_keypoints = []
for keypoint in keypoints:
keypoint.pt = tuple(0.5 * array(keypoint.pt))
keypoint.size *= 0.5
keypoint.octave = (keypoint.octave & ~255) | ((keypoint.octave - 1) & 255)
converted_keypoints.append(keypoint)
return converted_keypoints
#########################
# Descriptor generation #
#########################
def unpackOctave(keypoint):
"""Compute octave, layer, and scale from a keypoint
"""
octave = keypoint.octave & 255
layer = (keypoint.octave >> 8) & 255
if octave >= 128:
octave = octave | -128
scale = 1 / float32(1 << octave) if octave >= 0 else float32(1 << -octave)
return octave, layer, scale
def generateDescriptors(keypoints, gaussian_images, window_width=4, num_bins=8, scale_multiplier=3, descriptor_max_value=0.2):
"""Generate descriptors for each keypoint
"""
logger.debug('Generating descriptors...')
descriptors = []
for keypoint in keypoints:
octave, layer, scale = unpackOctave(keypoint)
gaussian_image = gaussian_images[octave + 1, layer]
num_rows, num_cols = gaussian_image.shape
point = round(scale * array(keypoint.pt)).astype('int')
bins_per_degree = num_bins / 360.
angle = 360. - keypoint.angle
cos_angle = cos(deg2rad(angle))
sin_angle = sin(deg2rad(angle))
weight_multiplier = -0.5 / ((0.5 * window_width) ** 2)
row_bin_list = []
col_bin_list = []
magnitude_list = []
orientation_bin_list = []
histogram_tensor = zeros((window_width + 2, window_width + 2, num_bins)) # first two dimensions are increased by 2 to account for border effects
# Descriptor window size (described by half_width) follows OpenCV convention
hist_width = scale_multiplier * 0.5 * scale * keypoint.size
half_width = int(round(hist_width * sqrt(2) * (window_width + 1) * 0.5)) # sqrt(2) corresponds to diagonal length of a pixel
half_width = int(min(half_width, sqrt(num_rows ** 2 + num_cols ** 2))) # ensure half_width lies within image
for row in range(-half_width, half_width + 1):
for col in range(-half_width, half_width + 1):
row_rot = col * sin_angle + row * cos_angle
col_rot = col * cos_angle - row * sin_angle
row_bin = (row_rot / hist_width) + 0.5 * window_width - 0.5
col_bin = (col_rot / hist_width) + 0.5 * window_width - 0.5
if row_bin > -1 and row_bin < window_width and col_bin > -1 and col_bin < window_width:
window_row = int(round(point[1] + row))
window_col = int(round(point[0] + col))
if window_row > 0 and window_row < num_rows - 1 and window_col > 0 and window_col < num_cols - 1:
dx = gaussian_image[window_row, window_col + 1] - gaussian_image[window_row, window_col - 1]
dy = gaussian_image[window_row - 1, window_col] - gaussian_image[window_row + 1, window_col]
gradient_magnitude = sqrt(dx * dx + dy * dy)
gradient_orientation = rad2deg(arctan2(dy, dx)) % 360
weight = exp(weight_multiplier * ((row_rot / hist_width) ** 2 + (col_rot / hist_width) ** 2))
row_bin_list.append(row_bin)
col_bin_list.append(col_bin)
magnitude_list.append(weight * gradient_magnitude)
orientation_bin_list.append((gradient_orientation - angle) * bins_per_degree)
for row_bin, col_bin, magnitude, orientation_bin in zip(row_bin_list, col_bin_list, magnitude_list, orientation_bin_list):
# Smoothing via trilinear interpolation
# Notations follows https://en.wikipedia.org/wiki/Trilinear_interpolation
# Note that we are really doing the inverse of trilinear interpolation here (we take the center value of the cube and distribute it among its eight neighbors)
row_bin_floor, col_bin_floor, orientation_bin_floor = floor([row_bin, col_bin, orientation_bin]).astype(int)
row_fraction, col_fraction, orientation_fraction = row_bin - row_bin_floor, col_bin - col_bin_floor, orientation_bin - orientation_bin_floor
if orientation_bin_floor < 0:
orientation_bin_floor += num_bins
if orientation_bin_floor >= num_bins:
orientation_bin_floor -= num_bins
c1 = magnitude * row_fraction
c0 = magnitude * (1 - row_fraction)
c11 = c1 * col_fraction
c10 = c1 * (1 - col_fraction)
c01 = c0 * col_fraction
c00 = c0 * (1 - col_fraction)
c111 = c11 * orientation_fraction
c110 = c11 * (1 - orientation_fraction)
c101 = c10 * orientation_fraction
c100 = c10 * (1 - orientation_fraction)
c011 = c01 * orientation_fraction
c010 = c01 * (1 - orientation_fraction)
c001 = c00 * orientation_fraction
c000 = c00 * (1 - orientation_fraction)
histogram_tensor[row_bin_floor + 1, col_bin_floor + 1, orientation_bin_floor] += c000
histogram_tensor[row_bin_floor + 1, col_bin_floor + 1, (orientation_bin_floor + 1) % num_bins] += c001
histogram_tensor[row_bin_floor + 1, col_bin_floor + 2, orientation_bin_floor] += c010
histogram_tensor[row_bin_floor + 1, col_bin_floor + 2, (orientation_bin_floor + 1) % num_bins] += c011
histogram_tensor[row_bin_floor + 2, col_bin_floor + 1, orientation_bin_floor] += c100
histogram_tensor[row_bin_floor + 2, col_bin_floor + 1, (orientation_bin_floor + 1) % num_bins] += c101
histogram_tensor[row_bin_floor + 2, col_bin_floor + 2, orientation_bin_floor] += c110
histogram_tensor[row_bin_floor + 2, col_bin_floor + 2, (orientation_bin_floor + 1) % num_bins] += c111
descriptor_vector = histogram_tensor[1:-1, 1:-1, :].flatten() # Remove histogram borders
# Threshold and normalize descriptor_vector
threshold = norm(descriptor_vector) * descriptor_max_value
descriptor_vector[descriptor_vector > threshold] = threshold
descriptor_vector /= max(norm(descriptor_vector), float_tolerance)
# Multiply by 512, round, and saturate between 0 and 255 to convert from float32 to unsigned char (OpenCV convention)
descriptor_vector = round(512 * descriptor_vector)
descriptor_vector[descriptor_vector < 0] = 0
descriptor_vector[descriptor_vector > 255] = 255
descriptors.append(descriptor_vector)
return array(descriptors, dtype='float32')