diff --git a/colorize3_poisson.py b/colorize3_poisson.py index e4f32af..4cf1737 100644 --- a/colorize3_poisson.py +++ b/colorize3_poisson.py @@ -1,52 +1,55 @@ import cv2 as cv -import numpy as np -import matplotlib.pyplot as plt +import numpy as np +import matplotlib.pyplot as plt import scipy.interpolate as si -import scipy.ndimage as scim +import scipy.ndimage as scim import scipy.ndimage.interpolation as sii import os import os.path as osp -#import cPickle as cp +# import cPickle as cp import _pickle as cp -#import Image +# import Image from PIL import Image from poisson_reconstruct import blit_images import pickle + def sample_weighted(p_dict): ps = p_dict.keys() - return ps[np.random.choice(len(ps),p=p_dict.values())] + return ps[np.random.choice(len(ps), p=p_dict.values())] + class Layer(object): - def __init__(self,alpha,color): + def __init__(self, alpha, color): # alpha for the whole image: - assert alpha.ndim==2 + assert alpha.ndim == 2 self.alpha = alpha - [n,m] = alpha.shape[:2] + [n, m] = alpha.shape[:2] - color=np.atleast_1d(np.array(color)).astype('uint8') + color = np.atleast_1d(np.array(color)).astype('uint8') # color for the image: - if color.ndim==1: # constant color for whole layer + if color.ndim == 1: # constant color for whole layer ncol = color.size - if ncol == 1 : #grayscale layer - self.color = color * np.ones((n,m,3),'uint8') - if ncol == 3 : - self.color = np.ones((n,m,3),'uint8') * color[None,None,:] - elif color.ndim==2: # grayscale image - self.color = np.repeat(color[:,:,None],repeats=3,axis=2).copy().astype('uint8') - elif color.ndim==3: #rgb image + if ncol == 1: # grayscale layer + self.color = color * np.ones((n, m, 3), 'uint8') + if ncol == 3: + self.color = np.ones((n, m, 3), 'uint8') * color[None, None, :] + elif color.ndim == 2: # grayscale image + self.color = np.repeat(color[:, :, None], repeats=3, axis=2).copy().astype('uint8') + elif color.ndim == 3: # rgb image self.color = color.copy().astype('uint8') else: - print (color.shape) + print(color.shape) raise Exception("color datatype not understood") + class FontColor(object): def __init__(self, col_file): - with open(col_file,'rb') as f: - #self.colorsRGB = cp.load(f) + with open(col_file, 'rb') as f: + # self.colorsRGB = cp.load(f) u = pickle._Unpickler(f) u.encoding = 'latin1' p = u.load() @@ -55,9 +58,8 @@ def __init__(self, col_file): # convert color-means from RGB to LAB for better nearest neighbour # computations: - self.colorsLAB = np.r_[self.colorsRGB[:,0:3], self.colorsRGB[:,6:9]].astype('uint8') - self.colorsLAB = np.squeeze(cv.cvtColor(self.colorsLAB[None,:,:],cv.COLOR_RGB2Lab)) - + self.colorsLAB = np.r_[self.colorsRGB[:, 0:3], self.colorsRGB[:, 6:9]].astype('uint8') + self.colorsLAB = np.squeeze(cv.cvtColor(self.colorsLAB[None, :, :], cv.COLOR_RGB2Lab)) def sample_normal(self, col_mean, col_std): """ @@ -76,19 +78,19 @@ def sample_from_data(self, bg_mat): """ bg_orig = bg_mat.copy() bg_mat = cv.cvtColor(bg_mat, cv.COLOR_RGB2Lab) - bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]),3)) - bg_mean = np.mean(bg_mat,axis=0) + bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]), 3)) + bg_mean = np.mean(bg_mat, axis=0) - norms = np.linalg.norm(self.colorsLAB-bg_mean[None,:], axis=1) + norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1) # choose a random color amongst the top 3 closest matches: - #nn = np.random.choice(np.argsort(norms)[:3]) + # nn = np.random.choice(np.argsort(norms)[:3]) nn = np.argmin(norms) ## nearest neighbour color: - data_col = 
self.colorsRGB[np.mod(nn,self.ncol),:] + data_col = self.colorsRGB[np.mod(nn, self.ncol), :] - col1 = self.sample_normal(data_col[:3],data_col[3:6]) - col2 = self.sample_normal(data_col[6:9],data_col[9:12]) + col1 = self.sample_normal(data_col[:3], data_col[3:6]) + col2 = self.sample_normal(data_col[6:9], data_col[9:12]) if nn < self.ncol: return (col2, col1) @@ -98,9 +100,9 @@ def sample_from_data(self, bg_mat): def mean_color(self, arr): col = cv.cvtColor(arr, cv.COLOR_RGB2HSV) - col = np.reshape(col, (np.prod(col.shape[:2]),3)) - col = np.mean(col,axis=0).astype('uint8') - return np.squeeze(cv.cvtColor(col[None,None,:],cv.COLOR_HSV2RGB)) + col = np.reshape(col, (np.prod(col.shape[:2]), 3)) + col = np.mean(col, axis=0).astype('uint8') + return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB)) def invert(self, rgb): rgb = 127 + rgb @@ -110,9 +112,9 @@ def complement(self, rgb_color): """ return a color which is complementary to the RGB_COLOR. """ - col_hsv = np.squeeze(cv.cvtColor(rgb_color[None,None,:], cv.COLOR_RGB2HSV)) - col_hsv[0] = col_hsv[0] + 128 #uint8 mods to 255 - col_comp = np.squeeze(cv.cvtColor(col_hsv[None,None,:],cv.COLOR_HSV2RGB)) + col_hsv = np.squeeze(cv.cvtColor(rgb_color[None, None, :], cv.COLOR_RGB2HSV)) + col_hsv[0] = col_hsv[0] + 128 # uint8 mods to 255 + col_comp = np.squeeze(cv.cvtColor(col_hsv[None, None, :], cv.COLOR_HSV2RGB)) return col_comp def triangle_color(self, col1, col2): @@ -120,43 +122,42 @@ def triangle_color(self, col1, col2): Returns a color which is "opposite" to both col1 and col2. """ col1, col2 = np.array(col1), np.array(col2) - col1 = np.squeeze(cv.cvtColor(col1[None,None,:], cv.COLOR_RGB2HSV)) - col2 = np.squeeze(cv.cvtColor(col2[None,None,:], cv.COLOR_RGB2HSV)) + col1 = np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_RGB2HSV)) + col2 = np.squeeze(cv.cvtColor(col2[None, None, :], cv.COLOR_RGB2HSV)) h1, h2 = col1[0], col2[0] - if h2 < h1 : h1,h2 = h2,h1 #swap - dh = h2-h1 - if dh < 127: dh = 255-dh - col1[0] = h1 + dh/2 - return np.squeeze(cv.cvtColor(col1[None,None,:],cv.COLOR_HSV2RGB)) + if h2 < h1: h1, h2 = h2, h1 # swap + dh = h2 - h1 + if dh < 127: dh = 255 - dh + col1[0] = h1 + dh / 2 + return np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_HSV2RGB)) def change_value(self, col_rgb, v_std=50): - col = np.squeeze(cv.cvtColor(col_rgb[None,None,:], cv.COLOR_RGB2HSV)) + col = np.squeeze(cv.cvtColor(col_rgb[None, None, :], cv.COLOR_RGB2HSV)) x = col[2] - vs = np.linspace(0,1) - ps = np.abs(vs - x/255.0) + vs = np.linspace(0, 1) + ps = np.abs(vs - x / 255.0) ps /= np.sum(ps) - v_rand = np.clip(np.random.choice(vs,p=ps) + 0.1*np.random.randn(),0,1) - col[2] = 255*v_rand - return np.squeeze(cv.cvtColor(col[None,None,:],cv.COLOR_HSV2RGB)) + v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1) + col[2] = 255 * v_rand + return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB)) class Colorize(object): - def __init__(self, model_dir='data'):#, im_path): + def __init__(self, model_dir='data'): # , im_path): # # get a list of background-images: # imlist = [osp.join(im_path,f) for f in os.listdir(im_path)] # self.bg_list = [p for p in imlist if osp.isfile(p)] - self.font_color = FontColor(col_file=osp.join(model_dir,'models/colors_new.cp')) + self.font_color = FontColor(col_file=osp.join(model_dir, 'models/colors_new.cp')) # probabilities of different text-effects: - self.p_bevel = 0.05 # add bevel effect to text - self.p_outline = 0.05 # just keep the outline of the text + self.p_bevel = 0.05 # 
add bevel effect to text + self.p_outline = 0.05 # just keep the outline of the text self.p_drop_shadow = 0.15 self.p_border = 0.15 - self.p_displacement = 0.30 # add background-based bump-mapping - self.p_texture = 0.0 # use an image for coloring text - + self.p_displacement = 0.30 # add background-based bump-mapping + self.p_texture = 0.0 # use an image for coloring text def drop_shadow(self, alpha, theta, shift, size, op=0.80): """ @@ -169,12 +170,12 @@ def drop_shadow(self, alpha, theta, shift, size, op=0.80): @return : alpha of the shadow layer (it is assumed that the color is black/white) """ - if size%2==0: + if size % 2 == 0: size -= 1 - size = max(1,size) - shadow = cv.GaussianBlur(alpha,(size,size),0) - [dx,dy] = shift * np.array([-np.sin(theta), np.cos(theta)]) - shadow = op*sii.shift(shadow, shift=[dx,dy],mode='constant',cval=0) + size = max(1, size) + shadow = cv.GaussianBlur(alpha, (size, size), 0) + [dx, dy] = shift * np.array([-np.sin(theta), np.cos(theta)]) + shadow = op * sii.shift(shadow, shift=[dx, dy], mode='constant', cval=0) return shadow.astype('uint8') def border(self, alpha, size, kernel_type='RECT'): @@ -185,38 +186,38 @@ def border(self, alpha, size, kernel_type='RECT'): @return : alpha layer of the border (color to be added externally). """ - kdict = {'RECT':cv.MORPH_RECT, 'ELLIPSE':cv.MORPH_ELLIPSE, - 'CROSS':cv.MORPH_CROSS} - kernel = cv.getStructuringElement(kdict[kernel_type],(size,size)) - border = cv.dilate(alpha,kernel,iterations=1) # - alpha + kdict = {'RECT': cv.MORPH_RECT, 'ELLIPSE': cv.MORPH_ELLIPSE, + 'CROSS': cv.MORPH_CROSS} + kernel = cv.getStructuringElement(kdict[kernel_type], (size, size)) + border = cv.dilate(alpha, kernel, iterations=1) # - alpha return border - def blend(self,cf,cb,mode='normal'): + def blend(self, cf, cb, mode='normal'): return cf - def merge_two(self,fore,back,blend_type=None): + def merge_two(self, fore, back, blend_type=None): """ merge two FOREground and BACKground layers. ref: https://en.wikipedia.org/wiki/Alpha_compositing ref: Chapter 7 (pg. 440 and pg. 
444): http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf """ - a_f = fore.alpha/255.0 - a_b = back.alpha/255.0 + a_f = fore.alpha / 255.0 + a_b = back.alpha / 255.0 c_f = fore.color c_b = back.color - a_r = a_f + a_b - a_f*a_b + a_r = a_f + a_b - a_f * a_b if blend_type != None: c_blend = self.blend(c_f, c_b, blend_type) - c_r = ( ((1-a_f)*a_b)[:,:,None] * c_b - + ((1-a_b)*a_f)[:,:,None] * c_f - + (a_f*a_b)[:,:,None] * c_blend ) + c_r = (((1 - a_f) * a_b)[:, :, None] * c_b + + ((1 - a_b) * a_f)[:, :, None] * c_f + + (a_f * a_b)[:, :, None] * c_blend) else: - c_r = ( ((1-a_f)*a_b)[:,:,None] * c_b - + a_f[:,:,None]*c_f ) + c_r = (((1 - a_f) * a_b)[:, :, None] * c_b + + a_f[:, :, None] * c_f) - return Layer((255*a_r).astype('uint8'), c_r.astype('uint8')) + return Layer((255 * a_r).astype('uint8'), c_r.astype('uint8')) def merge_down(self, layers, blends=None): """ @@ -229,20 +230,20 @@ def merge_down(self, layers, blends=None): """ nlayers = len(layers) if nlayers > 1: - [n,m] = layers[0].alpha.shape[:2] + [n, m] = layers[0].alpha.shape[:2] out_layer = layers[-1] - for i in range(-2,-nlayers-1,-1): - blend=None + for i in range(-2, -nlayers - 1, -1): + blend = None if blends is not None: - blend = blends[i+1] - out_layer = self.merge_two(fore=layers[i], back=out_layer,blend_type=blend) + blend = blends[i + 1] + out_layer = self.merge_two(fore=layers[i], back=out_layer, blend_type=blend) return out_layer else: return layers[0] def resize_im(self, im, osize): return np.array(Image.fromarray(im).resize(osize[::-1], Image.BICUBIC)) - + def occlude(self): """ somehow add occlusion to text. @@ -259,38 +260,39 @@ def color_border(self, col_text, col_bg): choice = np.random.choice(3) col_text = cv.cvtColor(col_text, cv.COLOR_RGB2HSV) - col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]),3)) - col_text = np.mean(col_text,axis=0).astype('uint8') + col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]), 3)) + col_text = np.mean(col_text, axis=0).astype('uint8') + + vs = np.linspace(0, 1) - vs = np.linspace(0,1) def get_sample(x): - ps = np.abs(vs - x/255.0) + ps = np.abs(vs - x / 255.0) ps /= np.sum(ps) - v_rand = np.clip(np.random.choice(vs,p=ps) + 0.1*np.random.randn(),0,1) - return 255*v_rand + v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1) + return 255 * v_rand # first choose a color, then inc/dec its VALUE: - if choice==0: + if choice == 0: # increase/decrease saturation: - col_text[0] = get_sample(col_text[0]) # saturation - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) - elif choice==1: + col_text[0] = get_sample(col_text[0]) # saturation + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) + elif choice == 1: # get the complementary color to text: - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) col_text = self.font_color.complement(col_text) else: # choose a mid-way color: col_bg = cv.cvtColor(col_bg, cv.COLOR_RGB2HSV) - col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]),3)) - col_bg = np.mean(col_bg,axis=0).astype('uint8') - col_bg = np.squeeze(cv.cvtColor(col_bg[None,None,:],cv.COLOR_HSV2RGB)) - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) - col_text = self.font_color.triangle_color(col_text,col_bg) + col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]), 3)) + col_bg = np.mean(col_bg, axis=0).astype('uint8') + col_bg = 
np.squeeze(cv.cvtColor(col_bg[None, None, :], cv.COLOR_HSV2RGB)) + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) + col_text = self.font_color.triangle_color(col_text, col_bg) # now change the VALUE channel: - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_RGB2HSV)) - col_text[2] = get_sample(col_text[2]) # value - return np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_RGB2HSV)) + col_text[2] = get_sample(col_text[2]) # value + return np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) def color_text(self, text_arr, h, bg_arr): """ @@ -303,11 +305,10 @@ def color_text(self, text_arr, h, bg_arr): H : minimum height of a character """ - bg_col,fg_col,i = 0,0,0 - fg_col,bg_col = self.font_color.sample_from_data(bg_arr) + bg_col, fg_col, i = 0, 0, 0 + fg_col, bg_col = self.font_color.sample_from_data(bg_arr) return Layer(alpha=text_arr, color=fg_col), fg_col, bg_col - def process(self, text_arr, bg_arr, min_h): """ text_arr : one alpha mask : nxm, uint8 @@ -318,55 +319,63 @@ def process(self, text_arr, bg_arr, min_h): """ # decide on a color for the text: l_text, fg_col, bg_col = self.color_text(text_arr, min_h, bg_arr) - bg_col = np.mean(np.mean(bg_arr,axis=0),axis=0) - l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'),color=bg_col) + bg_col = np.mean(np.mean(bg_arr, axis=0), axis=0) + l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col) - l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1*np.random.randn(), 0.72, 1.0) + l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1 * np.random.randn(), 0.72, 1.0) layers = [l_text] blends = [] # add border: if np.random.rand() < self.p_border: - if min_h <= 15 : bsz = 1 - elif 15 < min_h < 30: bsz = 3 - else: bsz = 5 + if min_h <= 15: + bsz = 1 + elif 15 < min_h < 30: + bsz = 3 + else: + bsz = 5 border_a = self.border(l_text.alpha, size=bsz) - l_border = Layer(border_a, self.color_border(l_text.color,l_bg.color)) + l_border = Layer(border_a, self.color_border(l_text.color, l_bg.color)) layers.append(l_border) blends.append('normal') # add shadow: if np.random.rand() < self.p_drop_shadow: # shadow gaussian size: - if min_h <= 15 : bsz = 1 - elif 15 < min_h < 30: bsz = 3 - else: bsz = 5 + if min_h <= 15: + bsz = 1 + elif 15 < min_h < 30: + bsz = 3 + else: + bsz = 5 # shadow angle: - theta = np.pi/4 * np.random.choice([1,3,5,7]) + 0.5*np.random.randn() + theta = np.pi / 4 * np.random.choice([1, 3, 5, 7]) + 0.5 * np.random.randn() # shadow shift: - if min_h <= 15 : shift = 2 - elif 15 < min_h < 30: shift = 7+np.random.randn() - else: shift = 15 + 3*np.random.randn() + if min_h <= 15: + shift = 2 + elif 15 < min_h < 30: + shift = 7 + np.random.randn() + else: + shift = 15 + 3 * np.random.randn() # opacity: - op = 0.50 + 0.1*np.random.randn() + op = 0.50 + 0.1 * np.random.randn() - shadow = self.drop_shadow(l_text.alpha, theta, shift, 3*bsz, op) + shadow = self.drop_shadow(l_text.alpha, theta, shift, 3 * bsz, op) l_shadow = Layer(shadow, 0) layers.append(l_shadow) blends.append('normal') - - l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_col) + l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col) layers.append(l_bg) blends.append('normal') - l_normal = self.merge_down(layers,blends) + l_normal = self.merge_down(layers, blends) # now do poisson image editing: - l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_arr) - l_out = 
blit_images(l_normal.color,l_bg.color.copy()) - + l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_arr) + l_out = blit_images(l_normal.color, l_bg.color.copy()) + # plt.subplot(1,3,1) # plt.imshow(l_normal.color) # plt.subplot(1,3,2) @@ -374,17 +383,16 @@ def process(self, text_arr, bg_arr, min_h): # plt.subplot(1,3,3) # plt.imshow(l_out) # plt.show() - + if l_out is None: # poisson recontruction produced # imperceptible text. In this case, # just do a normal blend: layers[-1] = l_bg - return self.merge_down(layers,blends).color + return self.merge_down(layers, blends).color return l_out - def check_perceptible(self, txt_mask, bg, txt_bg): """ --- DEPRECATED; USE GRADIENT CHECKING IN POISSON-RECONSTRUCT INSTEAD --- @@ -396,19 +404,19 @@ def check_perceptible(self, txt_mask, bg, txt_bg): bg (hxwx3) : original background image WITHOUT any text. txt_bg (hxwx3) : image with text. """ - bgo,txto = bg.copy(), txt_bg.copy() + bgo, txto = bg.copy(), txt_bg.copy() txt_mask = txt_mask.astype('bool') bg = cv.cvtColor(bg.copy(), cv.COLOR_RGB2Lab) txt_bg = cv.cvtColor(txt_bg.copy(), cv.COLOR_RGB2Lab) - bg_px = bg[txt_mask,:] - txt_px = txt_bg[txt_mask,:] - bg_px[:,0] *= 100.0/255.0 #rescale - L channel - txt_px[:,0] *= 100.0/255.0 + bg_px = bg[txt_mask, :] + txt_px = txt_bg[txt_mask, :] + bg_px[:, 0] *= 100.0 / 255.0 # rescale - L channel + txt_px[:, 0] *= 100.0 / 255.0 - diff = np.linalg.norm(bg_px-txt_px,ord=None,axis=1) - diff = np.percentile(diff,[10,30,50,70,90]) - print ("color diff percentile :", diff) - return diff, (bgo,txto) + diff = np.linalg.norm(bg_px - txt_px, ord=None, axis=1) + diff = np.percentile(diff, [10, 30, 50, 70, 90]) + print("color diff percentile :", diff) + return diff, (bgo, txto) def color(self, bg_arr, text_arr, hs, place_order=None, pad=20): """ @@ -422,8 +430,8 @@ def color(self, bg_arr, text_arr, hs, place_order=None, pad=20): return : nxmx3 rgb colorized text-image. 
""" bg_arr = bg_arr.copy() - if bg_arr.ndim == 2 or bg_arr.shape[2]==1: # grayscale image: - bg_arr = np.repeat(bg_arr[:,:,None], 3, 2) + if bg_arr.ndim == 2 or bg_arr.shape[2] == 1: # grayscale image: + bg_arr = np.repeat(bg_arr[:, :, None], 3, 2) # get the canvas size: canvas_sz = np.array(bg_arr.shape[:2]) @@ -439,26 +447,26 @@ def color(self, bg_arr, text_arr, hs, place_order=None, pad=20): loc = np.where(text_arr[i]) lx, ly = np.min(loc[0]), np.min(loc[1]) mx, my = np.max(loc[0]), np.max(loc[1]) - l = np.array([lx,ly]) - m = np.array([mx,my])-l+1 - text_patch = text_arr[i][l[0]:l[0]+m[0],l[1]:l[1]+m[1]] + l = np.array([lx, ly]) + m = np.array([mx, my]) - l + 1 + text_patch = text_arr[i][l[0]:l[0] + m[0], l[1]:l[1] + m[1]] # figure out padding: - ext = canvas_sz - (l+m) - num_pad = pad*np.ones(4,dtype='int32') + ext = canvas_sz - (l + m) + num_pad = pad * np.ones(4, dtype='int32') num_pad[:2] = np.minimum(num_pad[:2], l) num_pad[2:] = np.minimum(num_pad[2:], ext) - text_patch = np.pad(text_patch, pad_width=((num_pad[0],num_pad[2]), (num_pad[1],num_pad[3])), mode='constant') + text_patch = np.pad(text_patch, pad_width=((num_pad[0], num_pad[2]), (num_pad[1], num_pad[3])), + mode='constant') l -= num_pad[:2] - w,h = text_patch.shape - bg = bg_arr[l[0]:l[0]+w,l[1]:l[1]+h,:] + w, h = text_patch.shape + bg = bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :] rdr0 = self.process(text_patch, bg, hs[i]) rendered.append(rdr0) - bg_arr[l[0]:l[0]+w,l[1]:l[1]+h,:] = rdr0#rendered[-1] - + bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :] = rdr0 # rendered[-1] return bg_arr diff --git a/data_provider.py b/data_provider.py new file mode 100644 index 0000000..f8ae317 --- /dev/null +++ b/data_provider.py @@ -0,0 +1,99 @@ +import os +from synthgen import * +from common import * +import wget +import tarfile + + +# TODO: move these contants inside DataProvider + +# path to the data-file, containing image, depth and segmentation: +DATA_PATH = 'data' # TODO dedup +DB_FNAME = osp.join(DATA_PATH, 'dset.h5') +# url of the data (google-drive public file): +DATA_URL = 'http://www.robots.ox.ac.uk/~ankush/data.tar.gz' + + +class DateProvider(object): + + def __init__(self, root_data_dir=None): + # TODO: add option to override those 3: + path_depth = "depth.h5" + path_segmap = "seg.h5" + self.path_images = "bg_img" + self.db = None + self.depth_db = None + self.seg_db = None + self.segmap = {} + self.depth = {} + + if root_data_dir is None: + # should download default example + self.db = DateProvider.get_data() + self.segmap = self.db['seg'] + self.depth = self.db['depth'] + self.imnames = sorted(self.db['image'].keys()) + else: + # provided path to the folder with all data downloaded separately. + # see https://github.com/ankush-me/SynthText#pre-processed-background-images + self.path = root_data_dir + self.depth_db = h5py.File(osp.join(self.path, path_depth), 'r') + self.seg_db = h5py.File(osp.join(self.path, path_segmap), 'r') + self.imnames = sorted(self.depth_db.keys()) + self.segmap = self.seg_db['mask'] + self.depth = self.depth_db + + @staticmethod + def get_data(): + """ + Downloads the archive using link specified in DATA_URL. Unpacks the archive, treats it as h5 database. + The image, depth and segmentation data is downloaded. + + Returns: + the h5 database. 
+ """ + if not osp.exists(DB_FNAME): + try: + colorprint(Color.BLUE, '\tdownloading data (56 M) from: ' + DATA_URL, bold=True) + print() + sys.stdout.flush() + out_fname = 'data.tar.gz' + wget.download(DATA_URL, out=out_fname) + tar = tarfile.open(out_fname) + tar.extractall() + tar.close() + os.remove(out_fname) + colorprint(Color.BLUE, '\n\tdata saved at:' + DB_FNAME, bold=True) + sys.stdout.flush() + except: + print(colorize(Color.RED, 'Data not found and have problems downloading.', bold=True)) + sys.stdout.flush() + sys.exit(-1) + # open the h5 file and return: + return h5py.File(DB_FNAME, 'r') + + def get_image(self, imname: str): + if self.db is None: + return Image.open(osp.join(self.path, self.path_images, imname)).convert('RGB') + else: + return Image.fromarray(self.db['image'][imname][:]) + + def get_segmap(self, imname: str): + return self.segmap[imname] + + def get_depth(self, imname: str): + if self.db is None: + return self.depth[imname][:].T[:, :, 0] + else: + return self.depth[imname][:].T[:, :, 1] + + def get_imnames(self): + return self.imnames + + def close(self): + if self.db is not None: + self.db.close() + if self.depth_db is not None: + self.depth_db.close() + if self.seg_db is not None: + self.seg_db.close() diff --git a/gen.py b/gen.py index ffaf995..d57b142 100644 --- a/gen.py +++ b/gen.py @@ -11,133 +11,174 @@ year = "2016", } """ - -import numpy as np -import h5py -import os, sys, traceback -import os.path as osp +import os from synthgen import * from common import * -import wget, tarfile +from functools import reduce +import re +from time import time +from data_provider import DateProvider -## Define some configuration variables: -NUM_IMG = -1 # no. of images to use for generation (-1 to use all available): -INSTANCE_PER_IMAGE = 1 # no. of times to use the same image -SECS_PER_IMG = 5 #max time per image in seconds +# Define some configuration variables: +NUM_IMG = 1 # number of images to use for generation (-1 to use all available): +INSTANCE_PER_IMAGE = 1 # number of times to use the same image +SECS_PER_IMG = 5 # max time per image in seconds # path to the data-file, containing image, depth and segmentation: DATA_PATH = 'data' -DB_FNAME = osp.join(DATA_PATH,'dset.h5') -# url of the data (google-drive public file): -DATA_URL = 'http://www.robots.ox.ac.uk/~ankush/data.tar.gz' OUT_FILE = 'results/SynthText.h5' -def get_data(): - """ - Download the image,depth and segmentation data: - Returns, the h5 database. - """ - if not osp.exists(DB_FNAME): - try: - colorprint(Color.BLUE,'\tdownloading data (56 M) from: '+DATA_URL,bold=True) - print() - sys.stdout.flush() - out_fname = 'data.tar.gz' - wget.download(DATA_URL,out=out_fname) - tar = tarfile.open(out_fname) - tar.extractall() - tar.close() - os.remove(out_fname) - colorprint(Color.BLUE,'\n\tdata saved at:'+DB_FNAME,bold=True) - sys.stdout.flush() - except: - print (colorize(Color.RED,'Data not found and have problems downloading.',bold=True)) - sys.stdout.flush() - sys.exit(-1) - # open the h5 file and return: - return h5py.File(DB_FNAME,'r') - - -def add_res_to_db(imgname,res,db): - """ - Add the synthetically generated text image instance - and other metadata to the dataset. 
- """ - ninstance = len(res) - for i in range(ninstance): - dname = "%s_%d"%(imgname, i) - db['data'].create_dataset(dname,data=res[i]['img']) - db['data'][dname].attrs['charBB'] = res[i]['charBB'] - db['data'][dname].attrs['wordBB'] = res[i]['wordBB'] - #db['data'][dname].attrs['txt'] = res[i]['txt'] - L = res[i]['txt'] - L = [n.encode("ascii", "ignore") for n in L] - db['data'][dname].attrs['txt'] = L - - -def main(viz=False): - # open databases: - print (colorize(Color.BLUE,'getting data..',bold=True)) - db = get_data() - print (colorize(Color.BLUE,'\t-> done',bold=True)) - - # open the output h5 file: - out_db = h5py.File(OUT_FILE,'w') - out_db.create_group('/data') - print (colorize(Color.GREEN,'Storing the output in: '+OUT_FILE, bold=True)) - - # get the names of the image files in the dataset: - imnames = sorted(db['image'].keys()) - N = len(imnames) - global NUM_IMG - if NUM_IMG < 0: - NUM_IMG = N - start_idx,end_idx = 0,min(NUM_IMG, N) - - RV3 = RendererV3(DATA_PATH,max_time=SECS_PER_IMG) - for i in range(start_idx,end_idx): - imname = imnames[i] - try: - # get the image: - img = Image.fromarray(db['image'][imname][:]) - # get the pre-computed depth: - # there are 2 estimates of depth (represented as 2 "channels") - # here we are using the second one (in some cases it might be - # useful to use the other one): - depth = db['depth'][imname][:].T - depth = depth[:,:,1] - # get segmentation: - seg = db['seg'][imname][:].astype('float32') - area = db['seg'][imname].attrs['area'] - label = db['seg'][imname].attrs['label'] - - # re-size uniformly: - sz = depth.shape[:2][::-1] - img = np.array(img.resize(sz,Image.ANTIALIAS)) - seg = np.array(Image.fromarray(seg).resize(sz,Image.NEAREST)) - - print (colorize(Color.RED,'%d of %d'%(i,end_idx-1), bold=True)) - res = RV3.render_text(img,depth,seg,area,label, - ninstance=INSTANCE_PER_IMAGE,viz=viz) - if len(res) > 0: - # non-empty : successful in placing text: - add_res_to_db(imname,res,out_db) - # visualize the output: - if viz: - if 'q' in input(colorize(Color.RED,'continue? (enter to continue, q to exit): ',True)): - break - except: - traceback.print_exc() - print (colorize(Color.GREEN,'>>>> CONTINUING....', bold=True)) - continue - db.close() - out_db.close() - - -if __name__=='__main__': - import argparse - parser = argparse.ArgumentParser(description='Genereate Synthetic Scene-Text Images') - parser.add_argument('--viz',action='store_true',dest='viz',default=False,help='flag for turning on visualizations') - args = parser.parse_args() - main(args.viz) +MASKS_DIR = "./masks" + + +def add_res_to_db(imgname, res, db): + """ + Add the synthetically generated text image instance + and other metadata to the dataset. + """ + ninstance = len(res) + for i in range(ninstance): + dname = "%s_%d" % (imgname, i) + db['data'].create_dataset(dname, data=res[i]['img']) + db['data'][dname].attrs['charBB'] = res[i]['charBB'] + db['data'][dname].attrs['wordBB'] = res[i]['wordBB'] + L = res[i]['txt'] + L = [n.encode("ascii", "ignore") for n in L] + db['data'][dname].attrs['txt'] = L + + +def main(viz=False, debug=False, output_masks=False, data_path=None): + """ + Entry point. + + Args: + viz: display generated images. If this flag is true, needs user input to continue with every loop iteration. 
+        output_masks: output masks of text, which were used during generation
+    """
+    if output_masks:
+        # create a directory for masks if it does not exist
+        if not os.path.exists(MASKS_DIR):
+            os.makedirs(MASKS_DIR)
+
+    # open databases:
+    print(colorize(Color.BLUE, 'getting data..', bold=True))
+
+    provider = DataProvider(data_path)
+
+    # db = DataProvider.get_data()
+    print(colorize(Color.BLUE, '\t-> done', bold=True))
+
+    # open the output h5 file:
+    out_db = h5py.File(OUT_FILE, 'w')
+    out_db.create_group('/data')
+    print(colorize(Color.GREEN, 'Storing the output in: ' + OUT_FILE, bold=True))
+
+    # get the names of the image files in the dataset:
+    imnames = provider.get_imnames()
+    N = len(imnames)
+    global NUM_IMG
+    if NUM_IMG < 0:
+        NUM_IMG = N
+    start_idx, end_idx = 0, min(NUM_IMG, N)
+
+    renderer = RendererV3(DATA_PATH, max_time=SECS_PER_IMG)
+    for i in range(start_idx, end_idx):
+        imname = imnames[i]
+
+        try:
+            # get the image:
+            img = provider.get_image(imname)
+            # get the pre-computed depth:
+            #  there are 2 estimates of depth (represented as 2 "channels")
+            #  here we are using the second one (in some cases it might be
+            #  useful to use the other one):
+            depth = provider.get_depth(imname)
+            # get segmentation:
+            seg = provider.get_segmap(imname)[:].astype('float32')
+            area = provider.get_segmap(imname).attrs['area']  # number of pixels in each region
+            label = provider.get_segmap(imname).attrs['label']
+
+            # re-size uniformly:
+            sz = depth.shape[:2][::-1]
+            img = np.array(img.resize(sz, Image.ANTIALIAS))
+            seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))
+            print(colorize(Color.RED, '%d of %d' % (i, end_idx - 1), bold=True))
+
+            if debug:
+                print("\n Processing " + str(imname) + "...")
+
+            res = renderer.render_text(img, depth, seg, area, label,
+                                       ninstance=INSTANCE_PER_IMAGE)
+            if len(res) > 0:
+                # non-empty : successful in placing text:
+                add_res_to_db(imname, res, out_db)
+                if debug:
+                    print(" Success. " + str(len(res[0]['txt'])) + " texts placed:")
+                    print(" Texts:" + ";".join(res[0]['txt']))
+                    ws = re.sub(' +', ' ', (" ".join(res[0]['txt']).replace("\n", " "))).strip().split(" ")
+                    print(" Words: #" + str(len(ws)) + " " + ";".join(ws))
+                    print(" Words bounding boxes: " + str(res[0]['wordBB'].shape))
+            else:
+                print(" Failure: No text placed.")
+
+            if len(res) > 0 and output_masks:
+                ts = str(int(time() * 1000))
+
+                # executed only if --output-masks flag is set
+                prefix = MASKS_DIR + "/" + imname + ts
+
+                imageio.imwrite(prefix + "_original.png", img)
+                imageio.imwrite(prefix + "_with_text.png", res[0]['img'])
+
+                # merge masks together:
+                merged = reduce(lambda a, b: np.add(a, b), res[0]['masks'])
+                # since we just added pixel values, bring the result back to the 0..255 range:
+                merged = np.divide(merged, len(res[0]['masks']))
+                imageio.imwrite(prefix + "_mask.png", merged)
+
+                # print bounding boxes
+                f = open(prefix + "_bb.txt", "w+")
+                bbs = res[0]['wordBB']
+                boxes = np.swapaxes(bbs, 2, 0)
+                words = re.sub(' +', ' ', ' '.join(res[0]['txt']).replace("\n", " ")).strip().split(" ")
+                assert len(boxes) == len(words)
+                for j in range(len(boxes)):
+                    as_strings = np.char.mod('%f', boxes[j].flatten())
+                    f.write(",".join(as_strings) + "," + words[j] + "\n")
+                f.close()
+
+            # visualize the output:
+            if viz:
+                # executed only if --viz flag is set
+                for idict in res:
+                    img_with_text = idict['img']
+                    viz_textbb(1, img_with_text, [idict['wordBB']], alpha=1.0)
+                    viz_masks(2, img_with_text, seg, depth, idict['labeled_region'])
+                    # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
+                    if i < INSTANCE_PER_IMAGE - 1:
+                        input(colorize(Color.BLUE, 'continue?', True))
+                if 'q' in input(colorize(Color.RED, 'continue? (enter to continue, q to exit): ', True)):
+                    break
+        except:
+            traceback.print_exc()
+            print(colorize(Color.GREEN, '>>>> CONTINUING....', bold=True))
+            continue
+    provider.close()
+    out_db.close()
+
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Generate Synthetic Scene-Text Images')
+    parser.add_argument('--viz', action='store_true', dest='viz', default=False,
+                        help='flag for turning on visualizations')
+    parser.add_argument('--output-masks', action='store_true', dest='output_masks', default=False,
+                        help='flag for turning on output of masks')
+    parser.add_argument('--debug', action='store_true', dest='debug', default=False,
+                        help='flag for turning on debug output')
+    parser.add_argument("--data", type=str, dest='data_path', default=None,
+                        help="absolute path to data directory containing images, segmaps and depths")
+    args = parser.parse_args()
+    main(viz=args.viz, debug=args.debug, output_masks=args.output_masks, data_path=args.data_path)
diff --git a/invert_font_size.py b/invert_font_size.py
index 5697467..3eb5763 100644
--- a/invert_font_size.py
+++ b/invert_font_size.py
@@ -21,9 +21,8 @@
 FS = FontState()
 #plt.figure()
-#plt.hold(True)
 for i in xrange(len(FS.fonts)):
-    print i
+    print(i)
     font = freetype.Font(FS.fonts[i], size=12)
     h = []
     for y in ys:
diff --git a/poisson_reconstruct.py b/poisson_reconstruct.py
index 7f90899..4030136 100644
--- a/poisson_reconstruct.py
+++ b/poisson_reconstruct.py
@@ -212,7 +212,6 @@ def contiguous_regions(mask):
     with sns.axes_style("darkgrid"):
         plt.subplot(2,1,2)
         plt.plot(l_alpha,label='alpha')
-        plt.hold(True)
         plt.plot(l_poisson,label='poisson')
         plt.plot(l_actual,label='actual')
         plt.legend()
@@ -227,7 +226,6 @@ def contiguous_regions(mask):
     with sns.axes_style("white"):
         plt.subplot(2,1,1)
         plt.imshow(im_alpha[:,:,::-1].astype('uint8'))
-        plt.hold(True)
         plt.plot([0,im_alpha_L.shape[0]-1],[i,i],'r')
         plt.axis('image')
         plt.show()
diff --git a/requirements.txt b/requirements.txt
index 1b855ae..7460c72 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,18 @@
 cycler==0.10.0
-h5py==2.7.1
-matplotlib==2.1.1
-numpy==1.13.3
-Pillow==5.0.0
-pkg-resources==0.0.0
-pygame==1.9.3
+h5py==2.9.0
+image==1.5.27
+imageio==2.5.0
+kiwisolver==1.1.0
+matplotlib==3.0.3
+numpy==1.16.3
+opencv-python==4.1.0.25
+Pillow==6.0.0
+pygame==1.9.6
 pyparsing==2.2.0
 python-dateutil==2.6.1
 pytz==2017.3
-scipy==1.0.0
+scipy==1.2.1
 six==1.11.0
+sqlparse==0.3.0
+tornado==6.0.2
 wget==3.2
diff --git a/synthgen.py b/synthgen.py
index 2fda5e9..d3d67e8 100644
--- a/synthgen.py
+++ 
b/synthgen.py @@ -10,9 +10,8 @@ import cv2 import h5py from PIL import Image -import numpy as np -#import mayavi.mlab as mym -import matplotlib.pyplot as plt +import numpy as np +import matplotlib.pyplot as plt import os.path as osp import scipy.ndimage as sim import scipy.spatial.distance as ssd @@ -21,22 +20,25 @@ from colorize3_poisson import Colorize from common import * import traceback, itertools +import imageio +DEBUG = True + class TextRegions(object): """ Get region from segmentation which are good for placing text. """ - minWidth = 30 #px - minHeight = 30 #px - minAspect = 0.3 # w > 0.3*h + minWidth = 30 # px + minHeight = 30 # px + minAspect = 0.3 # w > 0.3*h maxAspect = 7 - minArea = 100 # number of pix - pArea = 0.60 # area_obj/area_minrect >= 0.6 + minArea = 100 # number of pix + pArea = 0.60 # area_obj/area_minrect >= 0.6 # RANSAC planar fitting params: - dist_thresh = 0.10 # m + dist_thresh = 0.10 # m num_inlier = 90 ransac_fit_trials = 100 min_z_projection = 0.25 @@ -48,44 +50,44 @@ def filter_rectified(mask): """ mask : 1 where "ON", 0 where "OFF" """ - wx = np.median(np.sum(mask,axis=0)) - wy = np.median(np.sum(mask,axis=1)) - return wx>TextRegions.minW and wy>TextRegions.minW + wx = np.median(np.sum(mask, axis=0)) + wy = np.median(np.sum(mask, axis=1)) + return wx > TextRegions.minW and wy > TextRegions.minW @staticmethod - def get_hw(pt,return_rot=False): + def get_hw(pt, return_rot=False): pt = pt.copy() R = su.unrotate2d(pt) - mu = np.median(pt,axis=0) - pt = (pt-mu[None,:]).dot(R.T) + mu[None,:] - h,w = np.max(pt,axis=0) - np.min(pt,axis=0) + mu = np.median(pt, axis=0) + pt = (pt - mu[None, :]).dot(R.T) + mu[None, :] + h, w = np.max(pt, axis=0) - np.min(pt, axis=0) if return_rot: - return h,w,R - return h,w - + return h, w, R + return h, w + @staticmethod - def filter(seg,area,label): + def filter(seg, area, label): """ Apply the filter. The final list is ranked by area. """ good = label[area > TextRegions.minArea] area = area[area > TextRegions.minArea] - filt,R = [],[] - for idx,i in enumerate(good): - mask = seg==i - xs,ys = np.where(mask) - - coords = np.c_[xs,ys].astype('float32') - rect = cv2.minAreaRect(coords) - #box = np.array(cv2.cv.BoxPoints(rect)) + filt, R = [], [] + for idx, i in enumerate(good): + mask = seg == i + xs, ys = np.where(mask) + + coords = np.c_[xs, ys].astype('float32') + rect = cv2.minAreaRect(coords) + # box = np.array(cv2.cv.BoxPoints(rect)) box = np.array(cv2.boxPoints(rect)) - h,w,rot = TextRegions.get_hw(box,return_rot=True) + h, w, rot = TextRegions.get_hw(box, return_rot=True) - f = (h > TextRegions.minHeight - and w > TextRegions.minWidth - and TextRegions.minAspect < w/h < TextRegions.maxAspect - and area[idx]/w*h > TextRegions.pArea) + f = (h > TextRegions.minHeight + and w > TextRegions.minWidth + and TextRegions.minAspect < w / h < TextRegions.maxAspect + and area[idx] / w * h > TextRegions.pArea) filt.append(f) R.append(rot) @@ -98,70 +100,70 @@ def filter(seg,area,label): aidx = np.argsort(-area) good = good[filt][aidx] R = [R[i] for i in aidx] - filter_info = {'label':good, 'rot':R, 'area': area[aidx]} + filter_info = {'label': good, 'rot': R, 'area': area[aidx]} return filter_info @staticmethod - def sample_grid_neighbours(mask,nsample,step=3): + def sample_grid_neighbours(mask, nsample, step=3): """ Given a HxW binary mask, sample 4 neighbours on the grid, in the cardinal directions, STEP pixels away. 
""" - if 2*step >= min(mask.shape[:2]): - return #None + if 2 * step >= min(mask.shape[:2]): + return # None - y_m,x_m = np.where(mask) - mask_idx = np.zeros_like(mask,'int32') + y_m, x_m = np.where(mask) + mask_idx = np.zeros_like(mask, 'int32') for i in range(len(y_m)): - mask_idx[y_m[i],x_m[i]] = i + mask_idx[y_m[i], x_m[i]] = i - xp,xn = np.zeros_like(mask), np.zeros_like(mask) - yp,yn = np.zeros_like(mask), np.zeros_like(mask) - xp[:,:-2*step] = mask[:,2*step:] - xn[:,2*step:] = mask[:,:-2*step] - yp[:-2*step,:] = mask[2*step:,:] - yn[2*step:,:] = mask[:-2*step,:] - valid = mask&xp&xn&yp&yn + xp, xn = np.zeros_like(mask), np.zeros_like(mask) + yp, yn = np.zeros_like(mask), np.zeros_like(mask) + xp[:, :-2 * step] = mask[:, 2 * step:] + xn[:, 2 * step:] = mask[:, :-2 * step] + yp[:-2 * step, :] = mask[2 * step:, :] + yn[2 * step:, :] = mask[:-2 * step, :] + valid = mask & xp & xn & yp & yn - ys,xs = np.where(valid) + ys, xs = np.where(valid) N = len(ys) - if N==0: #no valid pixels in mask: - return #None - nsample = min(nsample,N) - idx = np.random.choice(N,nsample,replace=False) + if N == 0: # no valid pixels in mask: + return # None + nsample = min(nsample, N) + idx = np.random.choice(N, nsample, replace=False) # generate neighborhood matrix: # (1+4)x2xNsample (2 for y,x) - xs,ys = xs[idx],ys[idx] + xs, ys = xs[idx], ys[idx] s = step - X = np.transpose(np.c_[xs,xs+s,xs+s,xs-s,xs-s][:,:,None],(1,2,0)) - Y = np.transpose(np.c_[ys,ys+s,ys-s,ys+s,ys-s][:,:,None],(1,2,0)) - sample_idx = np.concatenate([Y,X],axis=1) - mask_nn_idx = np.zeros((5,sample_idx.shape[-1]),'int32') + X = np.transpose(np.c_[xs, xs + s, xs + s, xs - s, xs - s][:, :, None], (1, 2, 0)) + Y = np.transpose(np.c_[ys, ys + s, ys - s, ys + s, ys - s][:, :, None], (1, 2, 0)) + sample_idx = np.concatenate([Y, X], axis=1) + mask_nn_idx = np.zeros((5, sample_idx.shape[-1]), 'int32') for i in range(sample_idx.shape[-1]): - mask_nn_idx[:,i] = mask_idx[sample_idx[:,:,i][:,0],sample_idx[:,:,i][:,1]] + mask_nn_idx[:, i] = mask_idx[sample_idx[:, :, i][:, 0], sample_idx[:, :, i][:, 1]] return mask_nn_idx @staticmethod - def filter_depth(xyz,seg,regions): - plane_info = {'label':[], - 'coeff':[], - 'support':[], - 'rot':[], - 'area':[]} - for idx,l in enumerate(regions['label']): - mask = seg==l - pt_sample = TextRegions.sample_grid_neighbours(mask,TextRegions.ransac_fit_trials,step=3) + def filter_depth(xyz, seg, regions): + plane_info = {'label': [], + 'coeff': [], + 'support': [], + 'rot': [], + 'area': []} + for idx, l in enumerate(regions['label']): + mask = seg == l + pt_sample = TextRegions.sample_grid_neighbours(mask, TextRegions.ransac_fit_trials, step=3) if pt_sample is None: - continue #not enough points for RANSAC + continue # not enough points for RANSAC # get-depths pt = xyz[mask] plane_model = su.isplanar(pt, pt_sample, - TextRegions.dist_thresh, - TextRegions.num_inlier, - TextRegions.min_z_projection) + TextRegions.dist_thresh, + TextRegions.num_inlier, + TextRegions.min_z_projection) if plane_model is not None: plane_coeff = plane_model[0] - if np.abs(plane_coeff[2])>TextRegions.min_z_projection: + if np.abs(plane_coeff[2]) > TextRegions.min_z_projection: plane_info['label'].append(l) plane_info['coeff'].append(plane_model[0]) plane_info['support'].append(plane_model[1]) @@ -171,13 +173,14 @@ def filter_depth(xyz,seg,regions): return plane_info @staticmethod - def get_regions(xyz,seg,area,label): - regions = TextRegions.filter(seg,area,label) + def get_regions(xyz, seg, area, label): + regions = 
TextRegions.filter(seg, area, label) # fit plane to text-regions: - regions = TextRegions.filter_depth(xyz,seg,regions) + regions = TextRegions.filter_depth(xyz, seg, regions) return regions -def rescale_frontoparallel(p_fp,box_fp,p_im): + +def rescale_frontoparallel(p_fp, box_fp, p_im): """ The fronto-parallel image region is rescaled to bring it in the same approx. size as the target region size. @@ -190,22 +193,23 @@ def rescale_frontoparallel(p_fp,box_fp,p_im): Returns the scale 's' to scale the fronto-parallel points by. """ - l1 = np.linalg.norm(box_fp[1,:]-box_fp[0,:]) - l2 = np.linalg.norm(box_fp[1,:]-box_fp[2,:]) + l1 = np.linalg.norm(box_fp[1, :] - box_fp[0, :]) + l2 = np.linalg.norm(box_fp[1, :] - box_fp[2, :]) - n0 = np.argmin(np.linalg.norm(p_fp-box_fp[0,:][None,:],axis=1)) - n1 = np.argmin(np.linalg.norm(p_fp-box_fp[1,:][None,:],axis=1)) - n2 = np.argmin(np.linalg.norm(p_fp-box_fp[2,:][None,:],axis=1)) + n0 = np.argmin(np.linalg.norm(p_fp - box_fp[0, :][None, :], axis=1)) + n1 = np.argmin(np.linalg.norm(p_fp - box_fp[1, :][None, :], axis=1)) + n2 = np.argmin(np.linalg.norm(p_fp - box_fp[2, :][None, :], axis=1)) - lt1 = np.linalg.norm(p_im[n1,:]-p_im[n0,:]) - lt2 = np.linalg.norm(p_im[n1,:]-p_im[n2,:]) + lt1 = np.linalg.norm(p_im[n1, :] - p_im[n0, :]) + lt2 = np.linalg.norm(p_im[n1, :] - p_im[n2, :]) - s = max(lt1/l1,lt2/l2) + s = max(lt1 / l1, lt2 / l2) if not np.isfinite(s): s = 1.0 return s -def get_text_placement_mask(xyz,mask,plane,pad=2,viz=False): + +def get_text_placement_mask(xyz, mask, plane, pad=2, viz=False): """ Returns a binary mask in which text can be placed. Also returns a homography from original image @@ -216,133 +220,136 @@ def get_text_placement_mask(xyz,mask,plane,pad=2,viz=False): REGION : DICT output of TextRegions.get_regions PAD : number of pixels to pad the placement-mask by """ - _,contour,hier = cv2.findContours(mask.copy().astype('uint8'), - mode=cv2.RETR_CCOMP, - method=cv2.CHAIN_APPROX_SIMPLE) + contour, hier = cv2.findContours(mask.copy().astype('uint8'), + mode=cv2.RETR_CCOMP, + method=cv2.CHAIN_APPROX_SIMPLE) contour = [np.squeeze(c).astype('float') for c in contour] - #plane = np.array([plane[1],plane[0],plane[2],plane[3]]) - H,W = mask.shape[:2] + # plane = np.array([plane[1],plane[0],plane[2],plane[3]]) + H, W = mask.shape[:2] # bring the contour 3d points to fronto-parallel config: - pts,pts_fp = [],[] - center = np.array([W,H])/2 - n_front = np.array([0.0,0.0,-1.0]) + pts, pts_fp = [], [] + center = np.array([W, H]) / 2 + n_front = np.array([0.0, 0.0, -1.0]) for i in range(len(contour)): cnt_ij = contour[i] xyz = su.DepthCamera.plane2xyz(center, cnt_ij, plane) - R = su.rot3d(plane[:3],n_front) + R = su.rot3d(plane[:3], n_front) xyz = xyz.dot(R.T) - pts_fp.append(xyz[:,:2]) + pts_fp.append(xyz[:, :2]) pts.append(cnt_ij) # unrotate in 2D plane: rect = cv2.minAreaRect(pts_fp[0].copy().astype('float32')) box = np.array(cv2.boxPoints(rect)) R2d = su.unrotate2d(box.copy()) - box = np.vstack([box,box[0,:]]) #close the box for visualization + box = np.vstack([box, box[0, :]]) # close the box for visualization + + mu = np.median(pts_fp[0], axis=0) + pts_tmp = (pts_fp[0] - mu[None, :]).dot(R2d.T) + mu[None, :] + boxR = (box - mu[None, :]).dot(R2d.T) + mu[None, :] - mu = np.median(pts_fp[0],axis=0) - pts_tmp = (pts_fp[0]-mu[None,:]).dot(R2d.T) + mu[None,:] - boxR = (box-mu[None,:]).dot(R2d.T) + mu[None,:] - # rescale the unrotated 2d points to approximately # the same scale as the target region: - s = rescale_frontoparallel(pts_tmp,boxR,pts[0]) + s 
= rescale_frontoparallel(pts_tmp, boxR, pts[0]) boxR *= s for i in range(len(pts_fp)): - pts_fp[i] = s*((pts_fp[i]-mu[None,:]).dot(R2d.T) + mu[None,:]) + pts_fp[i] = s * ((pts_fp[i] - mu[None, :]).dot(R2d.T) + mu[None, :]) # paint the unrotated contour points: - minxy = -np.min(boxR,axis=0) + pad//2 - ROW = np.max(ssd.pdist(np.atleast_2d(boxR[:,0]).T)) - COL = np.max(ssd.pdist(np.atleast_2d(boxR[:,1]).T)) + minxy = -np.min(boxR, axis=0) + pad // 2 + ROW = np.max(ssd.pdist(np.atleast_2d(boxR[:, 0]).T)) + COL = np.max(ssd.pdist(np.atleast_2d(boxR[:, 1]).T)) - place_mask = 255*np.ones((int(np.ceil(COL))+pad, int(np.ceil(ROW))+pad), 'uint8') + place_mask = 255 * np.ones((int(np.ceil(COL)) + pad, int(np.ceil(ROW)) + pad), 'uint8') - pts_fp_i32 = [(pts_fp[i]+minxy[None,:]).astype('int32') for i in range(len(pts_fp))] - cv2.drawContours(place_mask,pts_fp_i32,-1,0, + pts_fp_i32 = [(pts_fp[i] + minxy[None, :]).astype('int32') for i in range(len(pts_fp))] + cv2.drawContours(place_mask, pts_fp_i32, -1, 0, thickness=cv2.FILLED, - lineType=8,hierarchy=hier) - - if not TextRegions.filter_rectified((~place_mask).astype('float')/255): + lineType=8, hierarchy=hier) + + if not TextRegions.filter_rectified((~place_mask).astype('float') / 255): return # calculate the homography - H,_ = cv2.findHomography(pts[0].astype('float32').copy(), - pts_fp_i32[0].astype('float32').copy(), - method=0) + H, _ = cv2.findHomography(pts[0].astype('float32').copy(), + pts_fp_i32[0].astype('float32').copy(), + method=0) - Hinv,_ = cv2.findHomography(pts_fp_i32[0].astype('float32').copy(), - pts[0].astype('float32').copy(), - method=0) + Hinv, _ = cv2.findHomography(pts_fp_i32[0].astype('float32').copy(), + pts[0].astype('float32').copy(), + method=0) if viz: - plt.subplot(1,2,1) + plt.subplot(1, 2, 1) plt.imshow(mask) - plt.subplot(1,2,2) + plt.subplot(1, 2, 2) plt.imshow(~place_mask) - plt.hold(True) for i in range(len(pts_fp_i32)): - plt.scatter(pts_fp_i32[i][:,0],pts_fp_i32[i][:,1], - edgecolors='none',facecolor='g',alpha=0.5) + plt.scatter(pts_fp_i32[i][:, 0], pts_fp_i32[i][:, 1], + edgecolors='none', facecolor='g', alpha=0.5) plt.show() - return place_mask,H,Hinv + return place_mask, H, Hinv + -def viz_masks(fignum,rgb,seg,depth,label): +def viz_masks(fignum, rgb, seg, depth, label): """ img,depth,seg are images of the same size. visualizes depth masks for top NOBJ objects. 
""" - def mean_seg(rgb,seg,label): + + def mean_seg(rgb, seg, label): mim = np.zeros_like(rgb) for i in np.unique(seg.flat): - mask = seg==i - col = np.mean(rgb[mask,:],axis=0) - mim[mask,:] = col[None,None,:] - mim[seg==0,:] = 0 + mask = seg == i + col = np.mean(rgb[mask, :], axis=0) + mim[mask, :] = col[None, None, :] + mim[seg == 0, :] = 0 return mim - mim = mean_seg(rgb,seg,label) + mim = mean_seg(rgb, seg, label) img = rgb.copy() - for i,idx in enumerate(label): - mask = seg==idx - rgb_rand = (255*np.random.rand(3)).astype('uint8') - img[mask] = rgb_rand[None,None,:] + for i, idx in enumerate(label): + mask = seg == idx + rgb_rand = (255 * np.random.rand(3)).astype('uint8') + img[mask] = rgb_rand[None, None, :] - #import scipy - # scipy.misc.imsave('seg.png', mim) - # scipy.misc.imsave('depth.png', depth) - # scipy.misc.imsave('txt.png', rgb) - # scipy.misc.imsave('reg.png', img) + # import scipy + # imageio.imwrite('seg.png', mim) + # imageio.imwrite('depth.png', depth) + # imageio.imwrite('txt.png', rgb) + # imageio.imwrite('reg.png', img) plt.close(fignum) plt.figure(fignum) - ims = [rgb,mim,depth,img] + ims = [rgb, mim, depth, img] for i in range(len(ims)): - plt.subplot(2,2,i+1) + plt.subplot(2, 2, i + 1) plt.imshow(ims[i]) plt.show(block=False) -def viz_regions(img,xyz,seg,planes,labels): + +def viz_regions(img, xyz, seg, planes, labels): """ img,depth,seg are images of the same size. visualizes depth masks for top NOBJ objects. """ # plot the RGB-D point-cloud: - su.plot_xyzrgb(xyz.reshape(-1,3),img.reshape(-1,3)) + su.plot_xyzrgb(xyz.reshape(-1, 3), img.reshape(-1, 3)) # plot the RANSAC-planes at the text-regions: - for i,l in enumerate(labels): - mask = seg==l - xyz_region = xyz[mask,:] - su.visualize_plane(xyz_region,np.array(planes[i])) + for i, l in enumerate(labels): + mask = seg == l + xyz_region = xyz[mask, :] + su.visualize_plane(xyz_region, np.array(planes[i])) - mym.view(180,180) + mym.view(180, 180) mym.orientation_axes() mym.show(True) - -def viz_textbb(fignum,text_im, bb_list,alpha=1.0): + + +def viz_textbb(fignum, text_im, bb_list, alpha=1.0): """ text_im : image containing text bb_list : list of 2x4xn_i boundinb-box matrices @@ -350,34 +357,30 @@ def viz_textbb(fignum,text_im, bb_list,alpha=1.0): plt.close(fignum) plt.figure(fignum) plt.imshow(text_im) - plt.hold(True) - H,W = text_im.shape[:2] + H, W = text_im.shape[:2] for i in range(len(bb_list)): bbs = bb_list[i] ni = bbs.shape[-1] for j in range(ni): - bb = bbs[:,:,j] - bb = np.c_[bb,bb[:,0]] - plt.plot(bb[0,:], bb[1,:], 'r', linewidth=2, alpha=alpha) - plt.gca().set_xlim([0,W-1]) - plt.gca().set_ylim([H-1,0]) + bb = bbs[:, :, j] + bb = np.c_[bb, bb[:, 0]] + plt.plot(bb[0, :], bb[1, :], 'r', linewidth=2, alpha=alpha) + plt.gca().set_xlim([0, W - 1]) + plt.gca().set_ylim([H - 1, 0]) plt.show(block=False) + class RendererV3(object): def __init__(self, data_dir, max_time=None): self.text_renderer = tu.RenderFont(data_dir) self.colorizer = Colorize(data_dir) - #self.colorizerV2 = colorV2.Colorize(data_dir) - - self.min_char_height = 8 #px - self.min_asp_ratio = 0.4 # - + self.min_char_height = 8 # px + self.min_asp_ratio = 0.4 # self.max_text_regions = 7 - self.max_time = max_time - def filter_regions(self,regions,filt): + def filter_regions(self, regions, filt): """ filt : boolean list of regions to keep. 
""" @@ -386,27 +389,27 @@ def filter_regions(self,regions,filt): regions[k] = [regions[k][i] for i in idx] return regions - def filter_for_placement(self,xyz,seg,regions): + def filter_for_placement(self, xyz, seg, regions): filt = np.zeros(len(regions['label'])).astype('bool') - masks,Hs,Hinvs = [],[], [] - for idx,l in enumerate(regions['label']): - res = get_text_placement_mask(xyz,seg==l,regions['coeff'][idx],pad=2) + masks, Hs, Hinvs = [], [], [] + for idx, l in enumerate(regions['label']): + res = get_text_placement_mask(xyz, seg == l, regions['coeff'][idx], pad=2) if res is not None: - mask,H,Hinv = res + mask, H, Hinv = res masks.append(mask) Hs.append(H) Hinvs.append(Hinv) filt[idx] = True - regions = self.filter_regions(regions,filt) + regions = self.filter_regions(regions, filt) regions['place_mask'] = masks regions['homography'] = Hs regions['homography_inv'] = Hinvs return regions - def warpHomography(self,src_mat,H,dst_size): + def warpHomography(self, src_mat, H, dst_size): dst_mat = cv2.warpPerspective(src_mat, H, dst_size, - flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR) + flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR) return dst_mat def homographyBB(self, bbs, H, offset=None): @@ -419,22 +422,22 @@ def homographyBB(self, bbs, H, offset=None): """ eps = 1e-16 # check the shape of the BB array: - t,f,n = bbs.shape - assert (t==2) and (f==4) + t, f, n = bbs.shape + assert (t == 2) and (f == 4) # append 1 for homogenous coordinates: - bbs_h = np.reshape(np.r_[bbs, np.ones((1,4,n))], (3,4*n), order='F') + bbs_h = np.reshape(np.r_[bbs, np.ones((1, 4, n))], (3, 4 * n), order='F') if offset != None: - bbs_h[:2,:] += np.array(offset)[:,None] + bbs_h[:2, :] += np.array(offset)[:, None] # perpective: bbs_h = H.dot(bbs_h) - bbs_h /= (bbs_h[2,:]+eps) + bbs_h /= (bbs_h[2, :] + eps) - bbs_h = np.reshape(bbs_h, (3,4,n), order='F') - return bbs_h[:2,:,:] + bbs_h = np.reshape(bbs_h, (3, 4, n), order='F') + return bbs_h[:2, :, :] - def bb_filter(self,bb0,bb,text): + def bb_filter(self, bb0, bb, text): """ Ensure that bounding-boxes are not too distorted after perspective distortion. @@ -443,99 +446,95 @@ def bb_filter(self,bb0,bb,text): bb : 2x4xn matrix of BB after perspective text: string of text -- for excluding symbols/punctuations. 
""" - h0 = np.linalg.norm(bb0[:,3,:] - bb0[:,0,:], axis=0) - w0 = np.linalg.norm(bb0[:,1,:] - bb0[:,0,:], axis=0) - hw0 = np.c_[h0,w0] + h0 = np.linalg.norm(bb0[:, 3, :] - bb0[:, 0, :], axis=0) + w0 = np.linalg.norm(bb0[:, 1, :] - bb0[:, 0, :], axis=0) + hw0 = np.c_[h0, w0] - h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0) - w = np.linalg.norm(bb[:,1,:] - bb[:,0,:], axis=0) - hw = np.c_[h,w] + h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0) + w = np.linalg.norm(bb[:, 1, :] - bb[:, 0, :], axis=0) + hw = np.c_[h, w] # remove newlines and spaces: text = ''.join(text.split()) - assert len(text)==bb.shape[-1] + assert len(text) == bb.shape[-1] alnum = np.array([ch.isalnum() for ch in text]) - hw0 = hw0[alnum,:] - hw = hw[alnum,:] + hw0 = hw0[alnum, :] + hw = hw[alnum, :] - min_h0, min_h = np.min(hw0[:,0]), np.min(hw[:,0]) - asp0, asp = hw0[:,0]/hw0[:,1], hw[:,0]/hw[:,1] + min_h0, min_h = np.min(hw0[:, 0]), np.min(hw[:, 0]) + asp0, asp = hw0[:, 0] / hw0[:, 1], hw[:, 0] / hw[:, 1] asp0, asp = np.median(asp0), np.median(asp) - asp_ratio = asp/asp0 - is_good = ( min_h > self.min_char_height - and asp_ratio > self.min_asp_ratio - and asp_ratio < 1.0/self.min_asp_ratio) + asp_ratio = asp / asp0 + is_good = (min_h > self.min_char_height + and asp_ratio > self.min_asp_ratio + and asp_ratio < 1.0 / self.min_asp_ratio) return is_good - def get_min_h(selg, bb, text): # find min-height: - h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0) + h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0) # remove newlines and spaces: text = ''.join(text.split()) - assert len(text)==bb.shape[-1] + assert len(text) == bb.shape[-1] alnum = np.array([ch.isalnum() for ch in text]) h = h[alnum] return np.min(h) - def feather(self, text_mask, min_h): # determine the gaussian-blur std: - if min_h <= 15 : + if min_h <= 15: bsz = 0.25 - ksz=1 + ksz = 1 elif 15 < min_h < 30: - bsz = max(0.30, 0.5 + 0.1*np.random.randn()) + bsz = max(0.30, 0.5 + 0.1 * np.random.randn()) ksz = 3 else: - bsz = max(0.5, 1.5 + 0.5*np.random.randn()) + bsz = max(0.5, 1.5 + 0.5 * np.random.randn()) ksz = 5 - return cv2.GaussianBlur(text_mask,(ksz,ksz),bsz) + return cv2.GaussianBlur(text_mask, (ksz, ksz), bsz) - def place_text(self,rgb,collision_mask,H,Hinv): + def place_text(self, rgb, collision_mask, H, Hinv): font = self.text_renderer.font_state.sample() font = self.text_renderer.font_state.init_font(font) - render_res = self.text_renderer.render_sample(font,collision_mask) - if render_res is None: # rendering not successful - return #None + render_res = self.text_renderer.render_sample(font, collision_mask) + if render_res is None: # rendering not successful + return # None else: - text_mask,loc,bb,text = render_res + text_mask, loc, bb, text = render_res # update the collision mask with text: - collision_mask += (255 * (text_mask>0)).astype('uint8') + collision_mask += (255 * (text_mask > 0)).astype('uint8') # warp the object mask back onto the image: - text_mask_orig = text_mask.copy() + # text_mask_orig = text_mask.copy() bb_orig = bb.copy() - text_mask = self.warpHomography(text_mask,H,rgb.shape[:2][::-1]) - bb = self.homographyBB(bb,Hinv) + text_mask = self.warpHomography(text_mask, H, rgb.shape[:2][::-1]) + bb = self.homographyBB(bb, Hinv) - if not self.bb_filter(bb_orig,bb,text): - #warn("bad charBB statistics") - return #None + if not self.bb_filter(bb_orig, bb, text): + # warn("bad charBB statistics") + return # None # get the minimum height of the character-BB: - min_h = self.get_min_h(bb,text) + min_h = self.get_min_h(bb, text) - 
#feathering:
+        # feathering:
         text_mask = self.feather(text_mask, min_h)
+        im_final = self.colorizer.color(rgb, [text_mask], np.array([min_h]))
 
-        im_final = self.colorizer.color(rgb,[text_mask],np.array([min_h]))
-
-        return im_final, text, bb, collision_mask
-
+        return im_final, text, bb, text_mask
 
     def get_num_text_regions(self, nregions):
-        #return nregions
+        # return nregions
         nmax = min(self.max_text_regions, nregions)
         if np.random.rand() < 0.10:
             rnd = np.random.rand()
         else:
-            rnd = np.random.beta(5.0,1.0)
+            rnd = np.random.beta(5.0, 1.0)
         return int(np.ceil(nmax * rnd))
 
     def char2wordBB(self, charBB, text):
@@ -551,54 +550,58 @@
         """
         wrds = text.split()
         bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
-        wordBB = np.zeros((2,4,len(wrds)), 'float32')
-
+        wordBB = np.zeros((2, 4, len(wrds)), 'float32')
+
         for i in range(len(wrds)):
-            cc = charBB[:,:,bb_idx[i]:bb_idx[i+1]]
+            cc = charBB[:, :, bb_idx[i]:bb_idx[i + 1]]
 
             # fit a rotated-rectangle:
             # change shape from 2x4xn_i -> (4*n_i)x2
-            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
+            cc = np.squeeze(np.concatenate(np.dsplit(cc, cc.shape[-1]), axis=1)).T.astype('float32')
             rect = cv2.minAreaRect(cc.copy())
             box = np.array(cv2.boxPoints(rect))
 
             # find the permutation of box-coordinates which
             # are "aligned" appropriately with the character-bb.
             # (exhaustive search over all possible assignments):
-            cc_tblr = np.c_[cc[0,:],
-                            cc[-3,:],
-                            cc[-2,:],
-                            cc[3,:]].T
+            cc_tblr = np.c_[cc[0, :],
+                            cc[-3, :],
+                            cc[-2, :],
+                            cc[3, :]].T
             perm4 = np.array(list(itertools.permutations(np.arange(4))))
             dists = []
             for pidx in range(perm4.shape[0]):
-                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
+                d = np.sum(np.linalg.norm(box[perm4[pidx], :] - cc_tblr, axis=1))
                 dists.append(d)
-            wordBB[:,:,i] = box[perm4[np.argmin(dists)],:].T
+            wordBB[:, :, i] = box[perm4[np.argmin(dists)], :].T
 
         return wordBB
 
-
-    def render_text(self,rgb,depth,seg,area,label,ninstance=1,viz=False):
+    def render_text(self, rgb, depth, seg, area, label, ninstance=1):
         """
-        rgb   : HxWx3 image rgb values (uint8)
-        depth : HxW depth values (float)
-        seg   : HxW segmentation region masks
-        area  : number of pixels in each region
-        label : region labels == unique(seg) / {0}
-                i.e., indices of pixels in SEG which
-                constitute a region mask
-        ninstance : no of times image should be
-                    used to place text.
-
-        @return:
+        Renders text onto the given image and returns the rendered instances.
+
+        Args:
+            rgb : HxWx3 image rgb values (uint8)
+            depth : HxW depth values (float)
+            seg : HxW segmentation region masks
+            area : number of pixels in each region
+            label : region labels == unique(seg) / {0}
+                    i.e., indices of pixels in SEG which
+                    constitute a region mask
+            ninstance : number of times image should be
+                        used to place text.
+
+        Returns:
            res : a list of dictionaries, one for each of
                  the image instances.
                  Each dictionary has the following structure:
-                     'img' : rgb-image with text on it.
-                     'bb'  : 2x4xn matrix of bounding-boxes
+                     'img' : rgb-image with text on it.
+                     'bb' : 2x4xn matrix of bounding-boxes
                              for each character in the image.
-                     'txt' : a list of strings.
+                     'txt' : a list of strings.
+                     'masks': a list of masks of text placed on the image.
+                              Shape of each mask is the same as shape of original image.
                  The correspondence b/w bb and txt is that
                  i-th non-space white-character in txt is at bb[:,:,i].
@@ -609,33 +612,34 @@ def render_text(self,rgb,depth,seg,area,label,ninstance=1,viz=False):
         try:
             # depth -> xyz
             xyz = su.DepthCamera.depth2xyz(depth)
-
+
             # find text-regions:
-            regions = TextRegions.get_regions(xyz,seg,area,label)
+            regions = TextRegions.get_regions(xyz, seg, area, label)
 
             # find the placement mask and homographies:
-            regions = self.filter_for_placement(xyz,seg,regions)
+            regions = self.filter_for_placement(xyz, seg, regions)
 
             # finally place some text:
             nregions = len(regions['place_mask'])
-            if nregions < 1: # no good region to place text on
+            if nregions < 1:  # no good region to place text on
                 return []
         except:
            # failure in pre-text placement
-            #import traceback
+            # import traceback
            traceback.print_exc()
            return []
 
         res = []
         for i in range(ninstance):
+            # place_masks is a local copy of the list of collision masks. It is updated, but is not really used.
            place_masks = copy.deepcopy(regions['place_mask'])
 
-            print (colorize(Color.CYAN, " ** instance # : %d"%i))
+            print(colorize(Color.CYAN, " ** instance # : %d" % i))
 
-            idict = {'img':[], 'charBB':None, 'wordBB':None, 'txt':None}
+            idict = {'img': [], 'charBB': None, 'wordBB': None, 'txt': None}
 
-            m = self.get_num_text_regions(nregions)#np.arange(nregions)#min(nregions, 5*ninstance*self.max_text_regions))
-            reg_idx = np.arange(min(2*m,nregions))
+            m = self.get_num_text_regions(nregions)
+            reg_idx = np.arange(min(2 * m, nregions))
             np.random.shuffle(reg_idx)
             reg_idx = reg_idx[:m]
@@ -643,25 +647,30 @@
             img = rgb.copy()
             itext = []
             ibb = []
+            masks = []
 
             # process regions:
             num_txt_regions = len(reg_idx)
-            NUM_REP = 5 # re-use each region three times:
+            NUM_REP = 5  # re-use each region five times:
             reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
+
+            if DEBUG:
+                print(" ... try text rendering for %d regions" % len(reg_range))
+
             for idx in reg_range:
                 ireg = reg_idx[idx]
                 try:
                     if self.max_time is None:
-                        txt_render_res = self.place_text(img,place_masks[ireg],
+                        txt_render_res = self.place_text(img, place_masks[ireg],
                                                          regions['homography'][ireg],
                                                          regions['homography_inv'][ireg])
                     else:
                         with time_limit(self.max_time):
-                            txt_render_res = self.place_text(img,place_masks[ireg],
+                            txt_render_res = self.place_text(img, place_masks[ireg],
                                                              regions['homography'][ireg],
                                                              regions['homography_inv'][ireg])
                 except TimeoutException as msg:
-                    print (msg)
+                    print(msg)
                     continue
                 except:
                     traceback.print_exc()
@@ -669,25 +678,24 @@
                     continue
 
                 if txt_render_res is not None:
+                    if DEBUG:
+                        print(" ... text rendering attempt finished successfully")
                     placed = True
-                    img,text,bb,collision_mask = txt_render_res
+                    img, text, bb, collision_mask = txt_render_res
                     # update the region collision mask:
-                    place_masks[ireg] = collision_mask
+                    # place_masks[ireg] = collision_mask  # no point in doing that, already updated inside place_text method
+                    masks.append(collision_mask)
 
                     # store the result:
                     itext.append(text)
                     ibb.append(bb)
 
-            if placed:
+            if placed:  # at least 1 word was placed in this instance:
                 idict['img'] = img
                 idict['txt'] = itext
                 idict['charBB'] = np.concatenate(ibb, axis=2)
                 idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))
+                idict['masks'] = masks
+                idict['labeled_region'] = regions['label']
                 res.append(idict.copy())
-            if viz:
-                viz_textbb(1,img, [idict['wordBB']], alpha=1.0)
-                viz_masks(2,img,seg,depth,regions['label'])
-                # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
-                if i < ninstance-1:
-                    raw_input(colorize(Color.BLUE,'continue?',True))
         return res
diff --git a/visualize_results.py b/visualize_results.py
index 7b578c6..c143635 100644
--- a/visualize_results.py
+++ b/visualize_results.py
@@ -24,7 +24,6 @@ def viz_textbb(text_im, charBB_list, wordBB, alpha=1.0):
     plt.close(1)
     plt.figure(1)
    plt.imshow(text_im)
-    plt.hold(True)
     H,W = text_im.shape[:2]
 
     # plot the character-BB:
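
A minimal usage sketch for the refactored data loading (illustrative only, not part of the patch; the directory layout is an assumption based on data_provider.py and the --data help text):

    from data_provider import DataProvider

    # point at a directory containing depth.h5, seg.h5 and bg_img/,
    # or pass None to download and open the small default dset.h5 archive:
    provider = DataProvider('data')
    for imname in provider.get_imnames():
        img = provider.get_image(imname)     # PIL image, converted to RGB
        depth = provider.get_depth(imname)   # HxW depth estimate
        seg = provider.get_segmap(imname)    # region masks; 'area'/'label' in .attrs
    provider.close()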
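
With --output-masks, gen.py writes one word per line into each <prefix>_bb.txt file: the 2x4 word box flattened to eight comma-separated floats, followed by the word itself. A hypothetical reader sketch (read_word_boxes is not part of the patch; it assumes the write format above and re-joins any commas inside the word):

    import numpy as np

    def read_word_boxes(path):
        boxes, words = [], []
        with open(path) as f:
            for line in f:
                parts = line.rstrip("\n").split(",")
                # eight floats -> 4 corners x (x, y), matching np.swapaxes(bbs, 2, 0)[j]
                boxes.append(np.array(parts[:8], dtype=np.float32).reshape(4, 2))
                words.append(",".join(parts[8:]))
        return boxes, words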