diff --git a/colorize3_poisson.py b/colorize3_poisson.py index e4f32af..4cf1737 100644 --- a/colorize3_poisson.py +++ b/colorize3_poisson.py @@ -1,52 +1,55 @@ import cv2 as cv -import numpy as np -import matplotlib.pyplot as plt +import numpy as np +import matplotlib.pyplot as plt import scipy.interpolate as si -import scipy.ndimage as scim +import scipy.ndimage as scim import scipy.ndimage.interpolation as sii import os import os.path as osp -#import cPickle as cp +# import cPickle as cp import _pickle as cp -#import Image +# import Image from PIL import Image from poisson_reconstruct import blit_images import pickle + def sample_weighted(p_dict): ps = p_dict.keys() - return ps[np.random.choice(len(ps),p=p_dict.values())] + return ps[np.random.choice(len(ps), p=p_dict.values())] + class Layer(object): - def __init__(self,alpha,color): + def __init__(self, alpha, color): # alpha for the whole image: - assert alpha.ndim==2 + assert alpha.ndim == 2 self.alpha = alpha - [n,m] = alpha.shape[:2] + [n, m] = alpha.shape[:2] - color=np.atleast_1d(np.array(color)).astype('uint8') + color = np.atleast_1d(np.array(color)).astype('uint8') # color for the image: - if color.ndim==1: # constant color for whole layer + if color.ndim == 1: # constant color for whole layer ncol = color.size - if ncol == 1 : #grayscale layer - self.color = color * np.ones((n,m,3),'uint8') - if ncol == 3 : - self.color = np.ones((n,m,3),'uint8') * color[None,None,:] - elif color.ndim==2: # grayscale image - self.color = np.repeat(color[:,:,None],repeats=3,axis=2).copy().astype('uint8') - elif color.ndim==3: #rgb image + if ncol == 1: # grayscale layer + self.color = color * np.ones((n, m, 3), 'uint8') + if ncol == 3: + self.color = np.ones((n, m, 3), 'uint8') * color[None, None, :] + elif color.ndim == 2: # grayscale image + self.color = np.repeat(color[:, :, None], repeats=3, axis=2).copy().astype('uint8') + elif color.ndim == 3: # rgb image self.color = color.copy().astype('uint8') else: - print (color.shape) + print(color.shape) raise Exception("color datatype not understood") + class FontColor(object): def __init__(self, col_file): - with open(col_file,'rb') as f: - #self.colorsRGB = cp.load(f) + with open(col_file, 'rb') as f: + # self.colorsRGB = cp.load(f) u = pickle._Unpickler(f) u.encoding = 'latin1' p = u.load() @@ -55,9 +58,8 @@ def __init__(self, col_file): # convert color-means from RGB to LAB for better nearest neighbour # computations: - self.colorsLAB = np.r_[self.colorsRGB[:,0:3], self.colorsRGB[:,6:9]].astype('uint8') - self.colorsLAB = np.squeeze(cv.cvtColor(self.colorsLAB[None,:,:],cv.COLOR_RGB2Lab)) - + self.colorsLAB = np.r_[self.colorsRGB[:, 0:3], self.colorsRGB[:, 6:9]].astype('uint8') + self.colorsLAB = np.squeeze(cv.cvtColor(self.colorsLAB[None, :, :], cv.COLOR_RGB2Lab)) def sample_normal(self, col_mean, col_std): """ @@ -76,19 +78,19 @@ def sample_from_data(self, bg_mat): """ bg_orig = bg_mat.copy() bg_mat = cv.cvtColor(bg_mat, cv.COLOR_RGB2Lab) - bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]),3)) - bg_mean = np.mean(bg_mat,axis=0) + bg_mat = np.reshape(bg_mat, (np.prod(bg_mat.shape[:2]), 3)) + bg_mean = np.mean(bg_mat, axis=0) - norms = np.linalg.norm(self.colorsLAB-bg_mean[None,:], axis=1) + norms = np.linalg.norm(self.colorsLAB - bg_mean[None, :], axis=1) # choose a random color amongst the top 3 closest matches: - #nn = np.random.choice(np.argsort(norms)[:3]) + # nn = np.random.choice(np.argsort(norms)[:3]) nn = np.argmin(norms) ## nearest neighbour color: - data_col = 
self.colorsRGB[np.mod(nn,self.ncol),:] + data_col = self.colorsRGB[np.mod(nn, self.ncol), :] - col1 = self.sample_normal(data_col[:3],data_col[3:6]) - col2 = self.sample_normal(data_col[6:9],data_col[9:12]) + col1 = self.sample_normal(data_col[:3], data_col[3:6]) + col2 = self.sample_normal(data_col[6:9], data_col[9:12]) if nn < self.ncol: return (col2, col1) @@ -98,9 +100,9 @@ def sample_from_data(self, bg_mat): def mean_color(self, arr): col = cv.cvtColor(arr, cv.COLOR_RGB2HSV) - col = np.reshape(col, (np.prod(col.shape[:2]),3)) - col = np.mean(col,axis=0).astype('uint8') - return np.squeeze(cv.cvtColor(col[None,None,:],cv.COLOR_HSV2RGB)) + col = np.reshape(col, (np.prod(col.shape[:2]), 3)) + col = np.mean(col, axis=0).astype('uint8') + return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB)) def invert(self, rgb): rgb = 127 + rgb @@ -110,9 +112,9 @@ def complement(self, rgb_color): """ return a color which is complementary to the RGB_COLOR. """ - col_hsv = np.squeeze(cv.cvtColor(rgb_color[None,None,:], cv.COLOR_RGB2HSV)) - col_hsv[0] = col_hsv[0] + 128 #uint8 mods to 255 - col_comp = np.squeeze(cv.cvtColor(col_hsv[None,None,:],cv.COLOR_HSV2RGB)) + col_hsv = np.squeeze(cv.cvtColor(rgb_color[None, None, :], cv.COLOR_RGB2HSV)) + col_hsv[0] = col_hsv[0] + 128 # uint8 mods to 255 + col_comp = np.squeeze(cv.cvtColor(col_hsv[None, None, :], cv.COLOR_HSV2RGB)) return col_comp def triangle_color(self, col1, col2): @@ -120,43 +122,42 @@ def triangle_color(self, col1, col2): Returns a color which is "opposite" to both col1 and col2. """ col1, col2 = np.array(col1), np.array(col2) - col1 = np.squeeze(cv.cvtColor(col1[None,None,:], cv.COLOR_RGB2HSV)) - col2 = np.squeeze(cv.cvtColor(col2[None,None,:], cv.COLOR_RGB2HSV)) + col1 = np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_RGB2HSV)) + col2 = np.squeeze(cv.cvtColor(col2[None, None, :], cv.COLOR_RGB2HSV)) h1, h2 = col1[0], col2[0] - if h2 < h1 : h1,h2 = h2,h1 #swap - dh = h2-h1 - if dh < 127: dh = 255-dh - col1[0] = h1 + dh/2 - return np.squeeze(cv.cvtColor(col1[None,None,:],cv.COLOR_HSV2RGB)) + if h2 < h1: h1, h2 = h2, h1 # swap + dh = h2 - h1 + if dh < 127: dh = 255 - dh + col1[0] = h1 + dh / 2 + return np.squeeze(cv.cvtColor(col1[None, None, :], cv.COLOR_HSV2RGB)) def change_value(self, col_rgb, v_std=50): - col = np.squeeze(cv.cvtColor(col_rgb[None,None,:], cv.COLOR_RGB2HSV)) + col = np.squeeze(cv.cvtColor(col_rgb[None, None, :], cv.COLOR_RGB2HSV)) x = col[2] - vs = np.linspace(0,1) - ps = np.abs(vs - x/255.0) + vs = np.linspace(0, 1) + ps = np.abs(vs - x / 255.0) ps /= np.sum(ps) - v_rand = np.clip(np.random.choice(vs,p=ps) + 0.1*np.random.randn(),0,1) - col[2] = 255*v_rand - return np.squeeze(cv.cvtColor(col[None,None,:],cv.COLOR_HSV2RGB)) + v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1) + col[2] = 255 * v_rand + return np.squeeze(cv.cvtColor(col[None, None, :], cv.COLOR_HSV2RGB)) class Colorize(object): - def __init__(self, model_dir='data'):#, im_path): + def __init__(self, model_dir='data'): # , im_path): # # get a list of background-images: # imlist = [osp.join(im_path,f) for f in os.listdir(im_path)] # self.bg_list = [p for p in imlist if osp.isfile(p)] - self.font_color = FontColor(col_file=osp.join(model_dir,'models/colors_new.cp')) + self.font_color = FontColor(col_file=osp.join(model_dir, 'models/colors_new.cp')) # probabilities of different text-effects: - self.p_bevel = 0.05 # add bevel effect to text - self.p_outline = 0.05 # just keep the outline of the text + self.p_bevel = 0.05 # 
add bevel effect to text + self.p_outline = 0.05 # just keep the outline of the text self.p_drop_shadow = 0.15 self.p_border = 0.15 - self.p_displacement = 0.30 # add background-based bump-mapping - self.p_texture = 0.0 # use an image for coloring text - + self.p_displacement = 0.30 # add background-based bump-mapping + self.p_texture = 0.0 # use an image for coloring text def drop_shadow(self, alpha, theta, shift, size, op=0.80): """ @@ -169,12 +170,12 @@ def drop_shadow(self, alpha, theta, shift, size, op=0.80): @return : alpha of the shadow layer (it is assumed that the color is black/white) """ - if size%2==0: + if size % 2 == 0: size -= 1 - size = max(1,size) - shadow = cv.GaussianBlur(alpha,(size,size),0) - [dx,dy] = shift * np.array([-np.sin(theta), np.cos(theta)]) - shadow = op*sii.shift(shadow, shift=[dx,dy],mode='constant',cval=0) + size = max(1, size) + shadow = cv.GaussianBlur(alpha, (size, size), 0) + [dx, dy] = shift * np.array([-np.sin(theta), np.cos(theta)]) + shadow = op * sii.shift(shadow, shift=[dx, dy], mode='constant', cval=0) return shadow.astype('uint8') def border(self, alpha, size, kernel_type='RECT'): @@ -185,38 +186,38 @@ def border(self, alpha, size, kernel_type='RECT'): @return : alpha layer of the border (color to be added externally). """ - kdict = {'RECT':cv.MORPH_RECT, 'ELLIPSE':cv.MORPH_ELLIPSE, - 'CROSS':cv.MORPH_CROSS} - kernel = cv.getStructuringElement(kdict[kernel_type],(size,size)) - border = cv.dilate(alpha,kernel,iterations=1) # - alpha + kdict = {'RECT': cv.MORPH_RECT, 'ELLIPSE': cv.MORPH_ELLIPSE, + 'CROSS': cv.MORPH_CROSS} + kernel = cv.getStructuringElement(kdict[kernel_type], (size, size)) + border = cv.dilate(alpha, kernel, iterations=1) # - alpha return border - def blend(self,cf,cb,mode='normal'): + def blend(self, cf, cb, mode='normal'): return cf - def merge_two(self,fore,back,blend_type=None): + def merge_two(self, fore, back, blend_type=None): """ merge two FOREground and BACKground layers. ref: https://en.wikipedia.org/wiki/Alpha_compositing ref: Chapter 7 (pg. 440 and pg. 
444): http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf """ - a_f = fore.alpha/255.0 - a_b = back.alpha/255.0 + a_f = fore.alpha / 255.0 + a_b = back.alpha / 255.0 c_f = fore.color c_b = back.color - a_r = a_f + a_b - a_f*a_b + a_r = a_f + a_b - a_f * a_b if blend_type != None: c_blend = self.blend(c_f, c_b, blend_type) - c_r = ( ((1-a_f)*a_b)[:,:,None] * c_b - + ((1-a_b)*a_f)[:,:,None] * c_f - + (a_f*a_b)[:,:,None] * c_blend ) + c_r = (((1 - a_f) * a_b)[:, :, None] * c_b + + ((1 - a_b) * a_f)[:, :, None] * c_f + + (a_f * a_b)[:, :, None] * c_blend) else: - c_r = ( ((1-a_f)*a_b)[:,:,None] * c_b - + a_f[:,:,None]*c_f ) + c_r = (((1 - a_f) * a_b)[:, :, None] * c_b + + a_f[:, :, None] * c_f) - return Layer((255*a_r).astype('uint8'), c_r.astype('uint8')) + return Layer((255 * a_r).astype('uint8'), c_r.astype('uint8')) def merge_down(self, layers, blends=None): """ @@ -229,20 +230,20 @@ def merge_down(self, layers, blends=None): """ nlayers = len(layers) if nlayers > 1: - [n,m] = layers[0].alpha.shape[:2] + [n, m] = layers[0].alpha.shape[:2] out_layer = layers[-1] - for i in range(-2,-nlayers-1,-1): - blend=None + for i in range(-2, -nlayers - 1, -1): + blend = None if blends is not None: - blend = blends[i+1] - out_layer = self.merge_two(fore=layers[i], back=out_layer,blend_type=blend) + blend = blends[i + 1] + out_layer = self.merge_two(fore=layers[i], back=out_layer, blend_type=blend) return out_layer else: return layers[0] def resize_im(self, im, osize): return np.array(Image.fromarray(im).resize(osize[::-1], Image.BICUBIC)) - + def occlude(self): """ somehow add occlusion to text. @@ -259,38 +260,39 @@ def color_border(self, col_text, col_bg): choice = np.random.choice(3) col_text = cv.cvtColor(col_text, cv.COLOR_RGB2HSV) - col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]),3)) - col_text = np.mean(col_text,axis=0).astype('uint8') + col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]), 3)) + col_text = np.mean(col_text, axis=0).astype('uint8') + + vs = np.linspace(0, 1) - vs = np.linspace(0,1) def get_sample(x): - ps = np.abs(vs - x/255.0) + ps = np.abs(vs - x / 255.0) ps /= np.sum(ps) - v_rand = np.clip(np.random.choice(vs,p=ps) + 0.1*np.random.randn(),0,1) - return 255*v_rand + v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1) + return 255 * v_rand # first choose a color, then inc/dec its VALUE: - if choice==0: + if choice == 0: # increase/decrease saturation: - col_text[0] = get_sample(col_text[0]) # saturation - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) - elif choice==1: + col_text[0] = get_sample(col_text[0]) # saturation + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) + elif choice == 1: # get the complementary color to text: - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) col_text = self.font_color.complement(col_text) else: # choose a mid-way color: col_bg = cv.cvtColor(col_bg, cv.COLOR_RGB2HSV) - col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]),3)) - col_bg = np.mean(col_bg,axis=0).astype('uint8') - col_bg = np.squeeze(cv.cvtColor(col_bg[None,None,:],cv.COLOR_HSV2RGB)) - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) - col_text = self.font_color.triangle_color(col_text,col_bg) + col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]), 3)) + col_bg = np.mean(col_bg, axis=0).astype('uint8') + col_bg = 
np.squeeze(cv.cvtColor(col_bg[None, None, :], cv.COLOR_HSV2RGB)) + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) + col_text = self.font_color.triangle_color(col_text, col_bg) # now change the VALUE channel: - col_text = np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_RGB2HSV)) - col_text[2] = get_sample(col_text[2]) # value - return np.squeeze(cv.cvtColor(col_text[None,None,:],cv.COLOR_HSV2RGB)) + col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_RGB2HSV)) + col_text[2] = get_sample(col_text[2]) # value + return np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB)) def color_text(self, text_arr, h, bg_arr): """ @@ -303,11 +305,10 @@ def color_text(self, text_arr, h, bg_arr): H : minimum height of a character """ - bg_col,fg_col,i = 0,0,0 - fg_col,bg_col = self.font_color.sample_from_data(bg_arr) + bg_col, fg_col, i = 0, 0, 0 + fg_col, bg_col = self.font_color.sample_from_data(bg_arr) return Layer(alpha=text_arr, color=fg_col), fg_col, bg_col - def process(self, text_arr, bg_arr, min_h): """ text_arr : one alpha mask : nxm, uint8 @@ -318,55 +319,63 @@ def process(self, text_arr, bg_arr, min_h): """ # decide on a color for the text: l_text, fg_col, bg_col = self.color_text(text_arr, min_h, bg_arr) - bg_col = np.mean(np.mean(bg_arr,axis=0),axis=0) - l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'),color=bg_col) + bg_col = np.mean(np.mean(bg_arr, axis=0), axis=0) + l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col) - l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1*np.random.randn(), 0.72, 1.0) + l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1 * np.random.randn(), 0.72, 1.0) layers = [l_text] blends = [] # add border: if np.random.rand() < self.p_border: - if min_h <= 15 : bsz = 1 - elif 15 < min_h < 30: bsz = 3 - else: bsz = 5 + if min_h <= 15: + bsz = 1 + elif 15 < min_h < 30: + bsz = 3 + else: + bsz = 5 border_a = self.border(l_text.alpha, size=bsz) - l_border = Layer(border_a, self.color_border(l_text.color,l_bg.color)) + l_border = Layer(border_a, self.color_border(l_text.color, l_bg.color)) layers.append(l_border) blends.append('normal') # add shadow: if np.random.rand() < self.p_drop_shadow: # shadow gaussian size: - if min_h <= 15 : bsz = 1 - elif 15 < min_h < 30: bsz = 3 - else: bsz = 5 + if min_h <= 15: + bsz = 1 + elif 15 < min_h < 30: + bsz = 3 + else: + bsz = 5 # shadow angle: - theta = np.pi/4 * np.random.choice([1,3,5,7]) + 0.5*np.random.randn() + theta = np.pi / 4 * np.random.choice([1, 3, 5, 7]) + 0.5 * np.random.randn() # shadow shift: - if min_h <= 15 : shift = 2 - elif 15 < min_h < 30: shift = 7+np.random.randn() - else: shift = 15 + 3*np.random.randn() + if min_h <= 15: + shift = 2 + elif 15 < min_h < 30: + shift = 7 + np.random.randn() + else: + shift = 15 + 3 * np.random.randn() # opacity: - op = 0.50 + 0.1*np.random.randn() + op = 0.50 + 0.1 * np.random.randn() - shadow = self.drop_shadow(l_text.alpha, theta, shift, 3*bsz, op) + shadow = self.drop_shadow(l_text.alpha, theta, shift, 3 * bsz, op) l_shadow = Layer(shadow, 0) layers.append(l_shadow) blends.append('normal') - - l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_col) + l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col) layers.append(l_bg) blends.append('normal') - l_normal = self.merge_down(layers,blends) + l_normal = self.merge_down(layers, blends) # now do poisson image editing: - l_bg = Layer(alpha=255*np.ones_like(text_arr,'uint8'), color=bg_arr) - l_out = 
blit_images(l_normal.color,l_bg.color.copy()) - + l_bg = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_arr) + l_out = blit_images(l_normal.color, l_bg.color.copy()) + # plt.subplot(1,3,1) # plt.imshow(l_normal.color) # plt.subplot(1,3,2) @@ -374,17 +383,16 @@ def process(self, text_arr, bg_arr, min_h): # plt.subplot(1,3,3) # plt.imshow(l_out) # plt.show() - + if l_out is None: # poisson recontruction produced # imperceptible text. In this case, # just do a normal blend: layers[-1] = l_bg - return self.merge_down(layers,blends).color + return self.merge_down(layers, blends).color return l_out - def check_perceptible(self, txt_mask, bg, txt_bg): """ --- DEPRECATED; USE GRADIENT CHECKING IN POISSON-RECONSTRUCT INSTEAD --- @@ -396,19 +404,19 @@ def check_perceptible(self, txt_mask, bg, txt_bg): bg (hxwx3) : original background image WITHOUT any text. txt_bg (hxwx3) : image with text. """ - bgo,txto = bg.copy(), txt_bg.copy() + bgo, txto = bg.copy(), txt_bg.copy() txt_mask = txt_mask.astype('bool') bg = cv.cvtColor(bg.copy(), cv.COLOR_RGB2Lab) txt_bg = cv.cvtColor(txt_bg.copy(), cv.COLOR_RGB2Lab) - bg_px = bg[txt_mask,:] - txt_px = txt_bg[txt_mask,:] - bg_px[:,0] *= 100.0/255.0 #rescale - L channel - txt_px[:,0] *= 100.0/255.0 + bg_px = bg[txt_mask, :] + txt_px = txt_bg[txt_mask, :] + bg_px[:, 0] *= 100.0 / 255.0 # rescale - L channel + txt_px[:, 0] *= 100.0 / 255.0 - diff = np.linalg.norm(bg_px-txt_px,ord=None,axis=1) - diff = np.percentile(diff,[10,30,50,70,90]) - print ("color diff percentile :", diff) - return diff, (bgo,txto) + diff = np.linalg.norm(bg_px - txt_px, ord=None, axis=1) + diff = np.percentile(diff, [10, 30, 50, 70, 90]) + print("color diff percentile :", diff) + return diff, (bgo, txto) def color(self, bg_arr, text_arr, hs, place_order=None, pad=20): """ @@ -422,8 +430,8 @@ def color(self, bg_arr, text_arr, hs, place_order=None, pad=20): return : nxmx3 rgb colorized text-image. 
""" bg_arr = bg_arr.copy() - if bg_arr.ndim == 2 or bg_arr.shape[2]==1: # grayscale image: - bg_arr = np.repeat(bg_arr[:,:,None], 3, 2) + if bg_arr.ndim == 2 or bg_arr.shape[2] == 1: # grayscale image: + bg_arr = np.repeat(bg_arr[:, :, None], 3, 2) # get the canvas size: canvas_sz = np.array(bg_arr.shape[:2]) @@ -439,26 +447,26 @@ def color(self, bg_arr, text_arr, hs, place_order=None, pad=20): loc = np.where(text_arr[i]) lx, ly = np.min(loc[0]), np.min(loc[1]) mx, my = np.max(loc[0]), np.max(loc[1]) - l = np.array([lx,ly]) - m = np.array([mx,my])-l+1 - text_patch = text_arr[i][l[0]:l[0]+m[0],l[1]:l[1]+m[1]] + l = np.array([lx, ly]) + m = np.array([mx, my]) - l + 1 + text_patch = text_arr[i][l[0]:l[0] + m[0], l[1]:l[1] + m[1]] # figure out padding: - ext = canvas_sz - (l+m) - num_pad = pad*np.ones(4,dtype='int32') + ext = canvas_sz - (l + m) + num_pad = pad * np.ones(4, dtype='int32') num_pad[:2] = np.minimum(num_pad[:2], l) num_pad[2:] = np.minimum(num_pad[2:], ext) - text_patch = np.pad(text_patch, pad_width=((num_pad[0],num_pad[2]), (num_pad[1],num_pad[3])), mode='constant') + text_patch = np.pad(text_patch, pad_width=((num_pad[0], num_pad[2]), (num_pad[1], num_pad[3])), + mode='constant') l -= num_pad[:2] - w,h = text_patch.shape - bg = bg_arr[l[0]:l[0]+w,l[1]:l[1]+h,:] + w, h = text_patch.shape + bg = bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :] rdr0 = self.process(text_patch, bg, hs[i]) rendered.append(rdr0) - bg_arr[l[0]:l[0]+w,l[1]:l[1]+h,:] = rdr0#rendered[-1] - + bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :] = rdr0 # rendered[-1] return bg_arr diff --git a/data_provider.py b/data_provider.py new file mode 100644 index 0000000..f8ae317 --- /dev/null +++ b/data_provider.py @@ -0,0 +1,99 @@ +import os +from synthgen import * +from common import * +import wget +import tarfile + + +# TODO: move these contants inside DataProvider + +# path to the data-file, containing image, depth and segmentation: +DATA_PATH = 'data' # TODO dedup +DB_FNAME = osp.join(DATA_PATH, 'dset.h5') +# url of the data (google-drive public file): +DATA_URL = 'http://www.robots.ox.ac.uk/~ankush/data.tar.gz' + + +class DateProvider(object): + + def __init__(self, root_data_dir=None): + # TODO: add option to override those 3: + path_depth = "depth.h5" + path_segmap = "seg.h5" + self.path_images = "bg_img" + self.db = None + self.depth_db = None + self.seg_db = None + self.segmap = {} + self.depth = {} + + if root_data_dir is None: + # should download default example + self.db = DateProvider.get_data() + self.segmap = self.db['seg'] + self.depth = self.db['depth'] + self.imnames = sorted(self.db['image'].keys()) + else: + # provided path to the folder with all data downloaded separately. + # see https://github.com/ankush-me/SynthText#pre-processed-background-images + self.path = root_data_dir + self.depth_db = h5py.File(osp.join(self.path, path_depth), 'r') + self.seg_db = h5py.File(osp.join(self.path, path_segmap), 'r') + self.imnames = sorted(self.depth_db.keys()) + self.segmap = self.seg_db['mask'] + self.depth = self.depth_db + + @staticmethod + def get_data(): + """ + Downloads the archive using link specified in DATA_URL. Unpacks the archive, treats it as h5 database. + The image, depth and segmentation data is downloaded. + + Returns: + the h5 database. 
+ """ + if not osp.exists(DB_FNAME): + try: + colorprint(Color.BLUE, '\tdownloading data (56 M) from: ' + DATA_URL, bold=True) + print() + sys.stdout.flush() + out_fname = 'data.tar.gz' + wget.download(DATA_URL, out=out_fname) + tar = tarfile.open(out_fname) + tar.extractall() + tar.close() + os.remove(out_fname) + colorprint(Color.BLUE, '\n\tdata saved at:' + DB_FNAME, bold=True) + sys.stdout.flush() + except: + print(colorize(Color.RED, 'Data not found and have problems downloading.', bold=True)) + sys.stdout.flush() + sys.exit(-1) + # open the h5 file and return: + return h5py.File(DB_FNAME, 'r') + + def get_image(self, imname: str): + if self.db is None: + return Image.open(osp.join(self.path, self.path_images, imname)).convert('RGB') + else: + return Image.fromarray(self.db['image'][imname][:]) + + def get_segmap(self, imname: str): + return self.segmap[imname] + + def get_depth(self, imname: str): + if self.db is None: + return self.depth[imname][:].T[:, :, 0] + else: + return self.depth[imname][:].T[:, :, 1] + + def get_imnames(self): + return self.imnames + + def close(self): + if self.db is not None: + self.db.close() + if self.depth_db is not None: + self.depth_db.close() + if self.seg_db is not None: + self.seg_db.close() diff --git a/gen.py b/gen.py index ffaf995..d57b142 100644 --- a/gen.py +++ b/gen.py @@ -11,133 +11,174 @@ year = "2016", } """ - -import numpy as np -import h5py -import os, sys, traceback -import os.path as osp +import os from synthgen import * from common import * -import wget, tarfile +from functools import reduce +import re +from time import time +from data_provider import DateProvider -## Define some configuration variables: -NUM_IMG = -1 # no. of images to use for generation (-1 to use all available): -INSTANCE_PER_IMAGE = 1 # no. of times to use the same image -SECS_PER_IMG = 5 #max time per image in seconds +# Define some configuration variables: +NUM_IMG = 1 # number of images to use for generation (-1 to use all available): +INSTANCE_PER_IMAGE = 1 # number of times to use the same image +SECS_PER_IMG = 5 # max time per image in seconds # path to the data-file, containing image, depth and segmentation: DATA_PATH = 'data' -DB_FNAME = osp.join(DATA_PATH,'dset.h5') -# url of the data (google-drive public file): -DATA_URL = 'http://www.robots.ox.ac.uk/~ankush/data.tar.gz' OUT_FILE = 'results/SynthText.h5' -def get_data(): - """ - Download the image,depth and segmentation data: - Returns, the h5 database. - """ - if not osp.exists(DB_FNAME): - try: - colorprint(Color.BLUE,'\tdownloading data (56 M) from: '+DATA_URL,bold=True) - print() - sys.stdout.flush() - out_fname = 'data.tar.gz' - wget.download(DATA_URL,out=out_fname) - tar = tarfile.open(out_fname) - tar.extractall() - tar.close() - os.remove(out_fname) - colorprint(Color.BLUE,'\n\tdata saved at:'+DB_FNAME,bold=True) - sys.stdout.flush() - except: - print (colorize(Color.RED,'Data not found and have problems downloading.',bold=True)) - sys.stdout.flush() - sys.exit(-1) - # open the h5 file and return: - return h5py.File(DB_FNAME,'r') - - -def add_res_to_db(imgname,res,db): - """ - Add the synthetically generated text image instance - and other metadata to the dataset. 
- """ - ninstance = len(res) - for i in range(ninstance): - dname = "%s_%d"%(imgname, i) - db['data'].create_dataset(dname,data=res[i]['img']) - db['data'][dname].attrs['charBB'] = res[i]['charBB'] - db['data'][dname].attrs['wordBB'] = res[i]['wordBB'] - #db['data'][dname].attrs['txt'] = res[i]['txt'] - L = res[i]['txt'] - L = [n.encode("ascii", "ignore") for n in L] - db['data'][dname].attrs['txt'] = L - - -def main(viz=False): - # open databases: - print (colorize(Color.BLUE,'getting data..',bold=True)) - db = get_data() - print (colorize(Color.BLUE,'\t-> done',bold=True)) - - # open the output h5 file: - out_db = h5py.File(OUT_FILE,'w') - out_db.create_group('/data') - print (colorize(Color.GREEN,'Storing the output in: '+OUT_FILE, bold=True)) - - # get the names of the image files in the dataset: - imnames = sorted(db['image'].keys()) - N = len(imnames) - global NUM_IMG - if NUM_IMG < 0: - NUM_IMG = N - start_idx,end_idx = 0,min(NUM_IMG, N) - - RV3 = RendererV3(DATA_PATH,max_time=SECS_PER_IMG) - for i in range(start_idx,end_idx): - imname = imnames[i] - try: - # get the image: - img = Image.fromarray(db['image'][imname][:]) - # get the pre-computed depth: - # there are 2 estimates of depth (represented as 2 "channels") - # here we are using the second one (in some cases it might be - # useful to use the other one): - depth = db['depth'][imname][:].T - depth = depth[:,:,1] - # get segmentation: - seg = db['seg'][imname][:].astype('float32') - area = db['seg'][imname].attrs['area'] - label = db['seg'][imname].attrs['label'] - - # re-size uniformly: - sz = depth.shape[:2][::-1] - img = np.array(img.resize(sz,Image.ANTIALIAS)) - seg = np.array(Image.fromarray(seg).resize(sz,Image.NEAREST)) - - print (colorize(Color.RED,'%d of %d'%(i,end_idx-1), bold=True)) - res = RV3.render_text(img,depth,seg,area,label, - ninstance=INSTANCE_PER_IMAGE,viz=viz) - if len(res) > 0: - # non-empty : successful in placing text: - add_res_to_db(imname,res,out_db) - # visualize the output: - if viz: - if 'q' in input(colorize(Color.RED,'continue? (enter to continue, q to exit): ',True)): - break - except: - traceback.print_exc() - print (colorize(Color.GREEN,'>>>> CONTINUING....', bold=True)) - continue - db.close() - out_db.close() - - -if __name__=='__main__': - import argparse - parser = argparse.ArgumentParser(description='Genereate Synthetic Scene-Text Images') - parser.add_argument('--viz',action='store_true',dest='viz',default=False,help='flag for turning on visualizations') - args = parser.parse_args() - main(args.viz) +MASKS_DIR = "./masks" + + +def add_res_to_db(imgname, res, db): + """ + Add the synthetically generated text image instance + and other metadata to the dataset. + """ + ninstance = len(res) + for i in range(ninstance): + dname = "%s_%d" % (imgname, i) + db['data'].create_dataset(dname, data=res[i]['img']) + db['data'][dname].attrs['charBB'] = res[i]['charBB'] + db['data'][dname].attrs['wordBB'] = res[i]['wordBB'] + L = res[i]['txt'] + L = [n.encode("ascii", "ignore") for n in L] + db['data'][dname].attrs['txt'] = L + + +def main(viz=False, debug=False, output_masks=False, data_path=None): + """ + Entry point. + + Args: + viz: display generated images. If this flag is true, needs user input to continue with every loop iteration. 
+        output_masks: output masks of text, which were used during generation
+    """
+    if output_masks:
+        # create a directory for masks if it does not exist
+        if not os.path.exists(MASKS_DIR):
+            os.makedirs(MASKS_DIR)
+
+    # open databases:
+    print(colorize(Color.BLUE, 'getting data..', bold=True))
+
+    provider = DataProvider(data_path)
+
+    # db = DataProvider.get_data()
+    print(colorize(Color.BLUE, '\t-> done', bold=True))
+
+    # open the output h5 file:
+    out_db = h5py.File(OUT_FILE, 'w')
+    out_db.create_group('/data')
+    print(colorize(Color.GREEN, 'Storing the output in: ' + OUT_FILE, bold=True))
+
+    # get the names of the image files in the dataset:
+    imnames = provider.get_imnames()
+    N = len(imnames)
+    global NUM_IMG
+    if NUM_IMG < 0:
+        NUM_IMG = N
+    start_idx, end_idx = 0, min(NUM_IMG, N)
+
+    renderer = RendererV3(DATA_PATH, max_time=SECS_PER_IMG)
+    for i in range(start_idx, end_idx):
+        imname = imnames[i]
+
+        try:
+            # get the image:
+            img = provider.get_image(imname)
+            # get the pre-computed depth:
+            #  there are 2 estimates of depth (represented as 2 "channels")
+            #  here we are using the second one (in some cases it might be
+            #  useful to use the other one):
+            depth = provider.get_depth(imname)
+            # get segmentation:
+            seg = provider.get_segmap(imname)[:].astype('float32')
+            area = provider.get_segmap(imname).attrs['area']  # number of pixels in each region
+            label = provider.get_segmap(imname).attrs['label']
+
+            # re-size uniformly:
+            sz = depth.shape[:2][::-1]
+            img = np.array(img.resize(sz, Image.ANTIALIAS))
+            seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))
+            print(colorize(Color.RED, '%d of %d' % (i, end_idx - 1), bold=True))
+
+            if debug:
+                print("\n Processing " + str(imname) + "...")
+
+            res = renderer.render_text(img, depth, seg, area, label,
+                                       ninstance=INSTANCE_PER_IMAGE)
+            if len(res) > 0:
+                # non-empty : successful in placing text:
+                add_res_to_db(imname, res, out_db)
+                if debug:
+                    print(" Success. " + str(len(res[0]['txt'])) + " texts placed:")
+                    print(" Texts:" + ";".join(res[0]['txt']))
+                    ws = re.sub(' +', ' ', (" ".join(res[0]['txt']).replace("\n", " "))).strip().split(" ")
+                    print(" Words: #" + str(len(ws)) + " " + ";".join(ws))
+                    print(" Words bounding boxes: " + str(res[0]['wordBB'].shape))
+            else:
+                print(" Failure: No text placed.")
+
+            if len(res) > 0 and output_masks:
+                ts = str(int(time() * 1000))
+
+                # executed only if --output-masks flag is set
+                prefix = MASKS_DIR + "/" + imname + ts
+
+                imageio.imwrite(prefix + "_original.png", img)
+                imageio.imwrite(prefix + "_with_text.png", res[0]['img'])
+
+                # merge masks together:
+                merged = reduce(lambda a, b: np.add(a, b), res[0]['masks'])
+                # since we just added pixel values, bring the result back to the 0..255 range:
+                merged = np.divide(merged, len(res[0]['masks']))
+                imageio.imwrite(prefix + "_mask.png", merged)
+
+                # print bounding boxes
+                f = open(prefix + "_bb.txt", "w+")
+                bbs = res[0]['wordBB']
+                boxes = np.swapaxes(bbs, 2, 0)
+                words = re.sub(' +', ' ', ' '.join(res[0]['txt']).replace("\n", " ")).strip().split(" ")
+                assert len(boxes) == len(words)
+                for j in range(len(boxes)):
+                    as_strings = np.char.mod('%f', boxes[j].flatten())
+                    f.write(",".join(as_strings) + "," + words[j] + "\n")
+                f.close()
+
+            # visualize the output:
+            if viz:
+                # executed only if --viz flag is set
+                for idict in res:
+                    img_with_text = idict['img']
+                    viz_textbb(1, img_with_text, [idict['wordBB']], alpha=1.0)
+                    viz_masks(2, img_with_text, seg, depth, idict['labeled_region'])
+                    # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
+                    if i < INSTANCE_PER_IMAGE - 1:
+                        input(colorize(Color.BLUE, 'continue?', True))
+                if 'q' in input(colorize(Color.RED, 'continue? (enter to continue, q to exit): ', True)):
+                    break
+        except:
+            traceback.print_exc()
+            print(colorize(Color.GREEN, '>>>> CONTINUING....', bold=True))
+            continue
+    provider.close()
+    out_db.close()
+
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Generate Synthetic Scene-Text Images')
+    parser.add_argument('--viz', action='store_true', dest='viz', default=False,
+                        help='flag for turning on visualizations')
+    parser.add_argument('--output-masks', action='store_true', dest='output_masks', default=False,
+                        help='flag for turning on output of masks')
+    parser.add_argument('--debug', action='store_true', dest='debug', default=False,
+                        help='flag for turning on debug output')
+    parser.add_argument("--data", type=str, dest='data_path', default=None,
+                        help="absolute path to data directory containing images, segmaps and depths")
+    args = parser.parse_args()
+    main(viz=args.viz, debug=args.debug, output_masks=args.output_masks, data_path=args.data_path)
diff --git a/invert_font_size.py b/invert_font_size.py
index 5697467..3eb5763 100644
--- a/invert_font_size.py
+++ b/invert_font_size.py
@@ -21,9 +21,8 @@
 FS = FontState()
 #plt.figure()
-#plt.hold(True)
 for i in xrange(len(FS.fonts)):
-    print i
+    print(i)
     font = freetype.Font(FS.fonts[i], size=12)
     h = []
     for y in ys:
diff --git a/poisson_reconstruct.py b/poisson_reconstruct.py
index 7f90899..4030136 100644
--- a/poisson_reconstruct.py
+++ b/poisson_reconstruct.py
@@ -212,7 +212,6 @@ def contiguous_regions(mask):
     with sns.axes_style("darkgrid"):
         plt.subplot(2,1,2)
         plt.plot(l_alpha,label='alpha')
-        plt.hold(True)
         plt.plot(l_poisson,label='poisson')
         plt.plot(l_actual,label='actual')
         plt.legend()
@@ -227,7 +226,6 @@ def contiguous_regions(mask):
     with sns.axes_style("white"):
         plt.subplot(2,1,1)
         plt.imshow(im_alpha[:,:,::-1].astype('uint8'))
-        plt.hold(True)
         plt.plot([0,im_alpha_L.shape[0]-1],[i,i],'r')
         plt.axis('image')
         plt.show()
diff --git a/requirements.txt b/requirements.txt
index 1b855ae..7460c72 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,18 @@
 cycler==0.10.0
-h5py==2.7.1
-matplotlib==2.1.1
-numpy==1.13.3
-Pillow==5.0.0
-pkg-resources==0.0.0
-pygame==1.9.3
+h5py==2.9.0
+image==1.5.27
+imageio==2.5.0
+kiwisolver==1.1.0
+matplotlib==3.0.3
+numpy==1.16.3
+opencv-python==4.1.0.25
+Pillow==6.0.0
+pygame==1.9.6
 pyparsing==2.2.0
 python-dateutil==2.6.1
 pytz==2017.3
-scipy==1.0.0
+scipy==1.2.1
 six==1.11.0
+sqlparse==0.3.0
+tornado==6.0.2
 wget==3.2
diff --git a/synthgen.py b/synthgen.py
index 2fda5e9..d3d67e8 100644
--- a/synthgen.py
+++ 
b/synthgen.py @@ -10,9 +10,8 @@ import cv2 import h5py from PIL import Image -import numpy as np -#import mayavi.mlab as mym -import matplotlib.pyplot as plt +import numpy as np +import matplotlib.pyplot as plt import os.path as osp import scipy.ndimage as sim import scipy.spatial.distance as ssd @@ -21,22 +20,25 @@ from colorize3_poisson import Colorize from common import * import traceback, itertools +import imageio +DEBUG = True + class TextRegions(object): """ Get region from segmentation which are good for placing text. """ - minWidth = 30 #px - minHeight = 30 #px - minAspect = 0.3 # w > 0.3*h + minWidth = 30 # px + minHeight = 30 # px + minAspect = 0.3 # w > 0.3*h maxAspect = 7 - minArea = 100 # number of pix - pArea = 0.60 # area_obj/area_minrect >= 0.6 + minArea = 100 # number of pix + pArea = 0.60 # area_obj/area_minrect >= 0.6 # RANSAC planar fitting params: - dist_thresh = 0.10 # m + dist_thresh = 0.10 # m num_inlier = 90 ransac_fit_trials = 100 min_z_projection = 0.25 @@ -48,44 +50,44 @@ def filter_rectified(mask): """ mask : 1 where "ON", 0 where "OFF" """ - wx = np.median(np.sum(mask,axis=0)) - wy = np.median(np.sum(mask,axis=1)) - return wx>TextRegions.minW and wy>TextRegions.minW + wx = np.median(np.sum(mask, axis=0)) + wy = np.median(np.sum(mask, axis=1)) + return wx > TextRegions.minW and wy > TextRegions.minW @staticmethod - def get_hw(pt,return_rot=False): + def get_hw(pt, return_rot=False): pt = pt.copy() R = su.unrotate2d(pt) - mu = np.median(pt,axis=0) - pt = (pt-mu[None,:]).dot(R.T) + mu[None,:] - h,w = np.max(pt,axis=0) - np.min(pt,axis=0) + mu = np.median(pt, axis=0) + pt = (pt - mu[None, :]).dot(R.T) + mu[None, :] + h, w = np.max(pt, axis=0) - np.min(pt, axis=0) if return_rot: - return h,w,R - return h,w - + return h, w, R + return h, w + @staticmethod - def filter(seg,area,label): + def filter(seg, area, label): """ Apply the filter. The final list is ranked by area. """ good = label[area > TextRegions.minArea] area = area[area > TextRegions.minArea] - filt,R = [],[] - for idx,i in enumerate(good): - mask = seg==i - xs,ys = np.where(mask) - - coords = np.c_[xs,ys].astype('float32') - rect = cv2.minAreaRect(coords) - #box = np.array(cv2.cv.BoxPoints(rect)) + filt, R = [], [] + for idx, i in enumerate(good): + mask = seg == i + xs, ys = np.where(mask) + + coords = np.c_[xs, ys].astype('float32') + rect = cv2.minAreaRect(coords) + # box = np.array(cv2.cv.BoxPoints(rect)) box = np.array(cv2.boxPoints(rect)) - h,w,rot = TextRegions.get_hw(box,return_rot=True) + h, w, rot = TextRegions.get_hw(box, return_rot=True) - f = (h > TextRegions.minHeight - and w > TextRegions.minWidth - and TextRegions.minAspect < w/h < TextRegions.maxAspect - and area[idx]/w*h > TextRegions.pArea) + f = (h > TextRegions.minHeight + and w > TextRegions.minWidth + and TextRegions.minAspect < w / h < TextRegions.maxAspect + and area[idx] / w * h > TextRegions.pArea) filt.append(f) R.append(rot) @@ -98,70 +100,70 @@ def filter(seg,area,label): aidx = np.argsort(-area) good = good[filt][aidx] R = [R[i] for i in aidx] - filter_info = {'label':good, 'rot':R, 'area': area[aidx]} + filter_info = {'label': good, 'rot': R, 'area': area[aidx]} return filter_info @staticmethod - def sample_grid_neighbours(mask,nsample,step=3): + def sample_grid_neighbours(mask, nsample, step=3): """ Given a HxW binary mask, sample 4 neighbours on the grid, in the cardinal directions, STEP pixels away. 
""" - if 2*step >= min(mask.shape[:2]): - return #None + if 2 * step >= min(mask.shape[:2]): + return # None - y_m,x_m = np.where(mask) - mask_idx = np.zeros_like(mask,'int32') + y_m, x_m = np.where(mask) + mask_idx = np.zeros_like(mask, 'int32') for i in range(len(y_m)): - mask_idx[y_m[i],x_m[i]] = i + mask_idx[y_m[i], x_m[i]] = i - xp,xn = np.zeros_like(mask), np.zeros_like(mask) - yp,yn = np.zeros_like(mask), np.zeros_like(mask) - xp[:,:-2*step] = mask[:,2*step:] - xn[:,2*step:] = mask[:,:-2*step] - yp[:-2*step,:] = mask[2*step:,:] - yn[2*step:,:] = mask[:-2*step,:] - valid = mask&xp&xn&yp&yn + xp, xn = np.zeros_like(mask), np.zeros_like(mask) + yp, yn = np.zeros_like(mask), np.zeros_like(mask) + xp[:, :-2 * step] = mask[:, 2 * step:] + xn[:, 2 * step:] = mask[:, :-2 * step] + yp[:-2 * step, :] = mask[2 * step:, :] + yn[2 * step:, :] = mask[:-2 * step, :] + valid = mask & xp & xn & yp & yn - ys,xs = np.where(valid) + ys, xs = np.where(valid) N = len(ys) - if N==0: #no valid pixels in mask: - return #None - nsample = min(nsample,N) - idx = np.random.choice(N,nsample,replace=False) + if N == 0: # no valid pixels in mask: + return # None + nsample = min(nsample, N) + idx = np.random.choice(N, nsample, replace=False) # generate neighborhood matrix: # (1+4)x2xNsample (2 for y,x) - xs,ys = xs[idx],ys[idx] + xs, ys = xs[idx], ys[idx] s = step - X = np.transpose(np.c_[xs,xs+s,xs+s,xs-s,xs-s][:,:,None],(1,2,0)) - Y = np.transpose(np.c_[ys,ys+s,ys-s,ys+s,ys-s][:,:,None],(1,2,0)) - sample_idx = np.concatenate([Y,X],axis=1) - mask_nn_idx = np.zeros((5,sample_idx.shape[-1]),'int32') + X = np.transpose(np.c_[xs, xs + s, xs + s, xs - s, xs - s][:, :, None], (1, 2, 0)) + Y = np.transpose(np.c_[ys, ys + s, ys - s, ys + s, ys - s][:, :, None], (1, 2, 0)) + sample_idx = np.concatenate([Y, X], axis=1) + mask_nn_idx = np.zeros((5, sample_idx.shape[-1]), 'int32') for i in range(sample_idx.shape[-1]): - mask_nn_idx[:,i] = mask_idx[sample_idx[:,:,i][:,0],sample_idx[:,:,i][:,1]] + mask_nn_idx[:, i] = mask_idx[sample_idx[:, :, i][:, 0], sample_idx[:, :, i][:, 1]] return mask_nn_idx @staticmethod - def filter_depth(xyz,seg,regions): - plane_info = {'label':[], - 'coeff':[], - 'support':[], - 'rot':[], - 'area':[]} - for idx,l in enumerate(regions['label']): - mask = seg==l - pt_sample = TextRegions.sample_grid_neighbours(mask,TextRegions.ransac_fit_trials,step=3) + def filter_depth(xyz, seg, regions): + plane_info = {'label': [], + 'coeff': [], + 'support': [], + 'rot': [], + 'area': []} + for idx, l in enumerate(regions['label']): + mask = seg == l + pt_sample = TextRegions.sample_grid_neighbours(mask, TextRegions.ransac_fit_trials, step=3) if pt_sample is None: - continue #not enough points for RANSAC + continue # not enough points for RANSAC # get-depths pt = xyz[mask] plane_model = su.isplanar(pt, pt_sample, - TextRegions.dist_thresh, - TextRegions.num_inlier, - TextRegions.min_z_projection) + TextRegions.dist_thresh, + TextRegions.num_inlier, + TextRegions.min_z_projection) if plane_model is not None: plane_coeff = plane_model[0] - if np.abs(plane_coeff[2])>TextRegions.min_z_projection: + if np.abs(plane_coeff[2]) > TextRegions.min_z_projection: plane_info['label'].append(l) plane_info['coeff'].append(plane_model[0]) plane_info['support'].append(plane_model[1]) @@ -171,13 +173,14 @@ def filter_depth(xyz,seg,regions): return plane_info @staticmethod - def get_regions(xyz,seg,area,label): - regions = TextRegions.filter(seg,area,label) + def get_regions(xyz, seg, area, label): + regions = 
TextRegions.filter(seg, area, label) # fit plane to text-regions: - regions = TextRegions.filter_depth(xyz,seg,regions) + regions = TextRegions.filter_depth(xyz, seg, regions) return regions -def rescale_frontoparallel(p_fp,box_fp,p_im): + +def rescale_frontoparallel(p_fp, box_fp, p_im): """ The fronto-parallel image region is rescaled to bring it in the same approx. size as the target region size. @@ -190,22 +193,23 @@ def rescale_frontoparallel(p_fp,box_fp,p_im): Returns the scale 's' to scale the fronto-parallel points by. """ - l1 = np.linalg.norm(box_fp[1,:]-box_fp[0,:]) - l2 = np.linalg.norm(box_fp[1,:]-box_fp[2,:]) + l1 = np.linalg.norm(box_fp[1, :] - box_fp[0, :]) + l2 = np.linalg.norm(box_fp[1, :] - box_fp[2, :]) - n0 = np.argmin(np.linalg.norm(p_fp-box_fp[0,:][None,:],axis=1)) - n1 = np.argmin(np.linalg.norm(p_fp-box_fp[1,:][None,:],axis=1)) - n2 = np.argmin(np.linalg.norm(p_fp-box_fp[2,:][None,:],axis=1)) + n0 = np.argmin(np.linalg.norm(p_fp - box_fp[0, :][None, :], axis=1)) + n1 = np.argmin(np.linalg.norm(p_fp - box_fp[1, :][None, :], axis=1)) + n2 = np.argmin(np.linalg.norm(p_fp - box_fp[2, :][None, :], axis=1)) - lt1 = np.linalg.norm(p_im[n1,:]-p_im[n0,:]) - lt2 = np.linalg.norm(p_im[n1,:]-p_im[n2,:]) + lt1 = np.linalg.norm(p_im[n1, :] - p_im[n0, :]) + lt2 = np.linalg.norm(p_im[n1, :] - p_im[n2, :]) - s = max(lt1/l1,lt2/l2) + s = max(lt1 / l1, lt2 / l2) if not np.isfinite(s): s = 1.0 return s -def get_text_placement_mask(xyz,mask,plane,pad=2,viz=False): + +def get_text_placement_mask(xyz, mask, plane, pad=2, viz=False): """ Returns a binary mask in which text can be placed. Also returns a homography from original image @@ -216,133 +220,136 @@ def get_text_placement_mask(xyz,mask,plane,pad=2,viz=False): REGION : DICT output of TextRegions.get_regions PAD : number of pixels to pad the placement-mask by """ - _,contour,hier = cv2.findContours(mask.copy().astype('uint8'), - mode=cv2.RETR_CCOMP, - method=cv2.CHAIN_APPROX_SIMPLE) + contour, hier = cv2.findContours(mask.copy().astype('uint8'), + mode=cv2.RETR_CCOMP, + method=cv2.CHAIN_APPROX_SIMPLE) contour = [np.squeeze(c).astype('float') for c in contour] - #plane = np.array([plane[1],plane[0],plane[2],plane[3]]) - H,W = mask.shape[:2] + # plane = np.array([plane[1],plane[0],plane[2],plane[3]]) + H, W = mask.shape[:2] # bring the contour 3d points to fronto-parallel config: - pts,pts_fp = [],[] - center = np.array([W,H])/2 - n_front = np.array([0.0,0.0,-1.0]) + pts, pts_fp = [], [] + center = np.array([W, H]) / 2 + n_front = np.array([0.0, 0.0, -1.0]) for i in range(len(contour)): cnt_ij = contour[i] xyz = su.DepthCamera.plane2xyz(center, cnt_ij, plane) - R = su.rot3d(plane[:3],n_front) + R = su.rot3d(plane[:3], n_front) xyz = xyz.dot(R.T) - pts_fp.append(xyz[:,:2]) + pts_fp.append(xyz[:, :2]) pts.append(cnt_ij) # unrotate in 2D plane: rect = cv2.minAreaRect(pts_fp[0].copy().astype('float32')) box = np.array(cv2.boxPoints(rect)) R2d = su.unrotate2d(box.copy()) - box = np.vstack([box,box[0,:]]) #close the box for visualization + box = np.vstack([box, box[0, :]]) # close the box for visualization + + mu = np.median(pts_fp[0], axis=0) + pts_tmp = (pts_fp[0] - mu[None, :]).dot(R2d.T) + mu[None, :] + boxR = (box - mu[None, :]).dot(R2d.T) + mu[None, :] - mu = np.median(pts_fp[0],axis=0) - pts_tmp = (pts_fp[0]-mu[None,:]).dot(R2d.T) + mu[None,:] - boxR = (box-mu[None,:]).dot(R2d.T) + mu[None,:] - # rescale the unrotated 2d points to approximately # the same scale as the target region: - s = rescale_frontoparallel(pts_tmp,boxR,pts[0]) + s 
= rescale_frontoparallel(pts_tmp, boxR, pts[0]) boxR *= s for i in range(len(pts_fp)): - pts_fp[i] = s*((pts_fp[i]-mu[None,:]).dot(R2d.T) + mu[None,:]) + pts_fp[i] = s * ((pts_fp[i] - mu[None, :]).dot(R2d.T) + mu[None, :]) # paint the unrotated contour points: - minxy = -np.min(boxR,axis=0) + pad//2 - ROW = np.max(ssd.pdist(np.atleast_2d(boxR[:,0]).T)) - COL = np.max(ssd.pdist(np.atleast_2d(boxR[:,1]).T)) + minxy = -np.min(boxR, axis=0) + pad // 2 + ROW = np.max(ssd.pdist(np.atleast_2d(boxR[:, 0]).T)) + COL = np.max(ssd.pdist(np.atleast_2d(boxR[:, 1]).T)) - place_mask = 255*np.ones((int(np.ceil(COL))+pad, int(np.ceil(ROW))+pad), 'uint8') + place_mask = 255 * np.ones((int(np.ceil(COL)) + pad, int(np.ceil(ROW)) + pad), 'uint8') - pts_fp_i32 = [(pts_fp[i]+minxy[None,:]).astype('int32') for i in range(len(pts_fp))] - cv2.drawContours(place_mask,pts_fp_i32,-1,0, + pts_fp_i32 = [(pts_fp[i] + minxy[None, :]).astype('int32') for i in range(len(pts_fp))] + cv2.drawContours(place_mask, pts_fp_i32, -1, 0, thickness=cv2.FILLED, - lineType=8,hierarchy=hier) - - if not TextRegions.filter_rectified((~place_mask).astype('float')/255): + lineType=8, hierarchy=hier) + + if not TextRegions.filter_rectified((~place_mask).astype('float') / 255): return # calculate the homography - H,_ = cv2.findHomography(pts[0].astype('float32').copy(), - pts_fp_i32[0].astype('float32').copy(), - method=0) + H, _ = cv2.findHomography(pts[0].astype('float32').copy(), + pts_fp_i32[0].astype('float32').copy(), + method=0) - Hinv,_ = cv2.findHomography(pts_fp_i32[0].astype('float32').copy(), - pts[0].astype('float32').copy(), - method=0) + Hinv, _ = cv2.findHomography(pts_fp_i32[0].astype('float32').copy(), + pts[0].astype('float32').copy(), + method=0) if viz: - plt.subplot(1,2,1) + plt.subplot(1, 2, 1) plt.imshow(mask) - plt.subplot(1,2,2) + plt.subplot(1, 2, 2) plt.imshow(~place_mask) - plt.hold(True) for i in range(len(pts_fp_i32)): - plt.scatter(pts_fp_i32[i][:,0],pts_fp_i32[i][:,1], - edgecolors='none',facecolor='g',alpha=0.5) + plt.scatter(pts_fp_i32[i][:, 0], pts_fp_i32[i][:, 1], + edgecolors='none', facecolor='g', alpha=0.5) plt.show() - return place_mask,H,Hinv + return place_mask, H, Hinv + -def viz_masks(fignum,rgb,seg,depth,label): +def viz_masks(fignum, rgb, seg, depth, label): """ img,depth,seg are images of the same size. visualizes depth masks for top NOBJ objects. 
""" - def mean_seg(rgb,seg,label): + + def mean_seg(rgb, seg, label): mim = np.zeros_like(rgb) for i in np.unique(seg.flat): - mask = seg==i - col = np.mean(rgb[mask,:],axis=0) - mim[mask,:] = col[None,None,:] - mim[seg==0,:] = 0 + mask = seg == i + col = np.mean(rgb[mask, :], axis=0) + mim[mask, :] = col[None, None, :] + mim[seg == 0, :] = 0 return mim - mim = mean_seg(rgb,seg,label) + mim = mean_seg(rgb, seg, label) img = rgb.copy() - for i,idx in enumerate(label): - mask = seg==idx - rgb_rand = (255*np.random.rand(3)).astype('uint8') - img[mask] = rgb_rand[None,None,:] + for i, idx in enumerate(label): + mask = seg == idx + rgb_rand = (255 * np.random.rand(3)).astype('uint8') + img[mask] = rgb_rand[None, None, :] - #import scipy - # scipy.misc.imsave('seg.png', mim) - # scipy.misc.imsave('depth.png', depth) - # scipy.misc.imsave('txt.png', rgb) - # scipy.misc.imsave('reg.png', img) + # import scipy + # imageio.imwrite('seg.png', mim) + # imageio.imwrite('depth.png', depth) + # imageio.imwrite('txt.png', rgb) + # imageio.imwrite('reg.png', img) plt.close(fignum) plt.figure(fignum) - ims = [rgb,mim,depth,img] + ims = [rgb, mim, depth, img] for i in range(len(ims)): - plt.subplot(2,2,i+1) + plt.subplot(2, 2, i + 1) plt.imshow(ims[i]) plt.show(block=False) -def viz_regions(img,xyz,seg,planes,labels): + +def viz_regions(img, xyz, seg, planes, labels): """ img,depth,seg are images of the same size. visualizes depth masks for top NOBJ objects. """ # plot the RGB-D point-cloud: - su.plot_xyzrgb(xyz.reshape(-1,3),img.reshape(-1,3)) + su.plot_xyzrgb(xyz.reshape(-1, 3), img.reshape(-1, 3)) # plot the RANSAC-planes at the text-regions: - for i,l in enumerate(labels): - mask = seg==l - xyz_region = xyz[mask,:] - su.visualize_plane(xyz_region,np.array(planes[i])) + for i, l in enumerate(labels): + mask = seg == l + xyz_region = xyz[mask, :] + su.visualize_plane(xyz_region, np.array(planes[i])) - mym.view(180,180) + mym.view(180, 180) mym.orientation_axes() mym.show(True) - -def viz_textbb(fignum,text_im, bb_list,alpha=1.0): + + +def viz_textbb(fignum, text_im, bb_list, alpha=1.0): """ text_im : image containing text bb_list : list of 2x4xn_i boundinb-box matrices @@ -350,34 +357,30 @@ def viz_textbb(fignum,text_im, bb_list,alpha=1.0): plt.close(fignum) plt.figure(fignum) plt.imshow(text_im) - plt.hold(True) - H,W = text_im.shape[:2] + H, W = text_im.shape[:2] for i in range(len(bb_list)): bbs = bb_list[i] ni = bbs.shape[-1] for j in range(ni): - bb = bbs[:,:,j] - bb = np.c_[bb,bb[:,0]] - plt.plot(bb[0,:], bb[1,:], 'r', linewidth=2, alpha=alpha) - plt.gca().set_xlim([0,W-1]) - plt.gca().set_ylim([H-1,0]) + bb = bbs[:, :, j] + bb = np.c_[bb, bb[:, 0]] + plt.plot(bb[0, :], bb[1, :], 'r', linewidth=2, alpha=alpha) + plt.gca().set_xlim([0, W - 1]) + plt.gca().set_ylim([H - 1, 0]) plt.show(block=False) + class RendererV3(object): def __init__(self, data_dir, max_time=None): self.text_renderer = tu.RenderFont(data_dir) self.colorizer = Colorize(data_dir) - #self.colorizerV2 = colorV2.Colorize(data_dir) - - self.min_char_height = 8 #px - self.min_asp_ratio = 0.4 # - + self.min_char_height = 8 # px + self.min_asp_ratio = 0.4 # self.max_text_regions = 7 - self.max_time = max_time - def filter_regions(self,regions,filt): + def filter_regions(self, regions, filt): """ filt : boolean list of regions to keep. 
""" @@ -386,27 +389,27 @@ def filter_regions(self,regions,filt): regions[k] = [regions[k][i] for i in idx] return regions - def filter_for_placement(self,xyz,seg,regions): + def filter_for_placement(self, xyz, seg, regions): filt = np.zeros(len(regions['label'])).astype('bool') - masks,Hs,Hinvs = [],[], [] - for idx,l in enumerate(regions['label']): - res = get_text_placement_mask(xyz,seg==l,regions['coeff'][idx],pad=2) + masks, Hs, Hinvs = [], [], [] + for idx, l in enumerate(regions['label']): + res = get_text_placement_mask(xyz, seg == l, regions['coeff'][idx], pad=2) if res is not None: - mask,H,Hinv = res + mask, H, Hinv = res masks.append(mask) Hs.append(H) Hinvs.append(Hinv) filt[idx] = True - regions = self.filter_regions(regions,filt) + regions = self.filter_regions(regions, filt) regions['place_mask'] = masks regions['homography'] = Hs regions['homography_inv'] = Hinvs return regions - def warpHomography(self,src_mat,H,dst_size): + def warpHomography(self, src_mat, H, dst_size): dst_mat = cv2.warpPerspective(src_mat, H, dst_size, - flags=cv2.WARP_INVERSE_MAP|cv2.INTER_LINEAR) + flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR) return dst_mat def homographyBB(self, bbs, H, offset=None): @@ -419,22 +422,22 @@ def homographyBB(self, bbs, H, offset=None): """ eps = 1e-16 # check the shape of the BB array: - t,f,n = bbs.shape - assert (t==2) and (f==4) + t, f, n = bbs.shape + assert (t == 2) and (f == 4) # append 1 for homogenous coordinates: - bbs_h = np.reshape(np.r_[bbs, np.ones((1,4,n))], (3,4*n), order='F') + bbs_h = np.reshape(np.r_[bbs, np.ones((1, 4, n))], (3, 4 * n), order='F') if offset != None: - bbs_h[:2,:] += np.array(offset)[:,None] + bbs_h[:2, :] += np.array(offset)[:, None] # perpective: bbs_h = H.dot(bbs_h) - bbs_h /= (bbs_h[2,:]+eps) + bbs_h /= (bbs_h[2, :] + eps) - bbs_h = np.reshape(bbs_h, (3,4,n), order='F') - return bbs_h[:2,:,:] + bbs_h = np.reshape(bbs_h, (3, 4, n), order='F') + return bbs_h[:2, :, :] - def bb_filter(self,bb0,bb,text): + def bb_filter(self, bb0, bb, text): """ Ensure that bounding-boxes are not too distorted after perspective distortion. @@ -443,99 +446,95 @@ def bb_filter(self,bb0,bb,text): bb : 2x4xn matrix of BB after perspective text: string of text -- for excluding symbols/punctuations. 
""" - h0 = np.linalg.norm(bb0[:,3,:] - bb0[:,0,:], axis=0) - w0 = np.linalg.norm(bb0[:,1,:] - bb0[:,0,:], axis=0) - hw0 = np.c_[h0,w0] + h0 = np.linalg.norm(bb0[:, 3, :] - bb0[:, 0, :], axis=0) + w0 = np.linalg.norm(bb0[:, 1, :] - bb0[:, 0, :], axis=0) + hw0 = np.c_[h0, w0] - h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0) - w = np.linalg.norm(bb[:,1,:] - bb[:,0,:], axis=0) - hw = np.c_[h,w] + h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0) + w = np.linalg.norm(bb[:, 1, :] - bb[:, 0, :], axis=0) + hw = np.c_[h, w] # remove newlines and spaces: text = ''.join(text.split()) - assert len(text)==bb.shape[-1] + assert len(text) == bb.shape[-1] alnum = np.array([ch.isalnum() for ch in text]) - hw0 = hw0[alnum,:] - hw = hw[alnum,:] + hw0 = hw0[alnum, :] + hw = hw[alnum, :] - min_h0, min_h = np.min(hw0[:,0]), np.min(hw[:,0]) - asp0, asp = hw0[:,0]/hw0[:,1], hw[:,0]/hw[:,1] + min_h0, min_h = np.min(hw0[:, 0]), np.min(hw[:, 0]) + asp0, asp = hw0[:, 0] / hw0[:, 1], hw[:, 0] / hw[:, 1] asp0, asp = np.median(asp0), np.median(asp) - asp_ratio = asp/asp0 - is_good = ( min_h > self.min_char_height - and asp_ratio > self.min_asp_ratio - and asp_ratio < 1.0/self.min_asp_ratio) + asp_ratio = asp / asp0 + is_good = (min_h > self.min_char_height + and asp_ratio > self.min_asp_ratio + and asp_ratio < 1.0 / self.min_asp_ratio) return is_good - def get_min_h(selg, bb, text): # find min-height: - h = np.linalg.norm(bb[:,3,:] - bb[:,0,:], axis=0) + h = np.linalg.norm(bb[:, 3, :] - bb[:, 0, :], axis=0) # remove newlines and spaces: text = ''.join(text.split()) - assert len(text)==bb.shape[-1] + assert len(text) == bb.shape[-1] alnum = np.array([ch.isalnum() for ch in text]) h = h[alnum] return np.min(h) - def feather(self, text_mask, min_h): # determine the gaussian-blur std: - if min_h <= 15 : + if min_h <= 15: bsz = 0.25 - ksz=1 + ksz = 1 elif 15 < min_h < 30: - bsz = max(0.30, 0.5 + 0.1*np.random.randn()) + bsz = max(0.30, 0.5 + 0.1 * np.random.randn()) ksz = 3 else: - bsz = max(0.5, 1.5 + 0.5*np.random.randn()) + bsz = max(0.5, 1.5 + 0.5 * np.random.randn()) ksz = 5 - return cv2.GaussianBlur(text_mask,(ksz,ksz),bsz) + return cv2.GaussianBlur(text_mask, (ksz, ksz), bsz) - def place_text(self,rgb,collision_mask,H,Hinv): + def place_text(self, rgb, collision_mask, H, Hinv): font = self.text_renderer.font_state.sample() font = self.text_renderer.font_state.init_font(font) - render_res = self.text_renderer.render_sample(font,collision_mask) - if render_res is None: # rendering not successful - return #None + render_res = self.text_renderer.render_sample(font, collision_mask) + if render_res is None: # rendering not successful + return # None else: - text_mask,loc,bb,text = render_res + text_mask, loc, bb, text = render_res # update the collision mask with text: - collision_mask += (255 * (text_mask>0)).astype('uint8') + collision_mask += (255 * (text_mask > 0)).astype('uint8') # warp the object mask back onto the image: - text_mask_orig = text_mask.copy() + # text_mask_orig = text_mask.copy() bb_orig = bb.copy() - text_mask = self.warpHomography(text_mask,H,rgb.shape[:2][::-1]) - bb = self.homographyBB(bb,Hinv) + text_mask = self.warpHomography(text_mask, H, rgb.shape[:2][::-1]) + bb = self.homographyBB(bb, Hinv) - if not self.bb_filter(bb_orig,bb,text): - #warn("bad charBB statistics") - return #None + if not self.bb_filter(bb_orig, bb, text): + # warn("bad charBB statistics") + return # None # get the minimum height of the character-BB: - min_h = self.get_min_h(bb,text) + min_h = self.get_min_h(bb, text) - 
#feathering:
+        # feathering:
         text_mask = self.feather(text_mask, min_h)
+        im_final = self.colorizer.color(rgb, [text_mask], np.array([min_h]))
 
-        im_final = self.colorizer.color(rgb,[text_mask],np.array([min_h]))
-
-        return im_final, text, bb, collision_mask
-
+        return im_final, text, bb, text_mask
 
     def get_num_text_regions(self, nregions):
-        #return nregions
+        # return nregions
         nmax = min(self.max_text_regions, nregions)
         if np.random.rand() < 0.10:
             rnd = np.random.rand()
         else:
-            rnd = np.random.beta(5.0,1.0)
+            rnd = np.random.beta(5.0, 1.0)
         return int(np.ceil(nmax * rnd))
 
     def char2wordBB(self, charBB, text):
@@ -551,54 +550,58 @@
         """
         wrds = text.split()
         bb_idx = np.r_[0, np.cumsum([len(w) for w in wrds])]
-        wordBB = np.zeros((2,4,len(wrds)), 'float32')
-
+        wordBB = np.zeros((2, 4, len(wrds)), 'float32')
+
         for i in range(len(wrds)):
-            cc = charBB[:,:,bb_idx[i]:bb_idx[i+1]]
+            cc = charBB[:, :, bb_idx[i]:bb_idx[i + 1]]
 
             # fit a rotated-rectangle:
             # change shape from 2x4xn_i -> (4*n_i)x2
-            cc = np.squeeze(np.concatenate(np.dsplit(cc,cc.shape[-1]),axis=1)).T.astype('float32')
+            cc = np.squeeze(np.concatenate(np.dsplit(cc, cc.shape[-1]), axis=1)).T.astype('float32')
             rect = cv2.minAreaRect(cc.copy())
             box = np.array(cv2.boxPoints(rect))
 
             # find the permutation of box-coordinates which
             # are "aligned" appropriately with the character-bb.
             # (exhaustive search over all possible assignments):
-            cc_tblr = np.c_[cc[0,:],
-                            cc[-3,:],
-                            cc[-2,:],
-                            cc[3,:]].T
+            cc_tblr = np.c_[cc[0, :],
+                            cc[-3, :],
+                            cc[-2, :],
+                            cc[3, :]].T
             perm4 = np.array(list(itertools.permutations(np.arange(4))))
             dists = []
             for pidx in range(perm4.shape[0]):
-                d = np.sum(np.linalg.norm(box[perm4[pidx],:]-cc_tblr,axis=1))
+                d = np.sum(np.linalg.norm(box[perm4[pidx], :] - cc_tblr, axis=1))
                 dists.append(d)
-            wordBB[:,:,i] = box[perm4[np.argmin(dists)],:].T
+            wordBB[:, :, i] = box[perm4[np.argmin(dists)], :].T
 
         return wordBB
 
-
-    def render_text(self,rgb,depth,seg,area,label,ninstance=1,viz=False):
+    def render_text(self, rgb, depth, seg, area, label, ninstance=1):
         """
-        rgb   : HxWx3 image rgb values (uint8)
-        depth : HxW depth values (float)
-        seg   : HxW segmentation region masks
-        area  : number of pixels in each region
-        label : region labels == unique(seg) / {0}
-                i.e., indices of pixels in SEG which
-                constitute a region mask
-        ninstance : no of times image should be
-                    used to place text.
-
-        @return:
+        Renders text onto the given image and returns the rendered instances.
+
+        Args:
+            rgb : HxWx3 image rgb values (uint8)
+            depth : HxW depth values (float)
+            seg : HxW segmentation region masks
+            area : number of pixels in each region
+            label : region labels == unique(seg) / {0}
+                    i.e., indices of pixels in SEG which
+                    constitute a region mask
+            ninstance : number of times image should be
+                        used to place text.
+
+        Returns:
            res : a list of dictionaries, one for each of
                  the image instances.
                  Each dictionary has the following structure:
-                     'img' : rgb-image with text on it.
-                     'bb'  : 2x4xn matrix of bounding-boxes
+                     'img' : rgb-image with text on it.
+                     'bb' : 2x4xn matrix of bounding-boxes
                              for each character in the image.
-                     'txt' : a list of strings.
+                     'txt' : a list of strings.
+                     'masks': a list of masks of text placed on the image.
+                              Shape of each mask is the same as shape of original image.
                  The correspondence b/w bb and txt is that
                  i-th non-space white-character in txt is at bb[:,:,i].
@@ -609,33 +612,34 @@ def render_text(self,rgb,depth,seg,area,label,ninstance=1,viz=False):
         try:
             # depth -> xyz
             xyz = su.DepthCamera.depth2xyz(depth)
-
+
             # find text-regions:
-            regions = TextRegions.get_regions(xyz,seg,area,label)
+            regions = TextRegions.get_regions(xyz, seg, area, label)
 
             # find the placement mask and homographies:
-            regions = self.filter_for_placement(xyz,seg,regions)
+            regions = self.filter_for_placement(xyz, seg, regions)
 
             # finally place some text:
             nregions = len(regions['place_mask'])
-            if nregions < 1: # no good region to place text on
+            if nregions < 1:  # no good region to place text on
                 return []
         except:
            # failure in pre-text placement
-            #import traceback
+            # import traceback
            traceback.print_exc()
            return []
 
         res = []
         for i in range(ninstance):
+            # place_masks is a local copy of the list of collision masks. It is updated, but is not really used.
            place_masks = copy.deepcopy(regions['place_mask'])
 
-            print (colorize(Color.CYAN, " ** instance # : %d"%i))
+            print(colorize(Color.CYAN, " ** instance # : %d" % i))
 
-            idict = {'img':[], 'charBB':None, 'wordBB':None, 'txt':None}
+            idict = {'img': [], 'charBB': None, 'wordBB': None, 'txt': None}
 
-            m = self.get_num_text_regions(nregions)#np.arange(nregions)#min(nregions, 5*ninstance*self.max_text_regions))
-            reg_idx = np.arange(min(2*m,nregions))
+            m = self.get_num_text_regions(nregions)
+            reg_idx = np.arange(min(2 * m, nregions))
             np.random.shuffle(reg_idx)
             reg_idx = reg_idx[:m]
@@ -643,25 +647,30 @@
             img = rgb.copy()
             itext = []
             ibb = []
+            masks = []
 
             # process regions:
             num_txt_regions = len(reg_idx)
-            NUM_REP = 5 # re-use each region three times:
+            NUM_REP = 5  # re-use each region five times:
             reg_range = np.arange(NUM_REP * num_txt_regions) % num_txt_regions
+
+            if DEBUG:
+                print(" ... try text rendering for %d regions" % len(reg_range))
+
             for idx in reg_range:
                 ireg = reg_idx[idx]
                 try:
                     if self.max_time is None:
-                        txt_render_res = self.place_text(img,place_masks[ireg],
+                        txt_render_res = self.place_text(img, place_masks[ireg],
                                                          regions['homography'][ireg],
                                                          regions['homography_inv'][ireg])
                     else:
                         with time_limit(self.max_time):
-                            txt_render_res = self.place_text(img,place_masks[ireg],
+                            txt_render_res = self.place_text(img, place_masks[ireg],
                                                              regions['homography'][ireg],
                                                              regions['homography_inv'][ireg])
                 except TimeoutException as msg:
-                    print (msg)
+                    print(msg)
                     continue
                 except:
                     traceback.print_exc()
@@ -669,25 +678,24 @@
                     continue
 
                 if txt_render_res is not None:
+                    if DEBUG:
+                        print(" ... text rendering attempt finished successfully")
                     placed = True
-                    img,text,bb,collision_mask = txt_render_res
+                    img, text, bb, collision_mask = txt_render_res
                     # update the region collision mask:
-                    place_masks[ireg] = collision_mask
+                    # place_masks[ireg] = collision_mask  # no point in doing that, already updated inside place_text method
+                    masks.append(collision_mask)
 
                     # store the result:
                     itext.append(text)
                     ibb.append(bb)
 
-            if placed:
+            if placed:  # at least 1 word was placed in this instance:
                 idict['img'] = img
                 idict['txt'] = itext
                 idict['charBB'] = np.concatenate(ibb, axis=2)
                 idict['wordBB'] = self.char2wordBB(idict['charBB'].copy(), ' '.join(itext))
+                idict['masks'] = masks
+                idict['labeled_region'] = regions['label']
                 res.append(idict.copy())
-            if viz:
-                viz_textbb(1,img, [idict['wordBB']], alpha=1.0)
-                viz_masks(2,img,seg,depth,regions['label'])
-                # viz_regions(rgb.copy(),xyz,seg,regions['coeff'],regions['label'])
-                if i < ninstance-1:
-                    raw_input(colorize(Color.BLUE,'continue?',True))
         return res
diff --git a/visualize_results.py b/visualize_results.py
index 7b578c6..c143635 100644
--- a/visualize_results.py
+++ b/visualize_results.py
@@ -24,7 +24,6 @@ def viz_textbb(text_im, charBB_list, wordBB, alpha=1.0):
     plt.close(1)
     plt.figure(1)
    plt.imshow(text_im)
-    plt.hold(True)
     H,W = text_im.shape[:2]
 
     # plot the character-BB:
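
A minimal usage sketch for the refactored data loading (illustrative only, not part of the patch; the directory layout is an assumption based on data_provider.py and the --data help text):

    from data_provider import DataProvider

    # point at a directory containing depth.h5, seg.h5 and bg_img/,
    # or pass None to download and open the small default dset.h5 archive:
    provider = DataProvider('data')
    for imname in provider.get_imnames():
        img = provider.get_image(imname)     # PIL image, converted to RGB
        depth = provider.get_depth(imname)   # HxW depth estimate
        seg = provider.get_segmap(imname)    # region masks; 'area'/'label' in .attrs
    provider.close()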
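
With --output-masks, gen.py writes one word per line into each <prefix>_bb.txt file: the 2x4 word box flattened to eight comma-separated floats, followed by the word itself. A hypothetical reader sketch (read_word_boxes is not part of the patch; it assumes the write format above and re-joins any commas inside the word):

    import numpy as np

    def read_word_boxes(path):
        boxes, words = [], []
        with open(path) as f:
            for line in f:
                parts = line.rstrip("\n").split(",")
                # eight floats -> 4 corners x (x, y), matching np.swapaxes(bbs, 2, 0)[j]
                boxes.append(np.array(parts[:8], dtype=np.float32).reshape(4, 2))
                words.append(",".join(parts[8:]))
        return boxes, words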