Source code for compute_energy

# -*- coding: utf-8 -*-
from time import time

import cv2 
import numpy as np
from tqdm import tqdm

import utils 


###############################################################################
def compute_energy_data(
        frame_index, sequence, window_side=10, sigma_c=1.0, sigma_d=2.5):
    """Compute the data cost term, for frame *frame_index*, to be used by
    the LBP algorithm.

    This function computes an array with shape `[m, h, w]`, where `m` is the
    number of depth labels used, and `h` and `w` are respectively the
    pictures' height and width. The value in this array at position
    `(d, y, x)` is inversely proportional to the likelihood that pixel
    `(y, x)` has disparity `d`. This value is computed using multi-view
    photo-consistency constraints and, if ``sequence.use_bundle() == True``,
    geometric consistency with previously estimated depth-maps (*bundle
    optimization*).

    The result of this function should be used as the **data cost** for the
    LBP algorithm (see :func:`lbp.lbp`).

    See Also
    --------
    lbp.lbp: *Loopy Belief Propagation* implementation.

    Parameters
    ----------
    frame_index : int
        Frame whose depth-map is estimated.
    sequence : utils.Sequence
        Object containing the parameters necessary for the depth-map
        estimation: the camera matrices, picture arrays, length of the
        sequence, etc. If *sequence* also contains previously estimated
        depth-maps, then the *bundle optimization* phase is executed as well.

    Returns
    -------
    numpy array, type float32
        Array with shape `[m, h, w]`, with `m` the number of possible depth
        labels, and `h` and `w` the height and width of frame *frame_index*.
    """
    # TODO check input validity
    # set the range of possible frames
    t, start, end = frame_index, 0, sequence.end - sequence.start
    if t - window_side < start:
        window_start = start
        window_end = min(end, window_start + window_side*2 + 1)
    elif t + window_side + 1 > end:
        window_end = end
        window_start = max(start, window_end - window_side*2 - 1)
    else:
        window_start = t - window_side
        window_end = t + window_side + 1

    # get parameters
    h, w = sequence.height, sequence.width
    levels = sequence.depth_levels
    depth_values = np.linspace(
        sequence.depth_min, sequence.depth_max, sequence.depth_levels,
        dtype=np.float32)

    # get image at frame t
    I_t = sequence.I[t]

    # initialize coordinate grids
    x_h = homogeneous_coord_grid(h, w)              # [3, h, w]
    d = np.zeros([h, w], dtype=np.float32)          # [h, w]

    # initialize likelihood table L
    L = np.zeros([levels, h, w], dtype=np.float32)  # [levels, h, w]

    for t_prime in tqdm(range(window_start, window_end)):
        # skip if same image
        if t_prime == t:
            continue
        # get image at frame t_prime
        I_t_prime = sequence.I[t_prime]

        for level in range(levels):
            # photo-consistency constraint ---------------------------------
            # fill each position with the current level's depth value
            d[:, :] = depth_values[level]

            # compute conjugate pixel positions using the depth level at
            # time t
            x_prime_h = conjugate_coordinates(
                sequence=sequence, pose1=t, pose2=t_prime, coorsxy=x_h, d=d)

            # remap the image w.r.t. the projected pixels
            x_prime = np.transpose(x_prime_h[:2, :, :], [1, 2, 0])
            I_t_prime_projected = cv2.remap(
                src=I_t_prime, map1=x_prime, map2=None,
                interpolation=cv2.INTER_NEAREST, borderValue=[128, 128, 128])

            # compute the photo-consistency term (pc)
            color_difference = L2_norm(
                I_t, I_t_prime_projected, keepdims=False)
            pc = sigma_c/(sigma_c + color_difference)

            # check if bundle optimization is required
            if not sequence.use_bundle():
                # update likelihood using the photo-consistency constraint
                L[level, :, :] += pc
            else:
                # geometric-consistency constraint -------------------------
                # sample the previously estimated depth-map of frame t_prime
                # at the candidate pixel positions
                depthmap = sequence.D[t_prime]
                d = cv2.remap(
                    src=depthmap, map1=x_prime, map2=None,
                    interpolation=cv2.INTER_NEAREST,
                    borderValue=int(levels/2.0))

                # project back from t_prime to t using the previously
                # estimated depth values
                projected_x_prime_h = conjugate_coordinates(
                    sequence=sequence, pose1=t_prime, pose2=t,
                    coorsxy=x_prime_h, d=d)

                # squared distance between the original and back-projected
                # coordinates
                coord_difference = np.sum(
                    np.square(x_h - projected_x_prime_h),
                    axis=0, keepdims=False)

                # compute the bundle optimization term (pv)
                pv = np.exp(coord_difference/(-2*sigma_d*sigma_d))

                # update likelihood using photo and geometric constraints
                L[level, :, :] += pc*pv

    # normalize the likelihoods and return the energy values
    u = np.reciprocal(L.max(axis=0, keepdims=True))
    return 1 - u*L
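
# The sketch below shows how compute_energy_data can be exercised end to end
# on a toy two-frame sequence. It is illustrative only: the SimpleNamespace
# object mimics just the utils.Sequence attributes read above (I, K, Rs, Ts,
# start, end, height, width, depth_levels, depth_min, depth_max, use_bundle);
# the real class may expose a different interface.
def _example_compute_energy_data():
    from types import SimpleNamespace
    h, w = 4, 5
    eye = np.matrix(np.eye(3, dtype=np.float32))
    t0 = np.matrix(np.zeros([1, 3], dtype=np.float32))
    frames = np.random.rand(2, h, w, 3).astype(np.float32)
    seq = SimpleNamespace(
        start=0, end=2, height=h, width=w,
        depth_levels=3, depth_min=0.1, depth_max=1.0,
        K=eye, Rs=[eye, eye], Ts=[t0, t0],
        I=frames, use_bundle=lambda: False)   # photo-consistency term only
    cost = compute_energy_data(frame_index=0, sequence=seq, window_side=1)
    assert cost.shape == (3, h, w)            # one cost map per depth label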
#------------------------------------------------------------------------------
def conjugate_coordinates(sequence, pose1, pose2, coorsxy, d):
    """Return the image pixel coordinates with respect to two different
    camera poses.

    Parameters
    ----------
    sequence : utils.Sequence
        Sequence object containing the camera parameters.
    pose1 : int
        Index of the first pose in *sequence*.
    pose2 : int
        Index of the second pose in *sequence*.
    coorsxy : numpy array, type float32
        Homogeneous camera coordinates of shape [3, h, w]; the first axis
        represents the coordinate itself.
    d : numpy array, type float32
        Array of shape [h, w] that indicates the disparity to use for a
        certain pixel while computing the conjugate point.

    Returns
    -------
    numpy array, type float32
        Array with shape [3, h, w] representing the conjugate coordinates.
    """
    # check input validity ----------------------------------------------------
    K = sequence.K
    R1, T1 = sequence.Rs[pose1], sequence.Ts[pose1]
    R2, T2 = sequence.Rs[pose2], sequence.Ts[pose2]
    h, w = sequence.height, sequence.width
    assert isinstance(coorsxy, np.ndarray)
    assert isinstance(d, np.ndarray)
    assert coorsxy.dtype == np.float32
    assert d.dtype == np.float32
    assert coorsxy.shape == (3, h, w)
    assert d.shape == (h, w)

    # compute pixel candidates w.r.t. the given camera parameters -------------
    coorsxy = np.reshape(coorsxy, [3, -1])
    d = np.reshape(d, [1, -1])
    depth = (T1 - T2).T * d
    remap = (K*R2.T) * ((R1*K.I) * coorsxy + depth)
    remap = np.divide(remap, remap[2, :])
    remap = np.reshape(np.asarray(remap), [3, h, w])
    return remap
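
# A minimal sanity check for the geometry above: when the two poses coincide
# (identical rotations and translations), the conjugate coordinates must be
# the input coordinates themselves, whatever disparity is used. The
# SimpleNamespace stand-in only mimics the sequence attributes read above.
def _example_conjugate_coordinates():
    from types import SimpleNamespace
    h, w = 4, 5
    eye = np.matrix(np.eye(3, dtype=np.float32))
    t0 = np.matrix(np.zeros([1, 3], dtype=np.float32))
    seq = SimpleNamespace(K=eye, Rs=[eye, eye], Ts=[t0, t0], height=h, width=w)
    coords = homogeneous_coord_grid(h, w)
    d = np.full([h, w], 0.5, dtype=np.float32)
    assert np.allclose(conjugate_coordinates(seq, 0, 1, coords, d), coords)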
#------------------------------------------------------------------------------
def L2_norm(img_a, img_b, keepdims=True):
    """Compute the norm of the per-pixel difference between two images.

    Parameters
    ----------
    img_a : numpy array, type float32
        First image, shape [h, w, 3].
    img_b : numpy array, type float32
        Second image, shape [h, w, 3].
    keepdims : bool
        Whether to keep the reduced channel axis in the output.

    Returns
    -------
    numpy array
        Array representing the norm of the difference between the two
        images. The array has shape [h, w, 1] if ``keepdims == True``,
        [h, w] otherwise.
    """
    assert isinstance(img_a, np.ndarray)
    assert isinstance(img_b, np.ndarray)
    assert img_a.dtype == np.float32
    assert img_b.dtype == np.float32
    assert len(img_a.shape) == 3
    assert len(img_b.shape) == 3
    assert img_b.shape == img_a.shape
    return np.sqrt(np.sum(np.square(img_a - img_b), axis=-1, keepdims=keepdims))
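
# A quick worked example of L2_norm: for two constant single-pixel images
# differing by 2 in every channel, the per-pixel Euclidean norm is
# sqrt(2^2 + 2^2 + 2^2) = sqrt(12).
def _example_L2_norm():
    a = np.zeros([1, 1, 3], dtype=np.float32)
    b = np.full([1, 1, 3], 2.0, dtype=np.float32)
    assert np.allclose(L2_norm(a, b, keepdims=False), np.sqrt(12.0))
    assert L2_norm(a, b, keepdims=True).shape == (1, 1, 1)  # channel axis kept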
#------------------------------------------------------------------------------
def homogeneous_coord_grid(h, w):
    '''Compute a grid of homogeneous coordinates having three dimensions.

    Parameters
    ----------
    h : int
        Height.
    w : int
        Width.

    Returns
    -------
    out : numpy array, type float32
        Grid of homogeneous coordinates with shape [3, h, w], whose first
        axis indicates the coordinate itself, that is,

        | ``out[0, :, x] = x``
        | ``out[1, y, :] = y``
        | ``out[2, :, :] = 1``
    '''
    assert type(h) == int
    assert type(w) == int
    X, Y = np.meshgrid(
        np.arange(w, dtype=np.float32),
        np.arange(h, dtype=np.float32),
        indexing='xy')
    X, Y, Z = (np.expand_dims(X, axis=0),
               np.expand_dims(Y, axis=0),
               np.ones([1, h, w], np.float32))
    return np.concatenate([X, Y, Z], axis=0)
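
# A small check of the grid layout documented above: with indexing='xy',
# axis 0 of the output stores the x coordinate, axis 1 the y coordinate, and
# axis 2 the constant homogeneous component.
def _example_homogeneous_coord_grid():
    grid = homogeneous_coord_grid(2, 3)
    assert grid.shape == (3, 2, 3)
    assert np.all(grid[0, :, 2] == 2.0)  # out[0, :, x] = x
    assert np.all(grid[1, 1, :] == 1.0)  # out[1, y, :] = y
    assert np.all(grid[2] == 1.0)        # out[2, :, :] = 1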
#------------------------------------------------------------------------------
def lambda_factor(image, ws=1.0, epsilon=1.0):
    '''Compute the smoothness weights for the LBP algorithm.

    This function computes the edge weights that will be used during the
    execution of the *Loopy Belief Propagation* algorithm. The values are
    inversely proportional to the difference between adjacent pixels'
    colors.

    Notes
    -----
    The formula used to compute the lambda factor for an edge `(x,y)` is

    .. math:: \\lambda(x,y) = w_s\\cdot \\frac{u_\\lambda(x)}{||I(x) - I(y)|| + \\epsilon},

    with :math:`w_s` and :math:`\\epsilon` positive constant values,
    :math:`I(x)` the image's color at pixel `x`, and :math:`u_\\lambda(x)`

    .. math:: u_\\lambda(x) = {|N(x)|}\\big/{\\sum_{y'\\in N(x)} \\frac{1}{||I(x) - I(y')||+\\epsilon}}.

    See Also
    --------
    lbp.lbp : *Loopy Belief Propagation* implementation.

    Parameters
    ----------
    image : numpy array, type float32
        Array representing an image (so with shape [`h`, `w`, 3]).
    ws : float
        The constant :math:`w_s` scaling every weight.
    epsilon : float
        The constant :math:`\\epsilon` preventing divisions by zero.

    Returns
    -------
    list of numpy arrays, type float32
        Said list contains four smoothness weight arrays with shape
        [`h`, `w`], one matrix per neighbor direction. More precisely, if we
        denote the four matrices as
        :math:`M_{up}, M_{down}, M_{left}, M_{right}`, we have that

        * :math:`M_{up}[y,x] = \\lambda((y,x), (y+1, x)),`
        * :math:`M_{down}[y,x] = \\lambda((y,x), (y-1, x)),`
        * :math:`M_{left}[y,x] = \\lambda((y,x), (y, x+1)),`
        * :math:`M_{right}[y,x] = \\lambda((y,x), (y, x-1)).`
    '''
    assert isinstance(image, np.ndarray)
    assert image.dtype == np.float32
    assert len(image.shape) == 3
    h, w, _ = image.shape

    directions = utils.DIRECTIONS
    img = [None]*len(directions)
    lambda_factor = [None]*len(directions)
    up, do, le, ri = utils.UP, utils.DOWN, utils.LEFT, utils.RIGHT

    # shift the image by one pixel in each direction, so that img[j][y, x]
    # holds the color of the neighbor of (y, x) in direction j
    img[up] = cv2.warpAffine(image, utils.AFFINE_DIR[do], dsize=(w, h))
    img[do] = cv2.warpAffine(image, utils.AFFINE_DIR[up], dsize=(w, h))
    img[le] = cv2.warpAffine(image, utils.AFFINE_DIR[ri], dsize=(w, h))
    img[ri] = cv2.warpAffine(image, utils.AFFINE_DIR[le], dsize=(w, h))

    u_denominator = np.zeros([h, w], dtype=np.float32)

    # compute the partial lambda factors and the denominator of the u_lambda
    # term
    for j in directions:
        inverse_color_diff = \
            np.float32(1)/(L2_norm(image, img[j], False) + epsilon)
        u_denominator += inverse_color_diff
        lambda_factor[j] = inverse_color_diff

    # compute the u_lambda term (an approximation: it is incorrect on the
    # image borders, where pixels have fewer than four neighbors)
    u_lambda = np.float32(4)/u_denominator

    # compute the final lambda smoothness weight for each edge
    for j in directions:
        lambda_factor[j] = ws*u_lambda*lambda_factor[j]
    return lambda_factor
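#------------------------------------------------------------------------------
# A minimal sketch of the weights' behavior, assuming the utils.AFFINE_DIR
# matrices encode one-pixel shifts (it runs against the project's utils
# module). On a constant image every color difference is zero, so in the
# interior u_lambda = 4/(4/epsilon) = epsilon and every weight reduces to
# ws * epsilon * (1/epsilon) = ws; the borders are excluded because the
# u_lambda approximation is incorrect there.
def _example_lambda_factor():
    img = np.ones([6, 7, 3], dtype=np.float32)
    weights = lambda_factor(img, ws=1.0, epsilon=1.0)
    for m in weights:
        assert np.allclose(m[1:-1, 1:-1], 1.0)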