The Laplacian pyramid and the Discrete Wavelet Transform are two closed numerical transforms that can be used to obtain a multiresolution representation of a image.
The $N$-levels Gaussian pyramid of a image $I$ is defined as
\begin{equation} \left\{\begin{array}{rcl} L^0 & = & I \\ L^{n+1} & = & \text{REDUCE}(L^n);~n\in\{0,1,\cdots,N-1\} \end{array}\right. \end{equation}where the $\text{REDUCE}(s)$ operator is carried out by convolving $I$ with a Gaussian kernel $w$ (the unit impulse response, also known as the transfer function, of the corresponding Gaussian low-pass filter). A Gaussian kernel is the evaluation of a Gaussian function at (for example, 5, in the 1-dimensional case) values centered at zero and, the Gaussian coefficients must satisfy that
\begin{equation} \sum_{m=-2}^2w(m)=1 \end{equation}(the gain of the filter is $1$)
\begin{equation} w(i)=w(-i); i=0,1,2 \end{equation}(the kernel is symmetric), and being $w[0]=a$, $w[-1]=w[1]=b$ and $w[-2]=w[2]=c$, it holds that
\begin{equation} a=2b-2c. \end{equation}Notice that in this case, $w=\{\frac{1}{16}, \frac{4}{16}, \frac{6}{16}, \frac{4}{16}, \frac{1}{16}\}$. This is the 1D kernel. The 2D version can be extended or the 1D can be applied by rows and then by columns, or viceversa.
As it will be seen in the demo (see below), the Gaussian scale space exhibits several interesting properties:
The last stage of the $\text{REDUCE}(\cdot)$ operator is the decimation (the even or the odd samples are removed) of the filtered image. This is a lossless stage (without aliasing) only in the case the half of the high-frequency components of $L^n$ are not available in $L^{n+1}$. Notice that, as a consequence of the decimation, the overall energy of $L^{n+1}$ will be, on average, 1/4 of the energy of $L^n$.
# Import the urllib.request — extensible library for opening URLs module, the urllib package is included in the standard library.
import urllib.request
# Import Python bindings for OpenCV, a computer vision library. Install the package with "sudo pip3 install opencv-python
". Recomended reading OpenCV-Python Tutorials.
import cv2
# Import NumPy, an efficient array manipulation library. Install with "pip3 install numpy
".
import numpy as np
# Import the pyplot API module from matplotlib package, a library for 2D plotting. This will allow us to plot data, interactively. Install with "sudo pip3 install matplotlib
".
from matplotlib import pyplot as plt
# Configure matplotlib for plotting inside Jupyter Notebook (not in external windows).
%matplotlib inline
# Request the lena image. This function returns a file-like object.
HTTP_response = urllib.request.urlopen('http://www.hpca.ual.es/~vruiz/images/lena.png')
# Read the complete "file" HTTP_response
as a bytes object. This (inmutable) object is transformed in a (mutable) bytearray object, as the NumPy's asarray (function) converter, requires. The final result is an array object containing unsigned integers of 8 bits.
arr = np.asarray(bytearray(HTTP_response.read()), dtype=np.uint8)
# Decompress the content of the input array. Returns an image object.
BGR = cv2.imdecode(arr,-1)
# Convert the image to the YCrCb color space.
Y = cv2.cvtColor(BGR, cv2.COLOR_BGR2YCrCb)[:,:,0]
#tmp = cv2.cvtColor(I, cv2.COLOR_BGR2RGB)
#I = tmp
plt.imshow(Y, cmap="gray")
# Compute the pyramid. We use a list for storing the levels of the pyramid.
N = 5 # Number of levels of the pyhramid
L = [Y] # L^0 (the base of the pyramid)
for n in range(N-1):
print(n, len(L), len(L[n]))
L.append(cv2.pyrDown(L[n]))
plt.imshow(L[0], cmap="gray")
plt.imshow(L[1], cmap="gray")
plt.imshow(L[2], cmap="gray")
plt.imshow(L[3], cmap="gray")
plt.imshow(L[4], cmap="gray")
In the Gaussian pyramid, the image $L^{N-1}$ (the top level) may serve as a prediction for the image (next below level) $L^{N−2}$, and if this prediction (the interpolation of $L^{N-1}$) is substracted to $L^{N-2}$, the resulting error signal $H^{N-2}$ can have a lower entropy than the original $L^{N-2}$. This process can be applied to the complete Gaussian pyramid, level by level, obtaining a Laplacian pyramid by means of:
\begin{equation} \left\{\begin{array}{rcl} H^{N-1} & = & L^{N-1} \text{ /* the top level of the Gaussian pyramid */}\\ H^n & = & L^n - \text{EXPAND}(L^{n+1});~n\in\{N-2,N-3,\cdots,0\} \end{array}\right. \end{equation}The EXPAND operator performs bidimensional a 1/2-subpixel interpolation.
H = [L[N-1]]
for n in range(N-1):
interpolation = cv2.pyrUp(L[N-n-1])
difference = L[N-n-2] - interpolation + 128
H.append(difference)
H.reverse()
plt.imshow(H[0], cmap="gray") # Base of the pyramid
plt.imshow(H[1], cmap="gray")
plt.imshow(H[2], cmap="gray")
plt.imshow(H[3], cmap="gray")
plt.imshow(H[4], cmap="gray")
# Compute (and plot) the histograms of the pyramid using cv2.calcHist()
.
sum_hist = np.zeros([256,1], np.float64)
for n in range(N-1):
print(n)
hist = cv2.calcHist([H[n]], [0], None, [256], [0, 256])
sum_hist += hist
plt.plot(sum_hist)
plt.xlim([0,256])
plt.show()
# Count the number of elements in each component of the pyramid.
n_pels = 0
for n in range(N):
n_pels += ((Y.shape[0]+1) >> n) * ((Y.shape[1]+1) >> n)
print("Number of elements in the original image =", Y.shape[0]*Y.shape[1])
print("Number or elements in (a component of) the pyramid =", n_pels, "(",100.*(n_pels/(1.*Y.shape[0]*Y.shape[1])-1),"% of increment)")
# Install scipy
with pip3 install scipy
to use "scipy.stats.entropy
".
import scipy.stats as st
entropy = st.entropy(sum_hist)[0]
print("Entropy of the pyramid =", entropy)
Compute the entropy of the original lena image.
The original image $I$ can be recovered from $\{H^n; n=N-1,\cdots,0\}$, the Laplacian pyramid, using the procedure:
\begin{equation} \left\{\begin{array}{rcl} I & = & H^{N-1} \\ I & = & \text{EXPAND}(I) + H^n;~n\in\{N-2,N-3,\cdots,0\} \end{array}\right. \end{equation}Notice that, basically, we have reconstructed (in-place) the whole Gaussian Pyramid.
R = H[N-1]
for n in range(N-1):
R = cv2.pyrUp(R)
R = R + H[N-n-2] - 128
plt.imshow(R, cmap="gray")
# Check if the reconstruction is lossless
print((R == Y).all())
The DWT can be computed using two different techniques. The first one is through a cascade of quadrature mirror filters banks, also known as PRFBs (Perfect Reconstruction Filter Banks). The second one, using Lifting. The main differences between both alternatives are:
However, in order to create a comparison between pyramids and wavelets, we will work, first, with the PRFB option.
The one-step inverse DWT of a signal $s$ can be defined by:
\begin{equation} s = \uparrow^2(L)*{\cal L}^{-1} + \uparrow^2(H)*{\cal H}^{-1} \end{equation}where $L$ and $H$ are the low-pass subband and the high-pass subband, respectively, generated by the forward DWT, and ${\cal L}^{-1}$ and ${\cal H}^{-1}$ are the synthesis filters of the "inverse transform". The subbands are computed by convolving the signal $S$ as
\begin{equation} \begin{array}{rcl} L & = & \downarrow^2(s*{\cal L})\\ H & = & \downarrow^2(s*{\cal H}), \end{array} \end{equation}where ${\cal L}$ and ${\cal H}$ are (the response to the unit impulse signal of) the analysis filters of the "forward transform". If we express these filters in the $Z$-domain, in the case of a PRFB it holds that
\begin{equation} \begin{array}{rcl} {\cal L}^{-1}(z) & = & {\cal L}(-z)\\ {\cal H}^{-1}(z) & = & -{\cal H}(-z). \end{array} \end{equation}In other words, in the case of the low-pass synthesis filter ${\cal L}^{-1}$, it is the same than ${\cal L}$, but the filter has been modulated by the signal $(-1)^n; n\in\mathbb{N}$, and in the case of the high-pass synthesis filter ${\cal H}^{-1}$, the coeffients of the filter also has the oposite sign. Because the frequency response of real-domain signals (and also the transfer function of the filters) are periodic in the Fourier domain, with period $\pi$ (the frequency of the signal $(-1)^n; n\in\mathbb{N}$), in the practice we can implement the PRFB considering that
\begin{equation} \begin{array}{rcl} {\cal L}^{-1}(z) & = & {\cal L}(z)\\ {\cal H}^{-1}(z) & = & -{\cal H}(z). \end{array} \end{equation}This is the definition of the 1-level DWT. The $N$-levels DWT is computed by applying the 1-level DWT to the $L$ subband, recursively:
\begin{equation} \left\{\begin{array}{rcl} L^N & = & \downarrow^2(L^{N-1}*{\cal L}) \\ H^N & = & \downarrow^2(L^{N-1}*{\cal H}) \end{array}\right. \end{equation}By definition, $L^0=s$.
Considering the previous equation, it can be easely deduced that the LPT is described by
\begin{equation} \left\{\begin{array}{rcl} L^N & = & \downarrow^2(L^{N-1}*{\cal L}) \\ H^N & = & L^{N-1}*{\cal H}. \end{array}\right. \end{equation}Therefore, it is possible to compute the LPT by means of the DWT if the coefficients are interpolated. In the case of a image, we have for the 1-level DWT the subbands:
+------+------+
| L | LH |
+------+------+
| HL | HH |
+------+------+
# Install PyWavelets with "sudo pip3 install pywavelets
".
import pywt
# Compute the DWT of each component
print (pywt.wavelist())
wavelet = 'db7'
plt.imshow(Y, cmap='gray')
print(Y.shape)
# N-levels 2D DWT (WAVElet DEComposition)
coeffs = pywt.wavedec2(Y, wavelet, mode='per', level=N)
for l in range(0,N+1):
print(coeffs[l][0].shape, coeffs[l][1].shape, coeffs[l][2].shape)
# Reorder the H-DWT levels in the H_dwt list
H_dwt = [None]*(N+1)
LL = coeffs[0] # LL-N
for i in range(N):
H_dwt[N-i-1] = coeffs[i+1]
print(N-i-1, i+1, H_dwt[N-i-1][0].shape)
plt.imshow(H_dwt[0][2], cmap='gray')
type(H_dwt[1])
# Compute the pyramid
H_level = []
for l in range(0, N, 1):
# Create a matrix of zeros to use it as the LL subband in each level of the LPT
zero_LL = np.zeros((H_dwt[l][0].shape[0],H_dwt[l][0].shape[1]), 'float32')
DWT_level = (zero_LL, H_dwt[l])
LPT_level = pywt.idwt2(DWT_level, wavelet, mode='per')
print(l, LPT_level.shape)
H_level.append(LPT_level)
plt.imshow(H_level[0], cmap='gray')
plt.imshow(H_level[1], cmap='gray')
plt.imshow(H_level[2], cmap='gray')
plt.imshow(H_level[3], cmap='gray')
plt.imshow(H_level[4], cmap='gray')
plt.imshow(LL, cmap='gray')
sum_hist = np.zeros([256,1], np.float64)
for n in range(N):
print(n)
hist = cv2.calcHist([H_level[n].astype('uint8')+128], [0], None, [256], [0, 256])
sum_hist += hist
plt.plot(sum_hist)
plt.xlim([0,256])
plt.show()
entropy = st.entropy(sum_hist)[0]
print("Entropy of the pyramid =", entropy)
+----+
+------------------------>| LL |---------------+
| +----+ |
| v
+---------+ +-+--+----+ +---------+ +----+----+ +---------+
| | DWT(Y) | LL | LH | iDWT (2) | | DWT(H) | LL | lh | iDWT (4) | |
| Y |------->+----+----+---------------->| H |------->+----+----+----------------->| y |
| | (1) | HL | HH | (0, LH, HL, HH) | | (3) | hl | hh | (LL, lh, hl, hh) | |
+---------+ +----+----+ +---------+ +----+----+ +---------+
import urllib.request
import cv2
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import scipy.stats as st
HTTP_response = urllib.request.urlopen('http://www.hpca.ual.es/~vruiz/images/lena.png')
arr = np.asarray(bytearray(HTTP_response.read()), dtype=np.uint8)
BGR = cv2.imdecode(arr,-1)
Y = cv2.cvtColor(BGR, cv2.COLOR_BGR2YCrCb)[:,:,0]
plt.figure(figsize = (10,10))
plt.imshow(Y, cmap="gray")
import pywt
# Sólo por ver los distintos filtros que hay disponibles
print (pywt.wavelist())
# Seleccionamos una en concreto
wavelet = 'db9' # http://wavelets.pybytes.com/wavelet/db9/
#wavelet = 'haar'
#wavelet='bior3.5'
# (1)
LL, (LH, HL, HH) = pywt.dwt2(Y, wavelet)
print(Y[10][10])
print(LL[10][10])
# (2)
zero_subband = np.zeros(HH.shape, np.float64)
H = pywt.idwt2((zero_subband, (LH, HL, HH)), wavelet)
print(H[10][10])
# (3)
should_be_zero, (lh, hl, hh) = pywt.dwt2(H, wavelet)
# (4)
y = pywt.idwt2((LL, (lh, hl, hh)), wavelet)
print(y[10][10])
plt.figure(figsize = (10,10))
plt.imshow(y, cmap="gray")
diff = Y-y
plt.figure(figsize = (10,10))
plt.imshow(diff, cmap="gray")