In [1]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.fftpack

import utils

Loading the audio file:

In [2]:
# Source track:
# http://freemusicarchive.org/music/The_Molecules_Art_Ensemble/Spaceship_in_blue_sky/1_-_Dancing_clowns
# Decode the file, resampling to 44.1 kHz.
y, sr = librosa.load("The_Molecules_Art_Ensemble_-_01_-_Dancing_clowns.mp3", sr=44100)

m = len(y) // 2 # the middle of the song
t = 15 * sr # 15 seconds, expressed in samples
# Clamp the start at 0: for a clip shorter than 30 seconds `m - t` is negative,
# and a negative slice start would wrap around and keep only the tail of the signal.
y = y[max(m - t, 0):m + t] # the central 30 seconds (the whole clip if it is shorter)

# Plot the cropped waveform (x axis is the sample index).
fig, ax = plt.subplots(figsize=(16, 7))
ax.plot(y)
ax.set_aspect(utils.get_aspect(ax, 16, 9))

Generating the power spectrogram (squared magnitude of the STFT):

In [3]:
# Power spectrogram: squared magnitude of the short-time Fourier transform,
# computed with a 2048-sample FFT window and a 512-sample hop.
n_fft = 2048
spectrogram = np.square(np.abs(librosa.core.stft(y, n_fft=n_fft, hop_length=512)))

# Show it as an image with row 0 at the bottom, plus a colour scale.
fig, ax = plt.subplots(figsize=(16, 7))
image = ax.imshow(spectrogram, origin="lower")
fig.colorbar(image)
ax.set_aspect(utils.get_aspect(ax, 16, 9))

Displaying the mel filter bank:

In [6]:
# Build the mel filter bank for this sample rate and FFT size.
# The arguments are passed by keyword: recent librosa releases declare them
# keyword-only, so the positional form `mel(sr, n_fft)` raises a TypeError there,
# while the keyword form is accepted by older releases as well.
mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft)

# Plot only the first 10 filters, and only the first 20 columns of each,
# so that the individual filter shapes stay readable.
fig, ax = plt.subplots(figsize=(16, 7))
for mel_filter in mel_basis[:10]:
    ax.plot(mel_filter[:20])
ax.set_aspect(utils.get_aspect(ax, 16, 9))

Applying the mel filters to generate the melspectrogram:

In [7]:
# Project the power spectrogram onto the mel filters: each output row is the
# filter-weighted sum of the spectrogram rows.
melspectrogram = mel_basis.dot(spectrogram)

# Show it as an image with row 0 at the bottom, plus a colour scale.
fig, ax = plt.subplots(figsize=(16, 7))
image = ax.imshow(melspectrogram, origin="lower")
fig.colorbar(image)
ax.set_aspect(utils.get_aspect(ax, 16, 9))

Converting the mel spectrogram from power to decibels (dB):

In [8]:
# Re-express the mel power values on a decibel scale.
melspec_db = librosa.core.power_to_db(melspectrogram)

# Show it as an image with row 0 at the bottom, plus a colour scale.
fig, ax = plt.subplots(figsize=(16, 7))
image = ax.imshow(melspec_db, origin="lower")
fig.colorbar(image)
ax.set_aspect(utils.get_aspect(ax, 16, 9))

Using the DCT to generate the MFCCgram (keeping the first 20 coefficients):

In [10]:
# Number of cepstral coefficients to keep; named so it can be tuned in one place.
n_mfcc = 20

# Orthonormal type-II DCT along axis 0 of the dB mel spectrogram, truncated to
# the first `n_mfcc` rows. `scipy.fftpack` is a submodule and must be imported
# explicitly; a bare `import scipy` is not guaranteed to expose it.
mfccgram = scipy.fftpack.dct(melspec_db, axis=0, type=2, norm="ortho")[:n_mfcc]

# Show it as an image with row 0 at the bottom, plus a colour scale.
fig, ax = plt.subplots(figsize=(16, 7))
image = ax.imshow(mfccgram, origin="lower")
fig.colorbar(image)
ax.set_aspect(utils.get_aspect(ax, 16, 9))