octave_packages/signal-1.1.3/specgram.m

   1 ## Copyright (C) 1999-2001 Paul Kienzle <pkienzle@users.sf.net>
   2 ##
   3 ## This program is free software; you can redistribute it and/or modify it under
   4 ## the terms of the GNU General Public License as published by the Free Software
   5 ## Foundation; either version 3 of the License, or (at your option) any later
   6 ## version.
   7 ##
   8 ## This program is distributed in the hope that it will be useful, but WITHOUT
   9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11 ## details.
  12 ##
  13 ## You should have received a copy of the GNU General Public License along with
  14 ## this program; if not, see <http://www.gnu.org/licenses/>.
  15
  16 ## usage: [S [, f [, t]]] = specgram(x [, n [, Fs [, window [, overlap]]]])
  17 ##
  18 ## Generate a spectrogram for the signal. This chops the signal into
  19 ## overlapping slices, windows each slice and applies a Fourier
  20 ## transform to determine the frequency components at that slice.
  21 ##
  22 ## x: vector of samples
  23 ## n: size of fourier transform window, or [] for default=256
  24 ## Fs: sample rate, or [] for default=2 Hz
  25 ## window: shape of the fourier transform window, or [] for default=hanning(n)
  26 ##    Note: window length can be specified instead, in which case
  27 ##    window=hanning(length)
  28 ## overlap: overlap with previous window, or [] for default=length(window)/2
  29 ##
  30 ## Return values
  31 ##    S is complex output of the FFT, one row per slice
  32 ##    f is the frequency indices corresponding to the rows of S.
  33 ##    t is the time indices corresponding to the columns of S.
  34 ##    If no return value is requested, the spectrogram is displayed instead.
  35 ##
  36 ## Example
  37 ##    x = chirp([0:0.001:2],0,2,500);  # freq. sweep from 0-500 over 2 sec.
  38 ##    Fs=1000;                  # sampled every 0.001 sec so rate is 1 kHz
  39 ##    step=ceil(20*Fs/1000);    # one spectral slice every 20 ms
  40 ##    window=ceil(100*Fs/1000); # 100 ms data window
  41 ##    specgram(x, 2^nextpow2(window), Fs, window, window-step);
  42 ##
  43 ##    ## Speech spectrogram
  44 ##    [x, Fs] = auload(file_in_loadpath("sample.wav")); # audio file
  45 ##    step = fix(5*Fs/1000);     # one spectral slice every 5 ms
  46 ##    window = fix(40*Fs/1000);  # 40 ms data window
  47 ##    fftn = 2^nextpow2(window); # next highest power of 2
  48 ##    [S, f, t] = specgram(x, fftn, Fs, window, window-step);
  49 ##    S = abs(S(2:fftn*4000/Fs,:)); # magnitude in range 0<f<=4000 Hz.
  50 ##    S = S/max(S(:));           # normalize magnitude so that max is 0 dB.
  51 ##    S = max(S, 10^(-40/10));   # clip below -40 dB.
  52 ##    S = min(S, 10^(-3/10));    # clip above -3 dB.
  53 ##    imagesc(t, f, flipud(log(S)));   # display in log scale
  54 ##
  55 ## The choice of window defines the time-frequency resolution.  In
  56 ## speech for example, a wide window shows more harmonic detail while a
  57 ## narrow window averages over the harmonic detail and shows more
  58 ## formant structure. The shape of the window is not so critical so long
  59 ## as it goes gradually to zero on the ends.
  60 ##
  61 ## Step size (which is window length minus overlap) controls the
  62 ## horizontal scale of the spectrogram. Decrease it to stretch, or
  63 ## increase it to compress. Increasing step size will reduce time
  64 ## resolution, but decreasing it will not improve it much beyond the
  65 ## limits imposed by the window size (you do gain a little bit,
  66 ## depending on the shape of your window, as the peak of the window
  67 ## slides over peaks in the signal energy).  The range 1-5 msec is good
  68 ## for speech.
  69 ##
  70 ## FFT length controls the vertical scale.  Selecting an FFT length
  71 ## greater than the window length does not add any information to the
  72 ## spectrum, but it is a good way to interpolate between frequency
  73 ## points which can make for prettier spectrograms.
  74 ##
  75 ## After you have generated the spectral slices, there are a number of
  76 ## decisions for displaying them.  First the phase information is
  77 ## discarded and the energy normalized:
  78 ##
  79 ##     S = abs(S); S = S/max(S(:));
  80 ##
  81 ## Then the dynamic range of the signal is chosen.  Since information in
  82 ## speech is well above the noise floor, it makes sense to eliminate any
  83 ## dynamic range at the bottom end.  This is done by taking the max of
  84 ## the magnitude and some minimum energy such as minE=-40dB. Similarly,
  85 ## there is not much information in the very top of the range, so
  86 ## clipping to a maximum energy such as maxE=-3dB makes sense:
  87 ##
  88 ##     S = max(S, 10^(minE/10)); S = min(S, 10^(maxE/10));
  89 ##
  90 ## The frequency range of the FFT is from 0 to the Nyquist frequency of
  91 ## one half the sampling rate.  If the signal of interest is band
  92 ## limited, you do not need to display the entire frequency range. In
  93 ## speech for example, most of the signal is below 4 kHz, so there is no
  94 ## reason to display up to the Nyquist frequency of 10 kHz for a 20 kHz
  95 ## sampling rate.  In this case you will want to keep only the first 40%
  96 ## of the rows of the returned S and f.  More generally, to display the
  97 ## frequency range [minF, maxF], you could use the following row index:
  98 ##
  99 ##     idx = (f >= minF & f <= maxF);
 100 ##
 101 ## Then there is the choice of colormap.  A brightness varying colormap
 102 ## such as copper or bone gives good shape to the ridges and valleys. A
 103 ## hue varying colormap such as jet or hsv gives an indication of the
 104 ## steepness of the slopes.  The final spectrogram is displayed in log
 105 ## energy scale and by convention has low frequencies on the bottom of
 106 ## the image:
 107 ##
 108 ##     imagesc(t, f, flipud(log(S(idx,:))));
 109
 110 function [S_r, f_r, t_r] = specgram(x, n = min(256, length(x)), Fs = 2, window = hanning(n), overlap = ceil(length(window)/2))
 111
 112   if nargin < 1 || nargin > 5
 113     print_usage;
 114   ## make sure x is a vector
 115   elseif columns(x) != 1 && rows(x) != 1
 116     error ("specgram data must be a vector");
 117   end
 118   if columns(x) != 1, x = x'; end
 119
 120   ## if only the window length is given, generate hanning window
 121   if length(window) == 1, window = hanning(window); end
 122
 123   ## should be extended to accept a vector of frequencies at which to
 124   ## evaluate the fourier transform (via filterbank or chirp
 125   ## z-transform)
 126   if length(n)>1,
 127     error("specgram doesn't handle frequency vectors yet");
 128   endif
 129
 130   ## compute window offsets
 131   win_size = length(window);
 132   if (win_size > n)
 133     n = win_size;
 134     warning ("specgram fft size adjusted to %d", n);
 135   end
 136   step = win_size - overlap;
 137
 138   ## build matrix of windowed data slices
 139   offset = [ 1 : step : length(x)-win_size ];
 140   S = zeros (n, length(offset));
 141   for i=1:length(offset)
 142     S(1:win_size, i) = x(offset(i):offset(i)+win_size-1) .* window;
 143   endfor
 144
 145   ## compute fourier transform
 146   S = fft (S);
 147
 148   ## extract the positive frequency components
 149   if rem(n,2)==1
 150     ret_n = (n+1)/2;
 151   else
 152     ret_n = n/2;
 153   end
 154   S = S(1:ret_n, :);
 155
 156   f = [0:ret_n-1]*Fs/n;
 157   t = offset/Fs;
 158   if nargout==0
 159     imagesc(t, f, 20*log10(abs(S)));
 160     set (gca (), "ydir", "normal");
 161     xlabel ("Time")
 162     ylabel ("Frequency")
 163   endif
 164   if nargout>0, S_r = S; endif
 165   if nargout>1, f_r = f; endif
 166   if nargout>2, t_r = t; endif
 167
 168 endfunction
 169
 170 %!shared S,f,t,x
 171 %! Fs=1000;
 172 %! x = chirp([0:1/Fs:2],0,2,500);  # freq. sweep from 0-500 over 2 sec.
 173 %! step=ceil(20*Fs/1000);    # one spectral slice every 20 ms
 174 %! window=ceil(100*Fs/1000); # 100 ms data window
 175 %! [S, f, t] = specgram(x);
 176
 177 %! ## test of returned shape
 178 %!assert (rows(S), 128)
 179 %!assert (columns(f), rows(S))
 180 %!assert (columns(t), columns(S))
 181 %!test [S, f, t] = specgram(x');
 182 %!assert (rows(S), 128)
 183 %!assert (columns(f), rows(S));
 184 %!assert (columns(t), columns(S));
 185 %!error (isempty(specgram([])));
 186 %!error (isempty(specgram([1, 2 ; 3, 4])));
 187 %!error (specgram)
 188
 189 %!demo
 190 %! Fs=1000;
 191 %! x = chirp([0:1/Fs:2],0,2,500);  # freq. sweep from 0-500 over 2 sec.
 192 %! step=ceil(20*Fs/1000);    # one spectral slice every 20 ms
 193 %! window=ceil(100*Fs/1000); # 100 ms data window
 194 %!
 195 %! ## test of automatic plot
 196 %! [S, f, t] = specgram(x);
 197 %! specgram(x, 2^nextpow2(window), Fs, window, window-step);
 198 %! disp("shows a diagonal from bottom left to top right");
 199 %! input("press enter:","s");
 200 %!
 201 %! ## test of returned values
 202 %! S = specgram(x, 2^nextpow2(window), Fs, window, window-step);
 203 %! imagesc(20*log10(flipud(abs(S))));
 204 %! disp("same again, but this time using returned value");
 205
 206 %!demo
 207 %! ## Speech spectrogram
 208 %! [x, Fs] = auload(file_in_loadpath("sample.wav")); # audio file
 209 %! step = fix(5*Fs/1000);     # one spectral slice every 5 ms
 210 %! window = fix(40*Fs/1000);  # 40 ms data window
 211 %! fftn = 2^nextpow2(window); # next highest power of 2
 212 %! [S, f, t] = specgram(x, fftn, Fs, window, window-step);
 213 %! S = abs(S(2:fftn*4000/Fs,:)); # magnitude in range 0<f<=4000 Hz.
 214 %! S = S/max(max(S));         # normalize magnitude so that max is 0 dB.
 215 %! S = max(S, 10^(-40/10));   # clip below -40 dB.
 216 %! S = min(S, 10^(-3/10));    # clip above -3 dB.
 217 %! imagesc(flipud(20*log10(S)));
 218 %!
 219 %! % The image contains a spectrogram of 'sample.wav'