In accordance with David's wishes that any changes be shared.......
%==========================================================
% lossyFLAC.m
%==========================================================
% David Robinson, 2007
% This code is open source. I'll pick a licence later (need advice), but:
% Any changes to this MATLAB code must be shared freely
% Any changes to the algorithm must be shared freely
% Any other implementations / interpretations must be shared freely
% You are free to include any implementation in commercial and/or closed source code, but must give an acknowledgement in help/about or similar.
% No warrantee / guarantee. This is work in progress and not debugged.
% Things to add:
% dither
% ms checking
% ignore digital silence
% work on .wav files in blocks
% look one before and after for short FFT
% Add 31/32 + retain 5 bits declipping option - done
% Lossless correction file
% optimise, inc:
% put 20log10 outside of min()
% buffer hanning
% Set the source path
%==========================================================
source_path='c:/data_nic/octave/wav/';
% external variables (i.e. 'switches' when implemented properly!)
%==========================================================
noise_threshold_shift=0;
% noise threshold shift. average level of added quantisation
% noise relative to the lowest amplitude frequency bin (default=0=equal!)
low_frequency_limit=20;
high_frequency_limit=16000;
% Frequency range over which to check for lowest amplitude signal
minimum_bits_to_keep=0;
choose_spread=1;
fix_clipped=0;
% 0 = do nothing;
% 1 - 8 = reduce by (2^n-1)/(2^n), stay at 16-bits, keep at least 5 bits;
% 9 = reduce by 6dB and switch to 24-bit;
flac_blocksize=1024;
% blocksize of lossless codec
% internal constants (i.e. no need to change them)
%==========================================================
noise_averages=1000;
inaudible=1E-10; % small number to add to audio to prevent log(0)
% Get a list of all .wav files in that source path
%==========================================================
filenamelist=dir([source_path "*.wav"]);
% Loop through them all, creating lossy versions
%==========================================================
for loop=1:length(filenamelist),
filename=filenamelist(loop).name;
% load file
[inaudio,fs,bs]=wavread_raw([filename]); % Octave puts path & filename into "filename".
% This reduces the amplitude by (2^n-1)/(2^n) and keeps at least 5 bits
% (1 = 1/2, 2 = 3/4, 3 = 7/8, 4 = 15/16, 5 = 31/32, 6 = 63/64, 7 = 127/128, 8 = 255/256)
if (fix_clipped>=1) & (fix_clipped<=8),
inaudio=inaudio.*((2^(fix_clipped)-1)/2^fix_clipped);
minimum_bits_to_keep=max(minimum_bits_to_keep,5);
end
% This pads it to 23 bits: It's treated as 24 bit data at the end, effectively dropping it by 6dB - Same as fix_clipped=1 but saves last bit.
if (fix_clipped==9),
inaudio = inaudio.*(2^(23-bs));
bs=23;
end
[samples channels]=size(inaudio);
% create integer copy (MATLAB wav(e)read loads audio with the range +/-1) - removed, now dealing with integer values.
% inaudio_int=inaudio.*(2^(bs-1))+inaudible;
% Set up the FFT analysis lengths - define these in time (seconds)
% (will then use the nearest power of two based on sampling frequency)
clear analysis_time spreading_function fft_length low_frequency_bin high_frequency_bin reference_thresholds reference_threshold min_bin bits_to_remove bits_to_remove_table;
analysis_time(1)=2.0E-2; % 20ms
analysis_time(2)=1.5E-3; % 1.5ms
number_of_analyses=length(analysis_time);
% spreading function to apply to FFT before determining lowest amplitude. Keep peak at centre, even if it means padding with zeros
if (choose_spread >= 1) & (choose_spread <= 10),
tcs=(0.5/(choose_spread+1));
else
tcs=(0.25);
end
spreading_function{1}=[tcs,0.5-tcs,0.5-tcs,tcs];
spreading_function{2}=[tcs,0.5-tcs,0.5-tcs,tcs];
% Loop through each analysis length (typically only two) and set the FFT values
for analysis_number=1:number_of_analyses,
% Calculate the closest FFT length
fft_length(analysis_number)=2^round(log10(analysis_time(analysis_number)*fs)/log10(2));
% Generate window function
window_function{analysis_number}=hanning(fft_length(analysis_number));
% Calculate which FFT bin corresponds to the low frequency limit
low_frequency_bin(analysis_number)=round(fft_length(analysis_number)*low_frequency_limit/fs+((length(spreading_function)-1)/2));
if low_frequency_bin(analysis_number)<2, low_frequency_bin(analysis_number)=2; end;
if low_frequency_bin(analysis_number)>fft_length(analysis_number)/2, error('low frequency too high'); end;
% Calculate which FFT bin corresponds to the high frequency limit
high_frequency_bin(analysis_number)=round(fft_length(analysis_number)*high_frequency_limit/fs+((length(spreading_function)-1)/2));
if high_frequency_bin(analysis_number)<2, error('high frequency too low'); end;
if high_frequency_bin(analysis_number)>fft_length(analysis_number)/2, high_frequency_bin(analysis_number)=fft_length(analysis_number)/2; end;
end
variables_filename=['lossy_variables__fs' num2str(fs) '_bs' num2str(bs) '_noa' num2str(number_of_analyses) '_fft' num2str(fft_length) '_lfb' num2str(low_frequency_bin) '_hfb' num2str(high_frequency_bin) '_nts' num2str(noise_threshold_shift) '_sprfunc' num2str(choose_spread) '.mat'];
% Find out if we've stored the quantisation noise thresholds before
if exist(variables_filename,'file'),
load(variables_filename);
% If not, estimate quantisation noise at each bit in these FFTs
else
for analysis_number=1:number_of_analyses,
clear reference_thresholds;
reference_thresholds(1:noise_averages,1:bs)=zeros;
for av_number=1:noise_averages,
noise_sample=rand(fft_length(analysis_number),1);
for bits_to_remove=1:bs,
% This models the quantisation noise introduced by truncating the last "bits_to_remove" bits from the audio data:
this_noise_sample=floor(noise_sample.*((2^bits_to_remove)))-(2^(bits_to_remove-1));
fft_result=20*log10(conv(abs(fft(this_noise_sample.*window_function{analysis_number})),spreading_fun
ction{analysis_number}));
reference_thresholds(av_number,bits_to_remove)=mean(fft_result(low_frequency_bin(analysis_number):hi
gh_frequency_bin(analysis_number)));
end
end
reference_threshold{analysis_number}=mean(reference_thresholds)-noise_threshold_shift;
for threshold=1:round(20*log10(2^(bs+4))),
if isempty(find(reference_threshold{analysis_number}<threshold)),
threshold_index{analysis_number}(threshold)=0;
else
threshold_index{analysis_number}(threshold)=max(find(reference_threshold{analysis_number}<threshold));
end;
end;
end;
save('-mat',variables_filename,'threshold_index');
end;
% Loop through each analysis length (typically only two) finding minimum value (min_bin) in each FFT
for analysis_number=1:number_of_analyses,
min_bin{analysis_number}(1:floor(samples/(fft_length(analysis_number)/2))-1,1:channels)=zeros;
% Perform spectral analysis
for block_start=1:fft_length(analysis_number)/2:samples-fft_length(analysis_number),
block_number=1+(block_start-1)/(fft_length(analysis_number)/2);
% On last (partial) block, just do to end of file (better than processing beyond end of file with zero pad, because that would
% add a hard cut-off transition on gapless files, giving an artificially high spectrum)
if block_start<samples-fft_length(analysis_number),
actual_block_start=block_start;
else
actual_block_start=samples-fft_length(analysis_number);
end;
for channel=1:channels,
fft_result=conv(abs(fft(window_function{analysis_number}.*inaudio(actual_block_start:actual_block_st
art+fft_length(analysis_number)-1,channel))),spreading_function{analysis_number});
min_bin{analysis_number}(block_number,channel)=20*log10(min(fft_result(low_frequency_bin(analysis_nu
mber):high_frequency_bin(analysis_number))));
end;
end;
min_bin_length(analysis_number)=length(min_bin{analysis_number}(:,1));
end;
clear bits_to_remove;
bits_to_remove(1:ceil(samples/flac_blocksize))=zeros;
% loop through flac blocks
for block_start=1:flac_blocksize:samples,
block_number=1+round(block_start/flac_blocksize);
block_end=block_start+flac_blocksize-1;
if block_end>samples, block_end=samples; end; % Don't jump past end of file!
for analysis_number=1:number_of_analyses,
first_block=(block_start-1)/(fft_length(analysis_number)/2);
last_block=first_block+(flac_blocksize/(fft_length(analysis_number)/2));
if first_block<1, first_block=1; end; % Don't jump before start of file
if last_block>min_bin_length(analysis_number), last_block=min_bin_length(analysis_number); end; % Don't jump past end of file!
if last_block<first_block, first_block=last_block; end;
for channel=1:channels,
this_min_bin=round(min(min_bin{analysis_number}(first_block:last_block,channel)));
if this_min_bin<1, % i.e. if it's quieter than quantisation noise at the least significant bit
bits_to_remove_table(analysis_number,channel)=0; % don't remove any bits!
else
bits_to_remove_table(analysis_number,channel)=threshold_index{analysis_number}(this_min_bin);
end;
end;
end;
bits_to_remove(block_number)=min(min(bits_to_remove_table));
bits_to_remove(block_number)=bs-max((bs-bits_to_remove(block_number)),minimum_bits_to_keep);
if bits_to_remove(block_number)>0,
twoval=(2^bits_to_remove(block_number));
inaudio(block_start:block_end,1:channels)=round(inaudio(block_start:block_end,1:channels)/twoval).*twoval;
end
end
if fix_clipped==9, bs=24; end
wavwrite_raw([filename(1:length(filename)-4) '.ss.wav'],inaudio,fs,bs)
% Make a .bat file to call FLAC twice for comparison: lossless and lossy
% Note comparison might not be fair, since -b1024 itself gives better
% compression on _some_ samples
fid=fopen('temp.bat','w');
dummy=fprintf(fid,'%s\n',['"C:\\Program Files\\FLAC\\flac.exe" -b' num2str(flac_blocksize) ' -f "' filename '"']);
dummy=fprintf(fid,'%s\n',['"C:\\Program Files\\FLAC\\flac.exe" -b' num2str(flac_blocksize) ' -f "' filename(1:length(filename)-4) '.ss.wav"']);
dummy=fclose(fid);
% Run the .bat file
% !temp.bat
%==========================================================
end
%==========================================================
Having fun with Octave now - it just seems to be slow on the first processing - subsequent processing is quicker
Nick.