// Neural Song Learner - neural net that attempts to learn very short audio segments
// Copyright 2008 Les Hall
// This software is protected by the GNU General Public License

// Warning:  this is an artificial intelligence program with an unsupervised
// learning algorithm.  It is capable of teaching itself if given enough time
// and resources.  Increasing the neural net size and running it on a powerful
// networked computer could potentially create a self-aware software entity.
// Take precautions to prevent a possible Terminator-style man / machine
// apocalyptic future.  Never add network communication protocols to an
// artificially intelligent program or you could inadvertently create a society
// of superintelligent programs residing on the world wide web.  You have been
// warned.

// Overview:
//   Each time step takes one FFT frame of the input song, feeds the magnitudes
//   of the lowest i_max bins into a sigmoid feed-forward network, and trains
//   the network with backpropagation so that its outputs match the magnitudes
//   of the lowest o_max bins of the same frame.  On alternating song plays the
//   (band-limited, optionally noised) input is resynthesised to dac.left, or
//   the network output is resynthesised to dac.right.

// parameters:  hours of fun and entertainment!  tweak them!
"Neural_Song_Learner.wav" => string input_filename;      // name of input file
"Neural_Song_Learner_Out.wav" => string output_filename; // name of output file (declared but not used below)
3::second => dur song_duration;                // duration of input song
20.0 => float song_gain;                       // adjust the volume of the song AFTER decimation (!)
256 => int num_samples;                        // number of samples in FFT
(second / samp) / 16 => float bandwidth;       // upper frequency limit on input layer (sample rate / 16)
2 => int num_hidden_layers;                    // number of hidden layers in neural net
0.995 => float k;                              // learning coefficient multiplier, applied once per song play
0.2 => float initial_learning_coefficient;     // determines how fast learning occurs
1.0 => float initial_spectrum_probability;     // stomps on input spectrum
0.0 => float minimum_spectrum_probability;     // its lowest value, let sound leak in
0.1 => float randomization_factor;             // scale of the random initial weights and offsets
10.0 => float error_threshold;                 // squared error summed over one song play must be <= this to complete learning
1.0 => float hidden_layer_scaler;              // scales hidden layer, >1 for fat, <1 for skinny hidden layer
1.0 => float output_layer_scaler;              // scales output layer
1.0 => float tau;                              // exponential constant for sigmoids

// variables
float phase;                                   // phase of song, 0.0 = beginning, 1.0 = end
num_samples => int num_frame_size;             // size of each sample frame
num_frame_size::samp => dur loop_duration;     // duration of each time loop
num_samples / 2 => int num_freqs;              // number of frequencies in FFT
num_frame_size / 2 => int num_frame_freqs;     // number of frequencies in each frame
second / (num_frame_size::samp) => float f_bin; // frequency bin size

// Layer sizes.  The input and output layers index directly into the spectrum
// arrays, so they are clamped to the number of bins available in a frame.
Math.min (bandwidth / f_bin, num_frame_freqs) $ int => int i_max;             // maximum index of input layer
(hidden_layer_scaler * i_max) $ int => int h_max;                             // maximum index of hidden layer
Math.min (output_layer_scaler * i_max, num_frame_freqs) $ int => int o_max;   // maximum index of output layer
num_hidden_layers + 1 => int num_layers;       // number of layers with weights (hidden + output)

complex saved_spectrum[num_frame_freqs];       // network input spectrum (band-limited, with optional noise)
float output_samples[num_frame_size];          // manually collected output samples
complex output_spectrum[num_frame_freqs];      // output IFFT spectrum
complex input_spectrum[num_frame_freqs];       // input FFT spectrum (training target)

// The weight matrix of layer 0 has i_max columns, so the input layer size must
// be part of the maximum as well as the hidden and output layer sizes.
Math.max (i_max, Math.max (h_max, o_max)) $ int => int array_size;  // size of arrays

float weights[num_layers][array_size][array_size];  // the neural net weights: [layer][neuron][input]
float offsets[num_layers][array_size];         // the neural net offsets
float sums[num_layers][array_size];            // the sums of input * weight
float outputs[num_layers][array_size];         // the output values of the neurons
float error_deltas[num_layers][array_size];    // the backprop error differences
int song_iterations;                           // number of times the song has been played
float least_squared_error;                     // sum of output neuron errors squared, reset after every song play
int learning_complete;                         // goes to 1 when error is below a threshold value
initial_learning_coefficient => float learning_coefficient;  // initialize learning rate
initial_spectrum_probability => float spectrum_probability;  // initialize stomping
int output_recorded;                           // 1 when the output has begun recording
float temp;                                    // holds temporary values, for optimizing code for speed
int toggle;                                    // toggles between 0 and 1 to indicate when to play the processed input signal

// build layer table for indexing layer sizes in loops
// layer_sizes[n] is the number of inputs of weight layer n, and
// layer_sizes[n+1] is the number of neurons in weight layer n.
int layer_sizes[num_layers+1];                 // the layer sizes table 0 = input, num_layers = output
i_max => layer_sizes[0];                       // the input layer size
for (1 => int layer; layer < num_layers; layer++)
{
    h_max => layer_sizes[layer];               // the hidden layer size
}
o_max => layer_sizes[num_layers];              // the output layer size

// find number of neurons total
int num_neurons;                               // the total number of neurons
for (1 => int layer; layer <= num_layers; layer++)
{
    layer_sizes[layer] +=> num_neurons;
}

// the patch
WvIn wvin => FFT fft =^ IFFT ifft => dac.left;
IFFT ifft2 => dac.right;
SinOsc blip => dac;

// the patch parameters
num_frame_size => fft.size => ifft.size => ifft2.size;
Windowing.hamming (num_frame_size) => fft.window => ifft.window => ifft2.window;
440.0 => blip.freq;
0.0 => blip.gain;

// seed the neural network with random weights and offsets
for (0 => int layer; layer < num_layers; layer++)
{
    for (0 => int neuron; neuron < layer_sizes[layer+1]; neuron++)
    {
        // offsets are additionally scaled by the number of inputs of the layer
        layer_sizes[layer] * randomization_factor * Math.rand2f (-1.0, 1.0) => offsets[layer][neuron];
        for (0 => int input; input < layer_sizes[layer]; input++)
        {
            randomization_factor * Math.rand2f (-1.0, 1.0) => weights[layer][neuron][input];
        }
    }
}

// initial print out message
<<<"", "">>>;
<<<"Neural Song Learner", "">>>;
<<<"Copyright 2008 Les Hall", "">>>;
<<<"Running a total of", num_neurons, "neurons in", num_layers, "layers (output + hidden)">>>;
// NOTE(review): the statement at this position was garbled to `<<>>;`, which
// does not parse; a blank-line print is emitted in its place.
<<<"", "">>>;
<<<"Filter bandwidth", bandwidth, "Hz, in", i_max, "*", f_bin, "Hz FFT/IFFT bins">>>;

// fix a quirky little ChucK bug
samp => now;

// start the song for the first play
wvin.path (input_filename);

// the time loop
while (true)
{
    // track time and play song in an endless loop
    loop_duration / song_duration +=> phase;
    if (phase > 1.0)
    {
        // when song is over
        song_iterations++;                     // increment the song play counter
        if (!learning_complete)
        {
            if (least_squared_error <= error_threshold)
            {
                1 => learning_complete;
                <<<"Learning complete.", "">>>;
                <<<"Switching to playback mode.", "">>>;
            }
        }
        <<<"song", song_iterations, "learn", learning_coefficient, "prob", spectrum_probability, "tau", tau, "err", least_squared_error>>>;  // print the iteration number
        0.0 => phase;
        k *=> learning_coefficient;
        if (learning_complete)
        {
            // once learned, decay the spectrum probability down to its floor
            k *=> spectrum_probability;
            if (spectrum_probability < minimum_spectrum_probability)
            {
                minimum_spectrum_probability => spectrum_probability;
            }
        }
        0.0 => least_squared_error;

        // quarter-second beep between song plays
        0.2 => blip.gain;
        250::ms => now;
        0.0 => blip.gain;

        wvin.path (input_filename);            // restart song
        1 - toggle => toggle;
    }

    // compute the new FFT
    fft.upchuck ().cvals () @=> input_spectrum;

    // amplify by the song gain and add increasing randomness, then save spectrum
    // also stomp on upper frequencies
    for (0 => int frequency; frequency < num_frame_freqs; frequency++)
    {
        // amplify
        song_gain * input_spectrum[frequency] => input_spectrum[frequency];

        // save and add some randomness to mess up the input signal somewhat
        // (the noise term is zero while spectrum_probability is 1.0)
        ( (1.0 - spectrum_probability) * Math.rand2f (0.0, 1.0) ) $ complex + input_spectrum[frequency] => saved_spectrum[frequency];

        if (frequency >= i_max)
        {
            // stomp on upper frequencies
            0.0 $ complex => saved_spectrum[frequency];
        }
        if (frequency >= o_max)
        {
            // stomp on upper frequencies
            0.0 $ complex => input_spectrum[frequency];
        }
    }

    // send it back out to the dac every other song play
    if (!toggle)
    {
        ifft.transform (saved_spectrum);
    }

    // forward propagate the network
    for (0 => int layer; layer < num_layers; layer++)
    {
        if (layer == 0)
        {
            // the first layer reads the magnitudes of the saved input spectrum
            for (0 => int neuron; neuron < layer_sizes[layer+1]; neuron++)
            {
                // initialize the sums to their offset values
                offsets[layer][neuron] => sums[layer][neuron];
                for (0 => int input; input < layer_sizes[layer]; input++)
                {
                    // add up the inputs * the weights
                    (saved_spectrum[input] $ polar).mag * weights[layer][neuron][input] +=> sums[layer][neuron];
                }
                // apply sigmoid activation function
                1.0 / (1.0 + Math.exp (-sums[layer][neuron] / tau) ) => outputs[layer][neuron];
            }
        }
        else
        {
            // later layers read the outputs of the previous layer
            for (0 => int neuron; neuron < layer_sizes[layer+1]; neuron++)
            {
                // initialize the sums to their offset values
                offsets[layer][neuron] => sums[layer][neuron];
                for (0 => int input; input < layer_sizes[layer]; input++)
                {
                    // add up the inputs * the weights
                    outputs[layer-1][input] * weights[layer][neuron][input] +=> sums[layer][neuron];
                }
                // apply sigmoid activation function
                1.0 / (1.0 + Math.exp (-sums[layer][neuron] / tau) ) => outputs[layer][neuron];
            }
        }
    }

    // construct the output IFFT spectrum
    for (0 => int frequency; frequency < num_frame_freqs; frequency++)
    {
        if (frequency < o_max)
        {
            outputs[num_layers-1][frequency] $ complex => output_spectrum[frequency];
        }
        else
        {
            0.0 $ complex => output_spectrum[frequency];
        }
    }

    // perform IFFT on output spectrum every other song play
    if (toggle)
    {
        ifft2.transform (output_spectrum);
    }

    // train the network with backprop
    // first calculate the error deltas
    for (num_layers-1 => int layer; layer >= 0; layer--)
    {
        if (layer == (num_layers-1) )
        {
            // output layer: error is target magnitude minus network output
            for (0 => int neuron; neuron < layer_sizes[layer+1]; neuron++)
            {
                (input_spectrum[neuron] $ polar).mag - outputs[layer][neuron] => error_deltas[layer][neuron];
            }
        }
        else
        {
            // hidden layer: this layer has layer_sizes[layer+1] neurons and
            // feeds the layer_sizes[layer+2] neurons of the next layer.  Using
            // these bounds keeps backprop correct when the layers differ in size.
            // NOTE(review): the downstream error is propagated without the
            // sigmoid derivative of the downstream neuron; kept as written.
            for (0 => int neuron; neuron < layer_sizes[layer+1]; neuron++)
            {
                0.0 => error_deltas[layer][neuron];
                for (0 => int next; next < layer_sizes[layer+2]; next++)
                {
                    // add the next layer errors times the weights through which they backpropagate
                    error_deltas[layer+1][next] * weights[layer+1][next][neuron] +=> error_deltas[layer][neuron];
                }
            }
        }
    }

    // then modify the weights according to the error deltas
    if (!learning_complete)
    {
        for (0 => int layer; layer < num_layers; layer++)
        {
            for (0 => int neuron; neuron < layer_sizes[layer+1]; neuron++)
            {
                // temp = learning rate * error * sigmoid derivative of this neuron
                learning_coefficient * error_deltas[layer][neuron] => temp;
                outputs[layer][neuron] * (1.0 - outputs[layer][neuron]) / tau *=> temp;
                temp +=> offsets[layer][neuron];
                for (0 => int input; input < layer_sizes[layer]; input++)
                {
                    if (layer == 0)
                    {
                        temp * (saved_spectrum[input] $ polar).mag +=> weights[layer][neuron][input];
                    }
                    else
                    {
                        temp * outputs[layer-1][input] +=> weights[layer][neuron][input];
                    }
                }
            }
        }
    }

    // and calculate the least squared error
    for (0 => int frequency; frequency < o_max; frequency++)
    {
        error_deltas[num_layers-1][frequency] * error_deltas[num_layers-1][frequency] +=> least_squared_error;
    }

    // do the time warp
    num_frame_size::samp => now;
}