`timescale 1ns / 1ps
// Engineer: Scott R. Gravenhorst
// email: music.maker@gte.net
// Date Started: 2011-03-04
// Design Name: Additive Flute Model Monosynth
// Description: Use sine table additive synth with noise and EGs.
//              Target 16 bit arithmetic at 100 kHz sample rate as proof of concept.
//              Noise generator will be 32 bit LFSR with 16 bit output
//              Phase accumulator will be 48 bits.
//              LFSR logic will be 32 bits wide with 16 bit output for eventual port to dsPIC.
//              Portamento and Pitchwheel may be implemented, probably not in ver_a.
//              Sysex model = 05
//
// NCO0 - fundamental sine
// NCO1 - harmonic 2
// NCO2 - harmonic 3
// NCO3 - noise
//
// ver_a: - First cut, basically working prototype.  There are minor problems, but it demonstrates
//          the principle.
//
// ver_b: - Velocity and mod wheel to change level of 2nd harmonic (overblow).
//        - Velocity to modulate general output level.
//        - Implement single pole IIR lowpass filter for portamento.
//
// ver_c: - try portamento with 32 bit lowpass IIR filter.  
//          ver_b uses 16 bit multiplies with a 21 bit input, which doesn't work well.
//          I'm not happy with the 32 bit version either, although it works a bit better.
//        - It seems that allowing velocity to change between DACena as well as allowing
//          GATE to change between DACena was the source of clicks.
//        - Disable portamento, not actually needed for a flute and it uses resources that will
//          be wasteful in a dsPIC.
//        * Testing indicates a click occurs when notes are started while the ADSR is not idle.
//
// ver_d: - The clicking came from velocity changes occurring during the release state of the ADSR.
//          The click occurred because the state machine allowed velocity values to change during
//          release because there were no keys pressed.  A signal 'is_idle' has been added to the
//          ADSR which is held at 1 if the ADSR is idle.  The driving flipflop changes in state 0
//          of the ADSR.  All 4 is_idle states are ANDed together and then used to determine if
//          MCU_VEL should be transferred to VEL.
//
// ver_e: - Remove portamento IIR logic entirely.
//        - Implement SVF bandpass filter for tonal noise.  Had to upgrade design of SVF to 32 bits.
//        * This is sounding quite acceptable.
//
// ver_f: - Implement main amplitude modulation by channel pressure.  Using only channel pressure at
//          an effective amplitude, there is apparent zipper noise.  Adding single pole IIR filter.
//          The filter should not require external configuration.
//
// ver_g: - add compensation for Q enhancement.
//
// ver_h: - add Pitch Wheel (+/- 1 semitone).
//
///////////////////////////////////////////////////////////////////////////////////////////////////
//
// PLEASE NOTE:
//
// This project is a proof of concept surrounding the sample rate of 100 kHz and the arithmetic
// bit widths used to make critical computations.  I would describe my technique as "quick and 
// dirty".  It is meant to be ported to a dsPIC when it is working satisfactorily.  Thus, as I
// wrote the Verilog code, I was concerned only with whether the design will work or not and gave
// little effort to "slick" or "elegant" coding practice.  The state machine is linear in operation
// when it could have been looped in more than one place which would have saved state count.
//
///////////////////////////////////////////////////////////////////////////////////////////////////

// Top-level module of the additive flute monosynth.
//
// Port groups (directions are declared in the module body):
//   clk                        - system clock
//   LED                        - 8 status LEDs
//   lcd_rs/lcd_rw/lcd_e/lcd_d  - character LCD, 4 bit data bus
//   BTN_EAST, BTN_WEST         - push buttons (BTN_EAST is the MCU reset)
//   spi_*                      - SPI bus to the DAC plus chip selects of the other SPI devices
//   strataflash_*, platformflash_oe - flash enables, tied off in the body
//   Raw_MIDI_In, TTY_In        - serial inputs to the two UART receivers
//
// spi_dac_cs used to be listed twice in this port list.  Verilog tolerates a repeated
// identifier, but it creates a second, redundant port with the same name, so the
// duplicate entry has been removed.
module Additive_Flute ( clk, LED, lcd_rs, lcd_rw, lcd_e, lcd_d, 
                   BTN_EAST, BTN_WEST, 
                   spi_sck, spi_dac_cs, spi_sdi, spi_rom_cs, spi_amp_cs, spi_adc_conv, 
                   spi_amp_shdn, spi_dac_clr,
                   strataflash_oe, strataflash_ce, strataflash_we, platformflash_oe,
                   Raw_MIDI_In, TTY_In );

/////////////////////////////////////////////////////////////////////////////////////////
// VERSION
// This parameter supplies the version letter to an MCU port so it can be shown on the LCD.
// NOTE(review): the change log at the top of this file goes up to ver_h (pitch wheel),
//               but the value below is still "g" - confirm which one is intended.
/////////////////////////////////////////////////////////////////////////////////////////
  parameter version   = "g";

// Oscillator slot bookkeeping
  parameter NCOs      = 4;            // number of NCO slots
  parameter NCOMAX    = NCOs - 1;     // highest NCO index
  parameter SEL_WIDTH = 2;            // must satisfy 2**SEL_WIDTH == NCOs

  // ---- inputs -------------------------------------------------------------
  input        clk;
  input        BTN_EAST;                 // MCU reset
  input        BTN_WEST;
  input        Raw_MIDI_In;
  input        TTY_In;

  // ---- LEDs and character LCD ----------------------------------------------
  output [7:0] LED;
  inout  [7:4] lcd_d;                    // only the upper nibble of the LCD bus is used
  output       lcd_rs, lcd_rw, lcd_e;

  // ---- SPI bus and the chip selects / controls that share it ----------------
  output       spi_sck, spi_sdi;
  output       spi_dac_cs, spi_dac_clr;
  output       spi_rom_cs, spi_amp_cs, spi_amp_shdn, spi_adc_conv;

  // ---- flash enables ---------------------------------------------------------
  output       strataflash_oe, strataflash_ce, strataflash_we;
  output       platformflash_oe;

  // LCD pin nets and the internal control nets derived from the LCD register
  wire [7:4] lcd_d;
  wire       lcd_rs, lcd_rw, lcd_e;
  wire       lcd_rw_control, lcd_drive;

  reg  [7:0] LCD;                        // LCD[7:4] data nibble, LCD[3:0] control bits

  // Unpack the control bits of the LCD register
  assign lcd_e          = LCD[0];
  assign lcd_rw_control = LCD[1];
  assign lcd_rs         = LCD[2];
  assign lcd_drive      = LCD[3];

  // The RW pin may only go high while the drive bit is set
  assign lcd_rw = lcd_drive & lcd_rw_control;

  // Drive the data nibble only when driving is enabled and RW is low; otherwise release the bus
  assign lcd_d[7:4] = ( lcd_drive & ~lcd_rw_control ) ? LCD[7:4] : 4'bzzzz;

  // Net declarations for the remaining simple ports
  wire       clk;
  wire [7:0] LED;
  wire       BTN_EAST, BTN_WEST;

  // Reset generation
  wire [3:0]  rstd;                      // power-on-reset delay chain
  wire        reset;                     // POR or user (button) reset

  // Interrupt lines: one per UART, merged into a single MCU interrupt
  wire        interrupt, interrupt_ack;
  wire        interrupt0, interrupt1;

  // MCU <-> program ROM
  wire [9:0]  address;                   // instruction address from the MCU
  wire [17:0] instruction;               // instruction word returned by the ROM

  // MCU port bus
  wire [7:0]  port_id;
  wire [7:0]  out_port;
  wire [7:0]  in_port;
  reg  [7:0]  in_port_reg = 8'h00;       // holds the data presented to the MCU input port
  assign in_port = in_port_reg;

// MIDI (RX0) & TTY (RX1) UART receivers
  wire [7:0] rx0data;             // received byte, MIDI UART
  wire rx0rdy;                    // MIDI UART rxrdy flag
  wire reset_rx0rdy;              // pulse that clears rx0rdy
  
  wire [7:0] rx1data;             // received byte, TTY UART
  wire rx1rdy;                    // TTY UART rxrdy flag.  Previously undeclared: it only existed as
                                  // an implicit net created by the RX1 port connection.
  wire reset_rx1rdy;              // pulse that clears rx1rdy

  wire resetsignal;               // reset input shared by both UARTs
// MCU
  wire read_strobe;
  wire write_strobe;

///////////////////////////////////////////////////////////////////////////////////////
// Synth control registers
///////////////////////////////////////////////////////////////////////////////////////

  reg  [7:0]  SYSEX_ADDR_MSB = 8'h00;

  reg  [3:0]  MIDI_CHANNEL   = 4'h0;

  // Performance controllers
  reg  [13:0] PW       = 14'h2000;       // pitch wheel, starts centered in case no message ever arrives
  reg  [6:0]  CHANpres = 7'd0;           // channel pressure
  reg  [6:0]  MOD_WHL  = 7'd0;           // mod wheel
  reg  [6:0]  VEL      = 7'd0;           // velocity used by the synth engine
  reg  [6:0]  MCU_VEL  = 7'd0;           // velocity as delivered by the MCU, copied to VEL by the state machine

  // (a global TRANSPOSE register, 7 bits, in half steps, existed here and is disabled)

  // Key / pedal state
  reg         MCU_GATE = 1'b0;
  reg         SUSTAIN  = 1'b0;
  reg         KEY_DOWN = 1'b0;

  // Note information
  reg  [6:0]  NOTENUM = 7'd0;            // note number where zero is the lowest Korg Wavestation key
  reg  [7:0]  NOTEOCT = 8'd0;            // note-octave byte received from the MCU
  wire [3:0]  OCT     = NOTEOCT[7:4];    // number of left shifts applied to the phase increment
  wire [3:0]  NOTE    = NOTEOCT[3:0];    // semitone, 0 to 11

// NCO levels: each 16 bit level is two zero bits followed by two 7 bit halves (HI, then LO)
  reg  [6:0]  NCO0levHI, NCO0levLO;
  reg  [6:0]  NCO1levHI, NCO1levLO;
  reg  [6:0]  NCO2levHI, NCO2levLO;
  reg  [6:0]  NCO3levHI, NCO3levLO;

  wire [15:0] NCO0lev = { 2'b00, NCO0levHI, NCO0levLO };
  wire [15:0] NCO1lev = { 2'b00, NCO1levHI, NCO1levLO };
  wire [15:0] NCO2lev = { 2'b00, NCO2levHI, NCO2levLO };
  wire [15:0] NCO3lev = { 2'b00, NCO3levHI, NCO3levLO };

///////////////////////////////////////////////////////////////////////////////////////
// Power-on-reset delay: a chain of four flip-flops shifts a constant 1 towards rstd[3].
// (Taken from Eric Brombaugh's code for the SPI DAC.)
///////////////////////////////////////////////////////////////////////////////////////
  FDCE rst_bit0 ( .C(clk), .CE(1'b1), .CLR(1'b0), .D(1'b1),    .Q(rstd[0]) );
  FDCE rst_bit1 ( .C(clk), .CE(1'b1), .CLR(1'b0), .D(rstd[0]), .Q(rstd[1]) );
  FDCE rst_bit2 ( .C(clk), .CE(1'b1), .CLR(1'b0), .D(rstd[1]), .Q(rstd[2]) );
  FDCE rst_bit3 ( .C(clk), .CE(1'b1), .CLR(1'b0), .D(rstd[2]), .Q(rstd[3]) );

  // Reset is asserted until the 1 reaches the end of the chain, or while the east button is pressed
  assign reset = BTN_EAST | ~rstd[3];

// Flash enables are tied off to allow SPI to work
  assign platformflash_oe = 1'b0;
  assign strataflash_oe   = 1'b1;
  assign strataflash_ce   = 1'b1;
  assign strataflash_we   = 1'b1;

// The other SPI enables are tied off to isolate the DAC
  assign spi_adc_conv = 1'b0;
  assign spi_rom_cs   = 1'b1;
  assign spi_amp_cs   = 1'b1;
  assign spi_amp_shdn = 1'b1;
  assign spi_dac_clr  = 1'b1;
  
  
///////////////////////////////////////////////////////////////////////////////////////
// Microcontroller (kcpsm3) and its program ROM
///////////////////////////////////////////////////////////////////////////////////////

  kcpsm3 MCU (
    .clk( clk ),
    .reset( reset ),
    .address( address ),
    .instruction( instruction ),
    .port_id( port_id ),
    .out_port( out_port ),
    .in_port( in_port ),
    .read_strobe( read_strobe ),
    .write_strobe( write_strobe ),
    .interrupt( interrupt ),
    .interrupt_ack( interrupt_ack )
    );

  midictrl PSMROM ( .clk( clk ), .address( address ), .instruction( instruction ) );

///////////////////////////////////////////////////////////////////////////////////////
// MIDI UART (RX0)
  MIDIuartrx RX0 (
    .clk( clk ),
    .reset( resetsignal ),
    .rxd( Raw_MIDI_In ),
    .dout( rx0data ),
    .rxrdy( rx0rdy ),
    .reset_rxrdy( reset_rx0rdy ),
    .interrupt( interrupt0 ),
    .interrupt_ack( interrupt_ack ),
    .frame(), .overrun(), .ready(), .busy(), .CS()
    );
  // IMPORTANT: a read of port 01 generates the pulse that resets the rxready flop,
  // so the MCU clears the rxready bit simply by reading rxdata.
  assign reset_rx0rdy = read_strobe & ( port_id == 8'h01 );

///////////////////////////////////////////////////////////////////////////////////////
// TTY UART (RX1), 115.2 or 19.2 kilobaud (baud rate is configured inside the module)
  TTYuartrx RX1 (
    .clk( clk ),
    .reset( resetsignal ),
    .rxd( TTY_In ),
    .dout( rx1data ),
    .rxrdy( rx1rdy ),
    .reset_rxrdy( reset_rx1rdy ),
    .interrupt( interrupt1 ),
    .interrupt_ack( interrupt_ack ),
    .frame(), .overrun(), .ready(), .busy(), .CS()
    );
  // IMPORTANT: same scheme as RX0, decoded on a read of port 09.
  assign reset_rx1rdy = read_strobe & ( port_id == 8'h09 );

// Shared by both UARTs
  assign interrupt   = interrupt1 | interrupt0;               // the ISR works out which UART raised it
  assign resetsignal = write_strobe & ( port_id == 8'hFF );   // a write to port FF resets both UARTs

///////////////////////////////////////////////////////////////////////////////
// SPI DAC  -  sample rate 100 kHz
///////////////////////////////////////////////////////////////////////////////

  parameter CYCLES = 12'd500;            // number of cycles per DACena

  wire        DACena;                    // ena_out of the DAC module; restarts the main state machine
  reg  [11:0] DACreg    = 12'h800;       // value handed to the SPI DAC
  reg  [11:0] DACnext   = 12'h000;       // next output value, added to 12'h800 and moved to DACreg at DACena
  reg  [1:0]  DACshifts = 2'd0;          // selects which 12 bit window of SumOut is used

// DAC, module by Eric Brombaugh
  spi_dac_out DAC (
    .clk( clk ),
    .reset( reset ),
    .cycles( CYCLES ),
    .data_in( DACreg ),
    .ena_out( DACena ),
    .spi_sck( spi_sck ),
    .spi_sdo( spi_sdi ),
    .spi_dac_cs( spi_dac_cs )
    );

////////////////////////////////////////////////////////////////////////////
// Noise source: 32 bit LFSR (taps: 32,22,2,1), advanced once per DACena
////////////////////////////////////////////////////////////////////////////

  reg [31:0] sr = 32'h461B_87AA;         // shift register, loaded with an arbitrarily chosen seed

////////////////////////////////////////////////////////////////////////////
// Phase accumulator (48 bits) and sine lookup
////////////////////////////////////////////////////////////////////////////

  parameter PA_HIBIT = 47;                     // index of the highest phase accumulator bit in use

  reg  [PA_HIBIT:0] phase_inc;                 // phase increment
  reg  [PA_HIBIT:0] PHacc;                     // working phase accumulator
  reg  [PA_HIBIT:0] PHacc_RAM [NCOMAX:0];      // one stored phase per NCO, meant to infer distributed RAM

////////////////////////////////////////////////////////////////////////////
// Saw to triangle conversion.
// The 18 bits directly below the accumulator's top bit are passed through when the
// top bit is 0 and bitwise inverted when it is 1; the fixed offset then centers the result.
  wire signed [17:0] tri_tmp;                  // folded phase
  wire signed [17:0] tri_out;                  // triangle

  assign tri_tmp = PHacc[PA_HIBIT] ? ~PHacc[PA_HIBIT-1:PA_HIBIT-18]
                                   :  PHacc[PA_HIBIT-1:PA_HIBIT-18];
  assign tri_out = tri_tmp + 18'sb10_0000_0000_0000_0000;

/////////////////////////////////////////////////////////////////////////////
// Sine lookup: a quarter cycle table is expanded to a full cycle.
//
//   quarter | table address                       | table output
//   --------+-------------------------------------+--------------
//     1st   | triangle, unmodified                | unmodified
//     2nd   | triangle, inverted                  | unmodified
//     3rd   | triangle, inverted (the second half | negated
//           |   of the triangle is already        |
//           |   inverted, so this un-inverts it)  |
//     4th   | triangle, unmodified                | negated
//
// The first half cycle is passed through (sign = 0), the second half cycle is
// made negative (sign = 1).  sinT is signed and the negative half uses -sinT
// (an earlier revision used ~sinT).

  wire        [9:0]  adr;                      // table address
  wire signed [17:0] sinT;                     // raw table output
  wire signed [17:0] SinOut;                   // full cycle sine

// Sine table, 1024 locations (10 bit address 000-3FF), 18 data bits, 1/4 cycle
  sine_tab SIN ( .CLK( clk ), .A( adr ), .O( sinT ) );

  assign adr    = tri_out[17] ? ~tri_out[16:7] : tri_out[16:7];   // invert the address when the triangle is negative
  assign SinOut = tri_out[17] ? -sinT : sinT;                     // the triangle's sign bit selects the negated output

///////////////////////////////////////////////////////////////////////////////
// Gate / sustain pedal state
///////////////////////////////////////////////////////////////////////////////

  reg GATE         = 1'b0;               // one gate bit only: this is a monosynth
  reg SUS          = 1'b0;               // sustain latch; cannot become true without MCU_GATE
  reg OLD_KEY_DOWN = 1'b0;               // KEY_DOWN as of the previous sample, used to detect changes
  reg VEL_OVERBLOW = 1'b0;               // 1 enables velocity overblow, 0 disables it

///////////////////////////////////////////////////////////////////////////////
// SVF - digital state variable filter with resonance, used as a bandpass
///////////////////////////////////////////////////////////////////////////////

  reg                SVFena = 1'b0;      // pulsed by the state machine to start a filter pass
  reg  signed [31:0] freq   = 0;         // filter frequency input
  wire signed [31:0] SVFin;
  wire signed [31:0] SVFout;

  wire signed [31:0] qval = 32'sh2000_0000;    // Q of 4  (where Q = 1/qval)

  // Input level adjustment, needed to prevent binary wrap as Q is increased.
  // The LFSR word is moved down by three bits (five bits in an earlier revision).
  // sr is an unsigned reg, so the original ">>> 3" was a zero-filling shift; that
  // is written out explicitly here.
  // NOTE(review): the filter input is therefore never negative - confirm this is intended.
  assign SVFin = { 3'b000, sr[31:3] };

  SVF SVF (
    .clk( clk ),
    .ena( SVFena ),                  // tells the filter to "go"
    .f( freq ),                      // 32 bit filter corner frequency (NOT in Hz, but close)
    .q( qval ),                      // 32 bit filter 1/Q value
    .In( SVFin ),                    // 32 bit signed input
    .Out( SVFout )                   // 32 bit signed output
    );

///////////////////////////////////////////////////////////////////////////////
// Single pole lowpass IIR, removes zipper noise from channel pressure
///////////////////////////////////////////////////////////////////////////////
// The coefficients are declared unsigned so that a0 = 1.0 - BW can be computed
// directly; the filter logic then uses them as signed values.

  wire        [31:0] IIR_BW = 32'h7000_0000;             // BW of 0.875
  wire        [31:0] IIR_b1 = IIR_BW;                    // feedback coefficient
  wire        [31:0] IIR_a0 = 32'h8000_0000 - IIR_BW;    // input coefficient
  reg  signed [31:0] IIR_y1 = 0;                         // previous output y[n-1]

///////////////////////////////////////////////////////////////////////////////
// ADSR - one envelope generator module serving four envelopes selected by ADSRsel
///////////////////////////////////////////////////////////////////////////////

  // Control and output
  reg                ADSRena = 1'b0;
  reg         [1:0]  ADSRsel = 2'd0;
  wire signed [15:0] ADSRout;
  wire               ADSR_is_idle;
  reg                All_ADSRs_idle = 1'b1;      // AND of is_idle over the four envelopes

  // Envelope outputs captured by the state machine
  reg  signed [15:0] ADSRout_REG0 = 0;
  reg  signed [15:0] ADSRout_REG1 = 0;
  reg  signed [15:0] ADSRout_REG2 = 0;
  reg  signed [15:0] ADSRout_REG3 = 0;

  // Parameters of the envelope currently being serviced
  reg         [14:0] adsrA  = 0;
  reg         [14:0] adsrD  = 0;
  reg         [14:0] adsrS  = 0;
  reg         [14:0] adsrR  = 0;
  reg                expo_R = 0;

  // Parameter storage, one entry per envelope.  Each 15 bit value is kept as three 5 bit fields.
  reg [4:0] adsrA_RAM_hi [3:0], adsrA_RAM_mi [3:0], adsrA_RAM_lo [3:0];
  reg [4:0] adsrD_RAM_hi [3:0], adsrD_RAM_mi [3:0], adsrD_RAM_lo [3:0];
  reg [4:0] adsrS_RAM_hi [3:0], adsrS_RAM_mi [3:0], adsrS_RAM_lo [3:0];
  reg [4:0] adsrR_RAM_hi [3:0], adsrR_RAM_mi [3:0], adsrR_RAM_lo [3:0];

  reg [0:0] expo_R_RAM [3:0];                    // looks like this just uses 4 registers instead of a RAM

  ADSR EG0 (
    .clk( clk ),
    .ena( ADSRena ),
    .sel( ADSRsel ),
    .GATE( GATE ),
    .A( adsrA ),                                 // 15 bit unsigned
    .D( adsrD ),                                 // 15 bit unsigned
    .S( adsrS ),                                 // 15 bit unsigned
    .R( adsrR ),                                 // 15 bit unsigned
    .expo_R( expo_R ),
    .out( ADSRout ),
    .is_idle( ADSR_is_idle )
    );
    
    
///////////////////////////////////////////////////////////////////////////////
// Dual tuning ROM (LUT4 based, so no clock is needed)
///////////////////////////////////////////////////////////////////////////////
// The two outputs are 1/2 step above and below; a linear interpolator computes the
// tuning value actually used.  NOTE is a 4 bit value, 0 to 11, on the semitone scale.

  wire signed [17:0] ROM_hi, ROM_lo;
  reg  signed [17:0] ROM_hi_REG, ROM_lo_REG;       // ROM outputs registered at DACena

  tuning_ROM TUN0 ( .addr( NOTE ), .out_hi( ROM_hi ), .out_lo( ROM_lo ) );

  reg  [31:0] pitch           = 0;
  reg  [31:0] noise_pitch     = 0;                 // receives the noise pitch value from the multiplier
  reg  [31:0] noise_pitch_adj = 0;                 // slider for noise pitch


///////////////////////////////////////////////////////////////////////////////
// Pitch wheel ROMs for interpolation
///////////////////////////////////////////////////////////////////////////////
// The ROMs were generated as 17 bit unsigned fixed point values ranging from somewhat
// less than one to somewhat more than one.  This synth uses their upper 15 bits as
// 16 bit signed values, to take advantage of the 16x16 multiplier and to make the
// dsPIC port easier.  Output format is sb.bbbbbbbbbbbbbb with the sign bit always 0.

  reg         [13:0] PW_synch;                     // PW is transferred to PW_synch at DACena
  wire signed [15:0] PW_ROM_hi, PW_ROM_lo;

  PW_ROM PW0 ( .ad( PW_synch[13:10] ), .out_hi( PW_ROM_hi ), .out_lo( PW_ROM_lo ) );

  reg  signed [31:0] PW_multiplier;                // receives the output of the PW interpolator
  
  
///////////////////////////////////////////////////////////////////////////////
// Shared multipliers
///////////////////////////////////////////////////////////////////////////////
// Each multiplier is combinational: the state machine loads the operand registers
// in one state and reads the scaled product in a later one.  The PRODx outputs are
// the full product shifted right arithmetically and cut to the operand width, which
// is the same as taking the product bits selected below.

// 16 x 16 = 32, PROD0 = P0 >>> 15
  reg  signed [15:0] A0 = 0;
  reg  signed [15:0] B0 = 0;
  wire signed [31:0] P0    = A0 * B0;
  wire signed [15:0] PROD0 = P0[30:15];

// 18 x 18 = 36, PROD1 = P1 >>> 17
  reg  signed [17:0] A1;
  reg  signed [17:0] B1;
  wire signed [35:0] P1    = A1 * B1;
  wire signed [17:0] PROD1 = P1[34:17];

// 32 x 32 = 64, PROD2 = P2 >>> 31
  reg  signed [31:0] A2;
  reg  signed [31:0] B2;
  wire signed [63:0] P2    = A2 * B2;
  wire signed [31:0] PROD2 = P2[62:31];
  reg  signed [31:0] PROD2_reg;                    // holds one PROD2 result while the multiplier is reused

///////////////////////////////////////////////////////////////////////////////
// Main state machine registers
///////////////////////////////////////////////////////////////////////////////

  reg        [5:0]  state  = 6'h00;      // current step of the per-sample sequence
  reg               run    = 1'b0;       // set at DACena; steps are only executed while set

  reg signed [17:0] SumOut = 18'sd0;     // 18 bits because four 16 bit values are summed
  reg        [1:0]  NCOsel = 2'd0;       // index into PHacc_RAM
  reg        [14:0] tuning = 15'd16384;  // starts in the center
  
  
  always @ ( posedge clk )
    begin
    // Sample tick.  DACena (ena_out of the SPI DAC module) restarts the per-sample
    // sequence at state 0 and latches everything that must stay constant during it.
    if ( DACena )
      begin
      state   <= 0;
      run     <= 1;
      
      DACreg  <= DACnext + 12'h800;           // hand the previously computed value, offset by 12'h800, to the DAC

      SumOut  <= 0;                           // clear the mix accumulator
      ADSRsel <= 0;                           // start with envelope 0
      NCOsel  <= 0;                           // start with NCO0
      
      ROM_lo_REG <= ROM_lo[17:0];             // register the tuning ROM outputs
      ROM_hi_REG <= ROM_hi[17:0];
      
      PW_synch <= PW;                         // pitch wheel value used for this sample

      sr <= {sr[30:0],(( sr[31] ^ sr[21] ) ^ ( sr[1] ^ sr[0] ))} ;  // LFSR XOR logic (taps: 32,22,2,1)
      end      

    else

      begin
      if ( run )
        begin
        case ( state )
        
        // States 00-13: service the four envelopes in turn, five states per envelope:
        //   1. load the A/D/S/R/expo_R parameters for ADSRsel, raise ADSRena and capture ADSRout
        //   2. drop ADSRena
        //   3-4. wait states
        //   5. fold ADSR_is_idle into All_ADSRs_idle and select the next envelope
        // ADSRout is captured in the same state that raises ADSRena, so the captured
        // value is the one the module was presenting before this pass started.

        // ---- envelope 0 ----
        6'h00:
          begin    state <= state + 1;
          adsrA  <= {adsrA_RAM_hi[ADSRsel],adsrA_RAM_mi[ADSRsel],adsrA_RAM_lo[ADSRsel]};
          adsrD  <= {adsrD_RAM_hi[ADSRsel],adsrD_RAM_mi[ADSRsel],adsrD_RAM_lo[ADSRsel]};
          adsrS  <= {adsrS_RAM_hi[ADSRsel],adsrS_RAM_mi[ADSRsel],adsrS_RAM_lo[ADSRsel]};
          adsrR  <= {adsrR_RAM_hi[ADSRsel],adsrR_RAM_mi[ADSRsel],adsrR_RAM_lo[ADSRsel]};
          expo_R <= expo_R_RAM[ADSRsel];
          
          ADSRena <= 1;
          
          ADSRout_REG0 <= ADSRout;
          end

        6'h01:
          begin    state <= state + 1;      // ADSR ena state complete
          ADSRena <= 0;
          end

        6'h02:
          begin    state <= state + 1;      // ADSR state 0 complete
          end

        6'h03:
          begin    state <= state + 1;      // ADSR state 1 complete
          end

        6'h04:
          begin    state <= state + 1;
          All_ADSRs_idle <= ADSR_is_idle;   // first envelope: start the AND chain
          
          ADSRsel <= 1;
          end

        // ---- envelope 1 ----
        6'h05:
          begin    state <= state + 1;
          adsrA  <= {adsrA_RAM_hi[ADSRsel],adsrA_RAM_mi[ADSRsel],adsrA_RAM_lo[ADSRsel]};
          adsrD  <= {adsrD_RAM_hi[ADSRsel],adsrD_RAM_mi[ADSRsel],adsrD_RAM_lo[ADSRsel]};
          adsrS  <= {adsrS_RAM_hi[ADSRsel],adsrS_RAM_mi[ADSRsel],adsrS_RAM_lo[ADSRsel]};
          adsrR  <= {adsrR_RAM_hi[ADSRsel],adsrR_RAM_mi[ADSRsel],adsrR_RAM_lo[ADSRsel]};
          expo_R <= expo_R_RAM[ADSRsel];
          
          ADSRena <= 1;
          
          ADSRout_REG1 <= ADSRout;
          end

        6'h06:
          begin    state <= state + 1;      // ADSR ena state complete
          ADSRena <= 0;
          end

        6'h07:
          begin    state <= state + 1;      // ADSR state 0 complete
          end

        6'h08:
          begin    state <= state + 1;      // ADSR state 1 complete
          end

        6'h09:
          begin    state <= state + 1;
          All_ADSRs_idle <= ADSR_is_idle & All_ADSRs_idle;
          
          ADSRsel <= 2;
          end

        // ---- envelope 2 ----
        6'h0A:
          begin    state <= state + 1;
          adsrA  <= {adsrA_RAM_hi[ADSRsel],adsrA_RAM_mi[ADSRsel],adsrA_RAM_lo[ADSRsel]};
          adsrD  <= {adsrD_RAM_hi[ADSRsel],adsrD_RAM_mi[ADSRsel],adsrD_RAM_lo[ADSRsel]};
          adsrS  <= {adsrS_RAM_hi[ADSRsel],adsrS_RAM_mi[ADSRsel],adsrS_RAM_lo[ADSRsel]};
          adsrR  <= {adsrR_RAM_hi[ADSRsel],adsrR_RAM_mi[ADSRsel],adsrR_RAM_lo[ADSRsel]};
          expo_R <= expo_R_RAM[ADSRsel];
          
          ADSRena <= 1;
          
          ADSRout_REG2 <= ADSRout;
          end

        6'h0B:
          begin    state <= state + 1;      // ADSR ena state complete
          ADSRena <= 0;
          end

        6'h0C:
          begin    state <= state + 1;      // ADSR state 0 complete
          end

        6'h0D:
          begin    state <= state + 1;      // ADSR state 1 complete
          end

        6'h0E:
          begin    state <= state + 1;
          All_ADSRs_idle <= ADSR_is_idle & All_ADSRs_idle;
          
          ADSRsel <= 3;
          end

        // ---- envelope 3 ----
        6'h0F:
          begin    state <= state + 1;
          adsrA  <= {adsrA_RAM_hi[ADSRsel],adsrA_RAM_mi[ADSRsel],adsrA_RAM_lo[ADSRsel]};
          adsrD  <= {adsrD_RAM_hi[ADSRsel],adsrD_RAM_mi[ADSRsel],adsrD_RAM_lo[ADSRsel]};
          adsrS  <= {adsrS_RAM_hi[ADSRsel],adsrS_RAM_mi[ADSRsel],adsrS_RAM_lo[ADSRsel]};
          adsrR  <= {adsrR_RAM_hi[ADSRsel],adsrR_RAM_mi[ADSRsel],adsrR_RAM_lo[ADSRsel]};
          expo_R <= expo_R_RAM[ADSRsel];
          
          ADSRena <= 1;
          
          ADSRout_REG3 <= ADSRout;
          end

        6'h10:
          begin    state <= state + 1;      // ADSR ena state complete
          ADSRena <= 0;
          end

        6'h11:
          begin    state <= state + 1;      // ADSR state 0 complete
          end

        6'h12:
          begin    state <= state + 1;      // ADSR state 1 complete
          end

        6'h13:
          begin    state <= state + 1;
          All_ADSRs_idle <= ADSR_is_idle & All_ADSRs_idle;   // now the AND of all four is_idle flags
          end

        // All ADSRs are processed.  

        // Process monosynth gating
        // State 14: derive GATE, accept a new velocity and update the sustain latch.
        6'h14:
          begin    state <= state + 1;
          GATE <= KEY_DOWN | SUS;             // assert GATE whenever a key is down or SUS is true  
          
          // detect a new keypress
          OLD_KEY_DOWN <= KEY_DOWN;

          // VEL is only taken from MCU_VEL when KEY_DOWN has just gone high AND all four
          // envelopes are idle (see the ver_d note in the file header: velocity changes
          // while an envelope is still active caused clicks).
          if ( KEY_DOWN != OLD_KEY_DOWN )
            begin
            if ( KEY_DOWN ) 
              begin
              if ( All_ADSRs_idle ) 
                begin
                VEL <= MCU_VEL;
                end
              end
            end

          case ( SUSTAIN )                        // SUSTAIN is a single bit representing the sustain pedal state
            1'b0: SUS <= 1'b0;                    // if pedal is up, always clear SUS
            1'b1: if ( MCU_GATE ) SUS <= 1'b1;    // if pedal is down, only set SUS on MCU_GATE high
          endcase
          end
          
        // process tuning information
        // States 15-19 interpolate the pitch wheel and tuning ROMs, multiply the two
        // results together and start the first envelope/velocity product.
        // Operands loaded into a multiplier in one state are read as PRODx in the next.
        6'h15:
          begin    state <= state + 1;              
          A0 <= PW_ROM_hi - PW_ROM_lo;              // PW interpolation,  fixed point binary: sb.bbbbbbbbbbbbbb
          B0 <= {1'b0,PW_synch[9:0],5'b00000};      // PW interp slider
          end

        6'h16:
          begin    state <= state + 1;
          A1 <= ROM_hi_REG - ROM_lo_REG;            // begin tuning ROM interpolation
          B1 <= {1'b0,tuning,2'b00};                // 15 bits of the tuning "slider"
          
          PW_multiplier <= ( ( PROD0 + PW_ROM_lo ) << 16 );// finish PW interpolation, PW_multiplier has 32 bit value to multiply by each phase increment value
//          PW_multiplier <= ( ( ( PROD0 >>> 1 ) + PW_ROM_lo ) << 16 );// finish PW interpolation, PW_multiplier has 32 bit value to multiply by each phase increment value
          end

        6'h17:
          begin    state <= state + 1;
          pitch <= ( ( PROD1[17:2] + ROM_lo_REG[17:2] ) << OCT ); // finish interpolation, then shift left by OCT
          end
          
        6'h18:
          begin    state <= state + 1;
          A2 <= pitch;                              // pitch times the pitch wheel multiplier
          B2 <= PW_multiplier;
          end
          
        6'h19:
          begin    state <= state + 1;
          A0 <= ADSRout_REG0;                       // envelope 0 times velocity, read by state 1D
          B0 <= {5'b01111,VEL,4'hF};
          
          pitch <= PROD2;                           // pitch now includes the pitch wheel
          end
          
          
        // Process NCO0        
        // States 1A-1F: fundamental.  Advance the phase by phase_inc, then chain three
        // multiplies through multiplier 0: (envelope 0 * velocity) * sine * level.
        6'h1A:
          begin    state <= state + 1;
//          phase_inc   <= ( pitch << 25 );             // phase_inc (48 bits).  << 25 is to get pitch into proper octave range
          phase_inc   <= ( pitch << 26 );             // phase_inc (48 bits).  << 26 is to get pitch into proper octave range (the earlier << 25 is kept above)
          PHacc       <= PHacc_RAM[NCOsel];           // PHacc is 48 bits, phase accum for NCO0
          end
          
        6'h1B:                      // here we add phase_inc to PHacc, this is the fundamental
          begin    state <= state + 1;
          PHacc <= PHacc + phase_inc;     // advance the phase accumulator
          end
          
        6'h1C:
          begin    state <= state + 1;
          PHacc_RAM[NCOsel] <= PHacc;                // store for next DACena
          end

        6'h1D:
          begin    state <= state + 1;
          A0      <= PROD0;                          // envelope 0 * velocity (operands loaded in state 19)
          B0      <= SinOut[17:2];                   // Note the PHacc <= whatever_data must be 2 states prior
          end
          
        6'h1E: 
          begin    state <= state + 1;
          if ( VEL_OVERBLOW ) A0 <= NCO0lev - {2'b00,VEL,7'b0000000} ; // NCO0LEV is expected to be the largest value, essentially all the way up.  subtract some velocity for overblow
          else                A0 <= NCO0lev;
          B0      <= PROD0;

          NCOsel  <= 1;
          end
          
        6'h1F: 
          begin    state <= state + 1;
          SumOut  <= PROD0;                          // first contribution: store rather than accumulate
          end
          
          
        // Process NCO1  
        // States 20-25: 2nd harmonic.  Same sequence as NCO0 with a phase step of
        // 2 * phase_inc, and with velocity ADDED to the level when overblow is enabled.
        6'h20: 
          begin    state <= state + 1;
          PHacc       <= PHacc_RAM[NCOsel];          // PHacc is 48 bits, phase accum for NCO1
          end

        6'h21:                              // here we add 2 * phase_inc to PHacc, this is the 2nd harmonic
          begin    state <= state + 1;
          PHacc <= PHacc + {phase_inc,1'b0};  // advance the phase accumulator, 2nd harmonic
          end

        6'h22:
          begin    state <= state + 1;
          A0 <= ADSRout_REG1;                        // envelope 1 times velocity
          B0 <= {5'b01111,VEL,4'hF};

          PHacc_RAM[NCOsel] <= PHacc;                // store for next DACena
          end
          
        6'h23:
          begin    state <= state + 1;
          A0 <= PROD0;                         // 16 bit arith
          B0 <= SinOut[17:2];
          end
          
        6'h24:
          begin    state <= state + 1;
          if ( VEL_OVERBLOW ) A0 <= NCO1lev + {2'b00,VEL,7'b0000000};    // 16 bit arith
          else                A0 <= NCO1lev;
          B0 <= PROD0;

          NCOsel  <= 2;
          end
          
        6'h25:
          begin    state <= state + 1;
          SumOut  <= SumOut + PROD0;                 // SumOut is 18 bits; accumulate the NCO1 contribution
          end
          
          
        // Process NCO2
        // States 26-2B: 3rd harmonic, phase step of 3 * phase_inc.  Multiplier 2 is used
        // in parallel (states 2A/2B) to derive the noise filter pitch.
        6'h26:
          begin    state <= state + 1;
          PHacc       <= PHacc_RAM[NCOsel];                // PHacc is 48 bits, phase accum for NCO2
          end
          
        6'h27:                            // we add 3 * phase_inc to PHacc, this is the 3rd harmonic
          begin    state <= state + 1;
          PHacc <= PHacc + {phase_inc,1'b0} + {1'b0,phase_inc} ; // advance the phase accumulator, 3rd harmonic
          end
          
        6'h28:
          begin    state <= state + 1;
          A0 <= ADSRout_REG2;                            // envelope 2 times velocity
          B0 <= {5'b01111,VEL,4'hF};

          PHacc_RAM[NCOsel] <= PHacc;                    // store for next DACena
          end

        6'h29:
          begin    state <= state + 1;
          A0 <= PROD0;
          B0 <= SinOut[17:2];
          end
          
        6'h2A:
          begin    state <= state + 1;
          A0 <= NCO2lev;                            // no overblow adjustment for this harmonic
          B0 <= PROD0;

          A2 <= ( pitch << 1 );                              // multiply pitch by
          B2 <= noise_pitch_adj;                    // noise_pitch_adj (from patch editor)

          NCOsel  <= 3;
          end
          
        6'h2B:
          begin    state <= state + 1;
          SumOut  <= SumOut + PROD0;                // SumOut is 18 bits; accumulate the NCO2 contribution
          
          noise_pitch <= PROD2;                     // noise_pitch should be tuned to sine fundamental
          end


        // Process NCO3 - Noise        
        // States 2C-35: start the bandpass filter on the LFSR noise, wait for it, then
        // chain four multiplies through multiplier 0:
        //   (envelope 3 * velocity) * filter output * Q compensation * level.
        6'h2C:
          begin    state <= state + 1;
          freq   <= noise_pitch << 10;              // filter frequency follows the noise pitch
          SVFena <= 1;
          end
          
        6'h2D:                                      // SVF state ENA complete
          begin    state <= state + 1;
          SVFena <= 0;
          end
          
        6'h2E:                                      // SVF state 0 complete
          begin    state <= state + 1;
          end
          
        6'h2F:                                      // SVF state 1 complete
          begin    state <= state + 1;
          end
          
        6'h30:                                      // SVF state 2 complete
          begin    state <= state + 1;
          end
          
        6'h31:
          begin    state <= state + 1;
          A0 <= ADSRout_REG3;
          B0 <= {1'b0,VEL,8'hFF};                   // velocity is packed differently here than for the sine NCOs
          end
          
        6'h32:
          begin    state <= state + 1;
          A0 <= PROD0;                         // ADSR 3 output
//          B0 <= SVFout[29:14];                  // BP filtered noise output, converted to signed 16 bit
          B0 <= SVFout[31:16];                  // BP filtered noise output, converted to signed 16 bit
          end
          
        6'h33:                                 // Q enhancement compensation
          begin    state <= state + 1;
          A0 <= PROD0;
          B0 <= 16'h7FFF - {1'b0,NOTENUM[5:0],9'b0_0000_0000}; // the factor shrinks as NOTENUM[5:0] grows
          end
          
        6'h34:
          begin    state <= state + 1;
          A0 <= NCO3lev;
          B0 <= PROD0 << 3;                         // gain of 8 on the compensated noise
          end
          
        6'h35:
          begin    state <= state + 1;
          SumOut  <= SumOut + PROD0;                 // SumOut is 18 bits
          end

        6'h36:
          begin    state <= state + 1;
          A2 <= ((18'h1417F + ((CHANpres+(CHANpres<<1))<<7))<<14);  // 32 bit IIR filter input
          B2 <= IIR_a0;
          end
          
        6'h37:
          begin    state <= state + 1;
          PROD2_reg <= PROD2;

          A2 <= IIR_y1;
          B2 <= IIR_b1;
          end
          
        6'h38:
          begin    state <= state + 1;
          IIR_y1 <= PROD2 + PROD2_reg;   // store output of zipper filter
          end

        6'h39:
          begin    state <= state + 1;
          A1 <= SumOut;
          B1 <= IIR_y1[31:14];           // upper 18 bits of zipper filter output
          end
          
        6'h3A:
          begin    state <= state + 1;
          SumOut <= PROD1;
          end
          
        6'h3B:
          begin    state <= state + 1;
          case ( DACshifts )
            2'h0: DACnext <= SumOut[17:6];
            2'h1: DACnext <= SumOut[16:5];
            2'h2: DACnext <= SumOut[15:4];
            2'h3: DACnext <= SumOut[14:3];
          endcase
          end
          
        6'h3C:
          begin    state <= state + 1;
          end
          
        6'h3D:
          begin    state <= state + 1;
          end
          
        6'h3E:
          begin    state <= state + 1;
          end

        6'h3F:
          begin
          run <= 0;    // Jane! Stop this crazy thing!
          end
          
        endcase
        end
      end
    end

///////////////////////////////////////////////////////////////////////////////  
// Decode structures for hardware receiving data from the MCU

  // MCU write decoder.
  // On every rising clk edge where write_strobe is high, the byte on out_port is
  // stored into the register selected by port_id (and, for sysex parameters, by
  // the previously latched SYSEX_ADDR_MSB bank).
  //
  // NOTE: the sysex decodes below deliberately avoid casex.  casex treats X bits
  // in port_id as wildcards during simulation, so an unknown address would match
  // the first item and silently write a register, hiding the problem and
  // disagreeing with the synthesized hardware.  Exact decodes use 'case';
  // ranged decodes use 'casez' with '?' wildcards.  Synthesized logic is unchanged.
  always @ ( posedge clk )
  begin
    if ( write_strobe == 1'b1 )
    begin
// This case block contains selection logic for system level ports and for CC ports.
// Note that these ports all have bit 7 of port_id set.
    case ( port_id )
    8'hD0:       NOTENUM               <= out_port[6:0];  // note number
    
    8'hF0:       CHANpres              <= out_port[6:0];  // channel pressure value, global
    8'hF1:       PW[13:7]              <= out_port[6:0];  // pitch wheel MSB, global
    8'hF2:       PW[6:0]               <= out_port[6:0];  // pitch wheel LSB, global
    8'hF3:       MOD_WHL               <= out_port[6:0];  // modulation wheel, global
    8'hF4:       NOTEOCT               <= out_port;       // F4 only
    8'hF8:       MCU_GATE              <= out_port[0];    // MCU_GATE signal
    8'hF9:       MCU_VEL               <= out_port[6:0];  // port to set synth hardware velocity register
    8'hFC:       SUSTAIN               <= out_port[6];    // sustain command register
    8'hFD:       KEY_DOWN              <= out_port[0];

    8'hE0:       SYSEX_ADDR_MSB        <= out_port;       // selects the sysex parameter bank used below
      
    8'h90:       LCD                   <= out_port;
    endcase
//////////////////////////////////////////////////////////////////////////////////////////
// These if blocks contain case blocks for sysex populated parameters     
// below this, all port_id values have bit 7 set to zero
// Address space is 14 bits.
// SYSEX_ADDR_MSB is read here with its value from before this clock edge
// (nonblocking assignment above), so a write to port E0 takes effect on the
// next write strobe.

     // Bank 01: NCO level registers, two 7-bit halves per NCO.
     if ( SYSEX_ADDR_MSB == 8'h01 )
       begin
       case ( port_id )
         8'h00: NCO0levHI                            <= out_port[6:0];  // 00
         8'h01: NCO0levLO                            <= out_port[6:0];  // 01

         8'h02: NCO1levHI                            <= out_port[6:0];  // 02
         8'h03: NCO1levLO                            <= out_port[6:0];  // 03

         8'h04: NCO2levHI                            <= out_port[6:0];  // 04
         8'h05: NCO2levLO                            <= out_port[6:0];  // 05

         8'h06: NCO3levHI                            <= out_port[6:0];  // 06
         8'h07: NCO3levLO                            <= out_port[6:0];  // 07
       endcase
       end       

     // Bank 02: ADSR parameter RAMs.  port_id[1:0] selects the entry within each
     // RAM; the upper bits select which RAM (A/D/S/R, hi/mi/lo 5-bit slices).
     if ( SYSEX_ADDR_MSB == 8'h02 )
       begin
       casez ( port_id )
         8'b000000??: adsrA_RAM_hi[port_id[1:0]]     <= out_port[4:0];  // 00 - 03
         8'b000001??: adsrA_RAM_mi[port_id[1:0]]     <= out_port[4:0];  // 04 - 07
         8'b000010??: adsrA_RAM_lo[port_id[1:0]]     <= out_port[4:0];  // 08 - 0B

         8'b000100??: adsrD_RAM_hi[port_id[1:0]]     <= out_port[4:0];  // 10 - 13
         8'b000101??: adsrD_RAM_mi[port_id[1:0]]     <= out_port[4:0];  // 14 - 17
         8'b000110??: adsrD_RAM_lo[port_id[1:0]]     <= out_port[4:0];  // 18 - 1B

         8'b001000??: adsrS_RAM_hi[port_id[1:0]]     <= out_port[4:0];  // 20 - 23
         8'b001001??: adsrS_RAM_mi[port_id[1:0]]     <= out_port[4:0];  // 24 - 27
         8'b001010??: adsrS_RAM_lo[port_id[1:0]]     <= out_port[4:0];  // 28 - 2B

         8'b001100??: adsrR_RAM_hi[port_id[1:0]]     <= out_port[4:0];  // 30 - 33
         8'b001101??: adsrR_RAM_mi[port_id[1:0]]     <= out_port[4:0];  // 34 - 37
         8'b001110??: adsrR_RAM_lo[port_id[1:0]]     <= out_port[4:0];  // 38 - 3B
         
         8'b010000??: expo_R_RAM[port_id[1:0]]       <= out_port[0];    // 40 - 43
       endcase
       end       

     // Bank 7F: global settings (MIDI channel, DAC shift, overblow enable,
     // tuning and noise pitch adjustment, each assembled from 7-bit pieces).
     if ( SYSEX_ADDR_MSB == 8'h7F )
       begin
       case ( port_id )
         8'h00: MIDI_CHANNEL                    <= out_port[3:0];
         8'h01: DACshifts                       <= out_port[1:0];
         8'h02: VEL_OVERBLOW                    <= out_port[0];
         
         8'h10: tuning[14]                      <= out_port[0];    // tuning should arrive here unsigned
         8'h11: tuning[13:7]                    <= out_port[6:0];
         8'h12: tuning[6:0]                     <= out_port[6:0];

         8'h20: noise_pitch_adj[31:28]          <= out_port[3:0];
         8'h21: noise_pitch_adj[27:21]          <= out_port[6:0];
         8'h22: noise_pitch_adj[20:14]          <= out_port[6:0];
         8'h23: noise_pitch_adj[13:7]           <= out_port[6:0];
         8'h24: noise_pitch_adj[6:0]            <= out_port[6:0];
       endcase
       end       

    end
  end

// Read-port multiplexer for the PicoBlaze.
// in_port_reg is reloaded on every rising edge of clk with the data selected by
// the low nibble of port_id, so the value is already valid whenever the
// PicoBlaze reads it.  Only port_id[3:0] is decoded, so read addresses that
// share a low nibble return the same data.
//
// A plain 'case' is used (no item contains wildcards): with casex an unknown (X)
// port_id would match the first item in simulation and return the UART status
// instead of propagating X through the default.  Synthesized logic is unchanged.
  always @ ( posedge clk ) 
    begin
    case ( port_id[3:0] )                                // decode and transfer data to in_port_reg
    4'h0:    in_port_reg <= {rx1rdy,rx0rdy,6'b000000};   // UART1 & UART0 rxready bits
    4'h1:    in_port_reg <= rx0data;                     // MIDI UART rxdata
    4'h2:    in_port_reg <= {4'b0000,MIDI_CHANNEL};      // MIDI channel in the low nibble
//    4'h8:    in_port_reg <= {1'b0,TRANSPOSE};
    4'h9:    in_port_reg <= rx1data;                     // TTY UART rxdata
    4'hF:    in_port_reg <= version;                     // version number stored in hardware
    default: in_port_reg <= 8'bxxxxxxxx;                 // unused addresses: don't care
    endcase
    end

/////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////
// LEDs

  // LED drive: while BTN_WEST is asserted the LEDs show NOTEOCT; otherwise (default)
  // they show, MSB first: inverted Raw_MIDI_In, inverted TTY_In, MCU_GATE, GATE,
  // and four zero bits.
  assign LED = ( !BTN_WEST ) ? { ~Raw_MIDI_In, ~TTY_In, MCU_GATE, GATE, 4'b0000 }
                             : NOTEOCT;

/////////////////////////////////////////////////////////////////////////////////////////////

endmodule
