unit ESpeakInterface;

{

  An interface on the ESpeak Pascal binding, to make things a bit easier in Pascal.

  You can have multiple speakers, but they will all share the same basic settings for buffer size, interest in
  phoneme callbacks, and the use of IPA or eSpeak phoneme names. The only provided interface type to eSpeak is the
  synchronous one - that is, a call to Speak will only return after all audio was generated. The async method seems
  to not be compiled into the DLL used.

  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

   COPYRIGHT 2019 Blue Hell / Jan Punter

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License version 2 as
  published by the Free Software Foundation;

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

  For all listed email addresses :

    _dot. to be substituted by a dot      '.'
    2@t2  to be substituted by an at sign '@'


  Blue Hell is a trade mark owned by

    Jan Punter
    https://www.bluehell.nl/
    jan2@t2mail_dot_bluehell_dot_nl
}


interface

uses

  WinApi.Windows, System.SysUtils, System.Classes, System.Math,

  ESpeak;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  //
  procedure ConfigureModule( anAudioBufferTime: Integer; aWantPhonemeCallBacks, aWantIpaPhonemes: Boolean);
  //
  // Call ConfigureModule() before instantiating the first TESpeakSpeaker; the first TESpeakSpeaker created
  // will configure eSpeak using the values passed here.
  //
  //   anAudioBufferTime     - size in ms of the audio chunks eSpeak hands to the callback
  //   aWantPhonemeCallBacks - True to receive phoneme events
  //   aWantIpaPhonemes      - selects IPA phoneme names (True) or eSpeak phoneme names (False)
  //
  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // Returns the DLL path as reported by the ESpeak binding (espeak_dllpath).
  function  ESpeakDllName    : string;
  // Returns the ESPEAK_API_REVISION constant of the ESpeak binding.
  function  ESpeakApiVersion : Integer;
  // Tells whether aPhoneme is one of the vowel phoneme names known to this unit.
  function  ESpeakIsVowel( const aPhoneme: string): Boolean;


type

  // One audio sample as delivered to the wave data handler.
  TESpeakSample      = SmallInt;
  // Open-ended sample array type; only meant to be accessed through PESpeakSamples, never allocated as such.
  TESpeakSamples     = packed array[ 0 .. MaxInt div SizeOf( TESpeakSample) - 1] of TESpeakSample;
  PESpeakSamples     = ^TESpeakSamples;
  // Punctuation mode; its ordinal value is what gets passed on to eSpeak.
  TESpeakPunctuation = ( esPunctNone, esPunctSome, esPunctAll);


  // Event handler types for TESpeakSpeaker. All handlers except the debug one return an Integer; a non-zero value
  // makes the common synth callback return 1 to eSpeak (the callback's own comment calls this "quit").
  // aSender is the TESpeakSpeaker that dispatched the event.
  TOnESpeakerDebug          = procedure( const aSender: TObject; const aMsg: string                               )          of object;
  TOnESpeakerListTerminated = function ( const aSender: TObject                                                   ): Integer of object;
  TOnESpeakerWord           = function ( const aSender: TObject; aNumber: LongInt                                 ): Integer of object;
  TOnESpeakerSentence       = function ( const aSender: TObject; aNumber: LongInt                                 ): Integer of object;
  TOnESpeakerMark           = function ( const aSender: TObject; const aName: string                              ): Integer of object;
  TOnESpeakerPlay           = function ( const aSender: TObject; const aName: string                              ): Integer of object;
  TOnESpeakerEnd            = function ( const aSender: TObject                                                   ): Integer of object;
  TOnESpeakerMsgTerminated  = function ( const aSender: TObject                                                   ): Integer of object;
  TOnESpeakerPhoneme        = function ( const aSender: TObject; const aPhoneme: string                           ): Integer of object;
  TOnESpeakerSampleRate     = function ( const aSender: TObject; aRate: LongInt                                   ): Integer of object;
  TOnESpeakerWaveData       = function ( const aSender: TObject; const aData: PESpeakSamples; aNumSamples: LongInt): Integer of object;
  TOnESpeakerCompleted      = function ( const aSender: TObject                                                   ): Integer of object;


  // A speaker: holds per-speaker voice / language / variation selection and speech parameters, and dispatches
  // the eSpeak events addressed to it to the assigned On... handlers.
  // Creating a speaker initializes eSpeak when needed, destroying it releases that use again.
  TESpeakSpeaker = class
  private
    // Speech settings, see the like-named properties below.
    FUseTranslation   : Boolean;
    FSelectedVoice    : Integer;
    FSelectedLanguage : Integer;
    FSelectedVariation: Integer;
    FRate             : Integer;
    FVolume           : Integer;
    FPitch            : Integer;
    FRange            : Integer;
    FPunctuation      : TESpeakPunctuation;
    FCapitals         : Integer;
    FWordGap          : Integer;
  private
    // Event handler storage, see the On... properties below.
    FOnDebug          : TOnESpeakerDebug;
    FOnListTerminated : TOnESpeakerListTerminated;
    FOnWord           : TOnESpeakerWord;
    FOnSentence       : TOnESpeakerSentence;
    FOnMark           : TOnESpeakerMark;
    FOnPlay           : TOnESpeakerPlay;
    FOnEnd            : TOnESpeakerEnd;
    FOnMsgTerminated  : TOnESpeakerMsgTerminated;
    FOnPhoneme        : TOnESpeakerPhoneme;
    FOnSampleRate     : TOnESpeakerSampleRate;
    FOnWaveData       : TOnESpeakerWaveData;
    FOnCompleted      : TOnESpeakerCompleted;
  private
    // Property accessors.
    function    GetESpeakInitialized: Boolean;
    function    GetESpeakIsPlaying  : Boolean;
    function    GetESpeakVersion    : string;
    function    GetESpeakDllPath    : string;
    function    GetSampleRate       : LongInt;
    function    GetVoiceCount       : Integer;
    function    GetVoiceName        ( anIndex: Integer): string;
    function    GetVoiceIdentifier  ( anIndex: Integer): string;
    procedure   SetSelectedVoice    ( aValue : Integer);
    function    GetLanguageCount    : Integer;
    function    GetLanguage( anIndex: Integer): string;
    procedure   SetSelectedLanguage ( aValue: Integer);
    function    GetVariationCount   : Integer;
    function    GetVariation        ( anIndex: Integer): string;
    procedure   SetSelectedVariation( aValue: Integer);
    procedure   SetRate             ( aValue: Integer);
    procedure   SetVolume           ( aValue: Integer);
    procedure   SetPitch            ( aValue: Integer);
    procedure   SetRange            ( aValue: Integer);
    procedure   SetPunctuation      ( aValue: TESpeakPunctuation);
    procedure   SetCapitals         ( aValue: Integer);
    procedure   SetWordGap          ( aValue: Integer);
  private
    // Called by the common synth callback for each event that carries this speaker as its user data.
    function    HandleEvent( aWaveData: PEShort; aNumSamples: Integer; const anEvent: TESpeakEvent): Integer;
    procedure   Debug   ( const aMsg: string);
    procedure   DebugFmt( const aFmt: string; anArgs: array of const);
    // Rewrites {!} / {!<number>} shorthand into <mark> tags when UseTranslation is set.
    function    Translate( const aMsg: string): string;
  public
    constructor Create;
    destructor  Destroy;                                                                                       override;
    // SelectVoice / SelectLanguage store the index as is, without the range check the property setters perform.
    procedure   SelectVoice   ( anIndex: Integer);
    procedure   SelectLanguage( anIndex: Integer);
    procedure   Speak( const aText: string);
    procedure   Stop;
    function    CreateVoiceNames: TStringList;
    function    CreateLanguageNames( aVoiceIndex: Integer): TStringList;
  public
    property    ESpeakInitialized                   : Boolean                   read GetESpeakInitialized;
    property    ESpeakIsPlaying                     : Boolean                   read GetESpeakIsPlaying  ;
    property    ESpeakVersion                       : string                    read GetESpeakVersion    ;
    property    ESpeakDllPath                       : string                    read GetESpeakDllPath    ;
    property    UseTranslation                      : Boolean                   read FUseTranslation       write FUseTranslation;
    property    SampleRate                          : Integer                   read GetSampleRate       ;
    property    VoiceCount                          : Integer                   read GetVoiceCount       ;
    property    VoiceName      [ anIndex : Integer] : string                    read GetVoiceName        ;
    property    VoiceIdentifier[ anIndex : Integer] : string                    read GetVoiceIdentifier  ;
    // Writing an out of range index to one of the Selected... properties stores -1.
    property    SelectedVoice                       : Integer                   read FSelectedVoice        write SetSelectedVoice   ;
    property    LanguageCount                       : Integer                   read GetLanguageCount    ;
    property    Language       [ anIndex : Integer] : string                    read GetLanguage         ;
    property    SelectedLanguage                    : Integer                   read FSelectedLanguage     write SetSelectedLanguage;
    property    VariationCount                      : Integer                   read GetVariationCount;
    property    Variation      [ anIndex : Integer] : string                    read GetVariation        ;
    property    SelectedVariation                   : Integer                   read FSelectedVariation    write SetSelectedVariation;
    // Out of range writes to the properties below are ignored, the old value is kept :
    //   Rate     : ES_RATE_MINIMUM .. ES_RATE_MAXIMUM
    //   Volume   : 0 .. 500
    //   Pitch    : 0 .. 100
    //   Range    : 0 .. 100
    //   Capitals : >= 0
    //   WordGap  : >= 0
    property    Rate                                : Integer                   read FRate                 write SetRate            ;
    property    Volume                              : Integer                   read FVolume               write SetVolume          ;
    property    Pitch                               : Integer                   read FPitch                write SetPitch           ;
    property    Range                               : Integer                   read FRange                write SetRange           ;
    property    Punctuation                         : TESpeakPunctuation        read FPunctuation          write SetPunctuation     ;
    property    Capitals                            : Integer                   read FCapitals             write SetCapitals        ;
    property    WordGap                             : Integer                   read FWordGap              write SetWordGap         ;
  public
    // Event handlers; for most events an unassigned handler results in a debug message instead.
    property    OnDebug                             : TOnESpeakerDebug          read FOnDebug              write FOnDebug         ;
    property    OnListTerminated                    : TOnESpeakerListTerminated read FOnListTerminated     write FOnListTerminated;
    property    OnWord                              : TOnESpeakerWord           read FOnWord               write FOnWord          ;
    property    OnSentence                          : TOnESpeakerSentence       read FOnSentence           write FOnSentence      ;
    property    OnMark                              : TOnESpeakerMark           read FOnMark               write FOnMark          ;
    property    OnPlay                              : TOnESpeakerPlay           read FOnPlay               write FOnPlay          ;
    property    OnEnd                               : TOnESpeakerEnd            read FOnEnd                write FOnEnd           ;
    property    OnMsgTerminated                     : TOnESpeakerMsgTerminated  read FOnMsgTerminated      write FOnMsgTerminated ;
    property    OnPhoneme                           : TOnESpeakerPhoneme        read FOnPhoneme            write FOnPhoneme       ;
    property    OnSampleRate                        : TOnESpeakerSampleRate     read FOnSampleRate         write FOnSampleRate    ;
    property    OnWaveData                          : TOnESpeakerWaveData       read FOnWaveData           write FOnWaveData      ;
    property    OnCompleted                         : TOnESpeakerCompleted      read FOnCompleted          write FOnCompleted     ;
  end;

  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////



implementation



  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

type

  // Open-ended event array type, only meant to be accessed through PESpeakEvents to index the event list
  // that eSpeak passes to the synth callback.
  TESpeakEvents   = array[ 0 .. MaxInt div SizeOf( TESpeakEvent) - 1] of TESpeakEvent;
  PESpeakEvents   = ^TESpeakEvents;
  TSpeakers       = array of TESpeakSpeaker;
  TOnEspeechEvent = function( const aSender: TObject; aData: PESpeakSamples; aSampleCount: Integer; anEvents: PESpeakEvents): Integer of object;


  // Wraps one eSpeak voice record: keeps a copy of the record plus its name, identifier and
  // language list converted to Pascal strings.
  TESpeakerVoice = class
  private
    FVoiceData       : TESpeakVoice;     // Copy of the voice record passed to Create
    FVoiceName       : string;
    FVoiceIdentifier : string;
    FLanguages       : array of string;  // Filled by ParseVoice, in the order found in the voice record
  private
    procedure   AddLanguage( aPrio: Integer; const aValue: string);
    procedure   ParseVoice;
    function    GetLanguageCount: Integer;
    function    GetLanguage( anIndex: Integer): string;
  public
    constructor Create( const aVoice: TESpeakVoice);
    destructor  Destroy;                                                                                       override;
  public
    property    VoiceData                   : TESpeakVoice read FVoiceData;
    property    VoiceName                   : string       read FVoiceName;
    property    VoiceIdentifier             : string       read FVoiceIdentifier;
    property    LanguageCount               : Integer      read GetLanguageCount;
    // Yields an empty string for an out of range index.
    property    Language[ anIndex: Integer] : string       read GetLanguage;
  end;


  // The collection of voices; a single instance is shared by all speakers through the
  // ESpeakerVoices unit variable.
  TESpeakerVoices = class
  private
    FVoices    : TStringList;              // Temporary for sorting voices on their name
    FVoiceList : array of TESpeakerVoice;
  private
    function    GetVoiceCount : Integer;
    function    GetVoiceName( anIndex: Integer): string;
    function    GetVoiceIdentifier( anIndex: Integer): string;
    function    GetLanguageCount( aVoiceIndex: Integer): Integer;
    function    GetLanguage( aVoiceIndex, anIndex: Integer): string;
  public
    constructor Create;
    destructor  Destroy;                                                                                       override;
  public
    property    VoiceCount                                      : Integer read GetVoiceCount;
    property    VoiceName      [ anIndex             : Integer] : string  read GetVoiceName;
    property    VoiceIdentifier[ anIndex             : Integer] : string  read GetVoiceIdentifier;
    property    LanguageCount  [ aVoiceIndex         : Integer] : Integer read GetLanguageCount;
    property    Language       [ aVoiceIndex, anIndex: Integer] : string  read GetLanguage;
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

var

  // Module configuration, set through ConfigureModule() and used when eSpeak gets initialized.
  AudioBufferTime      : Integer = 20;    // Size in ms of audio chunks returned by ESpeak. 0 is default, for 200 ms.
  WantPhonemeCallbacks : Boolean = True;  // Set to False to disable phoneme callbacks
  WantIpaPhonems       : Boolean = True;  // False uses eSpeak phoneme names, True uses IPA phoneme names

  // Shared eSpeak state.
  Version              : string  = 'ESpeak not initialized';  // Replaced by the espeak_Info text on initialization
  ESpeakActive         : Integer = 0;                         // Use count, eSpeak is initialized while > 0
  ESpeakSampleRate     : LongInt = -1;                        // Result of espeak_Initialize, -1 when not initialized
  ESpeakerVoices       : TESpeakerVoices;                     // Created on initialization, freed on termination
  // Voice variation names; a non-empty one is appended to the voice name as '+<variation>' when speaking.
  ESpeakerVariations   : array[ 0 .. 19] of string = (
    ''        ,
    'm1'      ,
    'm2'      ,
    'm3'      ,
    'm4'      ,
    'm5'      ,
    'm6'      ,
    'm7'      ,
    'f1'      ,
    'f2'      ,
    'f3'      ,
    'f4'      ,
    'f5'      ,
    'croak'   ,
    'whisper' ,
    'whisperf',
    'klatt'   ,
    'klatt2'  ,
    'klatt3'  ,
    'klatt4'
  );


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  procedure ConfigureModule( anAudioBufferTime: Integer; aWantPhonemeCallBacks, aWantIpaPhonemes: Boolean);
  // Stores the module wide settings. They are only read when eSpeak gets initialized, so this
  // has no effect on an eSpeak instance that is already active.
  begin
    WantIpaPhonems       := aWantIpaPhonemes;
    WantPhonemeCallbacks := aWantPhonemeCallBacks;
    AudioBufferTime      := anAudioBufferTime;
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakDllName: string;
  // Hands out the DLL path as known to the ESpeak binding.
  begin
    Exit( espeak_dllpath);
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakApiVersion : Integer;
  // Hands out the API revision constant of the ESpeak binding.
  begin
    Exit( ESPEAK_API_REVISION);
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakIsVowel( const aPhoneme: string): Boolean;
  // Tells whether aPhoneme is one of the vowel phoneme names listed below.
  //
  // The name is first looked up as it is, so that listed names which themselves end in ':' ('A:', 'u:', 'O:' ..)
  // are recognized. When that fails and the name ends in ':' (which can be used to lengthen a vowel) the name
  // without that trailing ':' is looked up as well.
  //
  // An empty name is never a vowel. The comparison is case sensitive.
  const
    VOWELS : array[ 0 .. 35] of string = (
      '@'  , '3'  , '3:' , '@L' , '@2' , '@5' , 'a'  , 'aa' , 'a#' ,
      'A:' , 'A@' , 'E'  , 'e@' , 'I'  , 'I2' , 'i'  , 'i:' , 'i@' ,
      'o'  , 'V'  , 'u:' , 'U'  , 'U@' , 'O:' , 'O@' , 'o@' , 'aI' ,
      'eI' , 'OI' , 'aU' , 'oU' , 'aI@', 'aU@', 'e'  , 'y'  , 'Y'
    );

    function IsListed( const aName: string): Boolean;
    // True when aName literally occurs in VOWELS
    var
      i : Integer;
    begin
      Result := False;

      for i := Low( VOWELS) to High( VOWELS)
      do begin
        if VOWELS[ i] = aName
        then begin
          Result := True;
          Break;
        end;
      end;
    end;

  begin
    Result := False;

    if aPhoneme = ''                                           // Nothing to index, and certainly not a vowel
    then Exit;

    Result := IsListed( aPhoneme);

    if   not Result
    and  ( aPhoneme[ Length( aPhoneme)] = ':')                 // : can be used to lengthen a vowel
    then Result := IsListed( Copy( aPhoneme, 1, Length( aPhoneme) - 1));
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakErrorToStr( anError: TESpeakError): string;
  // Gives a readable text for an eSpeak error code; codes not known here are reported with their ordinal value.
  begin
    if      anError = EE_OK
    then    Result := 'ESpeak OK'
    else if anError = EE_INTERNAL_ERROR
    then    Result := 'ESpeak Internal error'
    else if anError = EE_BUFFER_FULL
    then    Result := 'ESpeak buffer full'
    else if anError = EE_NOT_FOUND
    then    Result := 'ESpeak not found'
    else    Result := Format( 'ESpeak unknown error (%d)', [ Ord( anError)]);
  end;



  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakHandler( WaveData: PESHORT; NumSamples: Integer; Events: PESpeakEvent): Integer; cdecl;
  // The common synth callback.
  //
  // Walks the event list up to and including the EV_LIST_TERMINATED entry and passes every event that carries
  // user data on to the TESpeakSpeaker found in that user data. The results of the speakers are added up;
  // 1 is returned when that sum is non-zero, when an Exception got raised or when there is no event list at
  // all, 0 is returned otherwise.
  var
    aList    : PESpeakEvents;
    aCurrent : TESpeakEvent;
    aPos     : Integer;
    aSum     : Integer;
  begin
    if not Assigned( Events)
    then begin
      Result := 1;
      Exit;
    end;

    aSum := 0;

    try
      aList := PESpeakEvents( Events);
      aPos  := 0;

      repeat
        aCurrent := aList[ aPos];
        Inc( aPos);

        if Assigned( aCurrent.UserData)
        then aSum := aSum + TESpeakSpeaker( aCurrent.UserData).HandleEvent( WaveData, NumSamples, aCurrent); // When any handler wants to quit .. quit
      until aCurrent.EventType = EV_LIST_TERMINATED;
    except
      on E: Exception
      do aSum := 1; // Terminate stuff on any exceptions
    end;

    if   aSum <> 0
    then Result := 1
    else Result := 0;
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakUriHandler( EventType: LongInt; Uri: PECHAR; Base: PECHAR): LongInt; cdecl;
  // The URI callback handler.
  // This callback gets neither user data nor an ID, so it can not be routed to a TESpeakSpeaker;
  // all arguments are ignored and eSpeak is just told to not use the URI.
  begin
    Exit( 1); // Do not play the sound, let the URI be spoken instead.
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  function  ESpeakInitialize: Boolean;
  // Registers one more user of eSpeak, initializing eSpeak for the first user.
  //
  // On first use the version text is read, eSpeak is initialized for synchronous audio using the module
  // settings (see ConfigureModule), the voice list is created and the callbacks are installed. Later calls
  // only increment the use count.
  //
  // Returns True when eSpeak is active after the call. When initialization fails the use count stays zero
  // and False is returned.
  const
    // Option bits of espeak_Initialize, see speak_lib.h
    OPT_PHONEME_EVENTS = $0001;  // bit 0: allow phoneme events
    OPT_PHONEME_IPA    = $0002;  // bit 1: phoneme events give IPA names instead of eSpeak names
  var
    VersionInfo : PECHAR;
    anOptions   : LongInt;
  begin
    if   ESpeakActive = 0
    then begin
      VersionInfo := espeak_Info( nil);
      Version     := string( VersionInfo);
      espeak_SetPhonemeTraceSafe;     // Can't use phoneme tracing

      anOptions := 0;

      if   WantPhonemeCallbacks
      then begin
        anOptions := OPT_PHONEME_EVENTS;

        if   WantIpaPhonems                          // The IPA bit must be SET to get IPA names
        then anOptions := anOptions or OPT_PHONEME_IPA;
      end;

      ESpeakSampleRate := espeak_Initialize( AUDIO_OUTPUT_SYNCHRONOUS, AudioBufferTime, nil, anOptions);

      if   ESpeakSampleRate >= 0
      then begin
        if not Assigned( ESpeakerVoices)
        then ESpeakerVoices := TESpeakerVoices.Create;

        espeak_SetSynthCallback( ESpeakHandler   );
        espeak_SetUriCallback  ( ESpeakUriHandler);
        Inc( ESpeakActive);
      end;
    end
    else Inc( ESpeakActive);

    Result := ESpeakActive > 0;
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  procedure ESpeakUnInitialize( Forced: Boolean);
  // Releases one use of eSpeak. When the last user is gone the callbacks are removed, eSpeak is
  // terminated and the shared voice list is freed. Does nothing when eSpeak is not active.
  //
  // NOTE: Forced is currently not used, the use count is always honored.
  begin
    if   ESpeakActive <= 0
    then Exit;

    Dec( ESpeakActive);

    if   ESpeakActive > 0            // Still in use by another speaker
    then Exit;

    espeak_SetSynthCallback( nil);
    espeak_SetUriCallback  ( nil);
    espeak_terminate;
    ESpeakSampleRate := -1;
    ESpeakActive     := 0;
    FreeAndNil( ESpeakerVoices);
  end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

{ ========
  TESpeakerVoice = class
  private
    FVoiceData       : TESpeakVoice;
    FVoiceName       : string;
    FVoiceIdentifier : string;
    FLanguages       : array of string;
  public
    property    VoiceData                   : TESpeakVoice read FVoiceData;
    property    VoiceName                   : string       read FVoiceName;
    property    VoiceIdentifier             : string       read FVoiceIdentifier;
    property    LanguageCount               : Integer      read GetLanguageCount;
    property    Language[ anIndex: Integer] : string       read GetLanguage;
  private
}

    procedure   TESpeakerVoice.AddLanguage( aPrio: Integer; const aValue: string);
    // Appends aValue to the language list. aPrio is accepted but not stored.
    var
      aSlot : Integer;
    begin
      aSlot := Length( FLanguages);
      SetLength( FLanguages, aSlot + 1);
      FLanguages[ aSlot] := aValue;
    end;


    procedure   TESpeakerVoice.ParseVoice;
    // Converts the voice record into Pascal strings.
    //
    // The Languages field is read as a sequence of entries, each made of one priority byte followed by a
    // zero terminated language name; the sequence itself ends on a zero byte where a priority byte would
    // be expected. A voice record without a language list (nil pointer) results in an empty language list.
    type
      TLBytes = array[ 0 .. ( Maxint div SizeOf( Byte)) - 1] of Byte;
      PLBytes = ^TLBytes;
    var
      aLanguages : PEChar;
      aPrio      : Byte;
      aByte      : Byte;
      S          : AnsiString;
      C          : AnsiChar;
      p          : integer;
    begin
      FVoiceName       := string( FVoiceData.Name);
      FVoiceIdentifier := string( FVoiceData.Identifier);
      aLanguages       := FVoiceData.Languages;

      if not Assigned( aLanguages)     // No language list at all, do not dereference nil
      then Exit;

      p      := 0;
      S      := '';
      aByte  := PLBytes( aLanguages)^[ p];

      while aByte <> 0
      do begin
        aPrio := aByte;                // First byte of an entry is its priority
        Inc( p);
        aByte := PLBytes( aLanguages)^[ p];

        while aByte <> 0               // Then the zero terminated language name
        do begin
          C := AnsiChar( aByte);
          S := S + C;
          Inc( p);
          aByte := PLBytes( aLanguages)^[ p];
        end;

        AddLanguage( aPrio, string( S));
        S := '';
        Inc( p);                       // Skip the name terminator, on to the next entry
        aByte := PLBytes( aLanguages)^[ p];
      end;
    end;


    function    TESpeakerVoice.GetLanguageCount: Integer;
    // The number of languages found for this voice.
    begin
      Exit( Length( FLanguages));
    end;


    function    TESpeakerVoice.GetLanguage( anIndex: Integer): string;
    // The language at anIndex, or an empty string when anIndex is out of range.
    begin
      Result := '';

      if   ( anIndex < 0)
      or   ( anIndex >= LanguageCount)
      then Exit;

      Result := FLanguages[ anIndex];
    end;


//  public

    constructor TESpeakerVoice.Create( const aVoice: TESpeakVoice);
    // Keeps a copy of the voice record and extracts name, identifier and languages from it.
    begin
      inherited Create;
      Self.FVoiceData := aVoice;
      Self.ParseVoice;
    end;


    destructor  TESpeakerVoice.Destroy; // override;
    // Nothing of its own to release.
    begin
      inherited Destroy;
    end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

{ ========
  TESpeakSpeaker = class
  private
}

    function    TESpeakSpeaker.GetESpeakInitialized: Boolean;
    // eSpeak counts as initialized while the shared use count is positive.
    begin
      Exit( ESpeakActive > 0);
    end;


    function    TESpeakSpeaker.GetESpeakIsPlaying: Boolean;
    // True when eSpeak is initialized and reports to be playing; eSpeak is not asked when not initialized.
    begin
      Result := False;

      if   ESpeakInitialized
      then Result := espeak_IsPlaying = 1;
    end;


    function    TESpeakSpeaker.GetESpeakVersion: string;
    // The version text obtained on initialization, or the 'not initialized' placeholder.
    begin
      Exit( Version);
    end;


    function    TESpeakSpeaker.GetESpeakDllPath: string;
    // The DLL path as known to the ESpeak binding.
    begin
      Exit( espeak_dllpath);
    end;


    function    TESpeakSpeaker.GetSampleRate: LongInt;
    // The shared sample rate; -1 while eSpeak is not initialized.
    begin
      Exit( ESpeakSampleRate);
    end;


    function    TESpeakSpeaker.GetVoiceCount: Integer;
    // The number of voices in the shared voice list, zero when there is no such list.
    begin
      Result := 0;

      if   Assigned( ESpeakerVoices)
      then Result := ESpeakerVoices.VoiceCount;
    end;


    function    TESpeakSpeaker.GetVoiceName( anIndex: Integer): string;
    // The name of voice anIndex from the shared voice list, or a fixed message when there is no such list.
    begin
      if   not Assigned( ESpeakerVoices)
      then Exit( 'Espeaker voices not assigned');

      Result := ESpeakerVoices.VoiceName[ anIndex];
    end;


    function    TESpeakSpeaker.GetVoiceIdentifier( anIndex: Integer): string;
    // The identifier of voice anIndex from the shared voice list, or a fixed message when there is no such list.
    begin
      if   not Assigned( ESpeakerVoices)
      then Exit( 'Espeaker voices not assigned');

      Result := ESpeakerVoices.VoiceIdentifier[ anIndex];
    end;


    procedure   TESpeakSpeaker.SetSelectedVoice( aValue: Integer);
    // Selects voice aValue; anything that is not a valid voice index results in -1 (no voice selected).
    var
      aValid : Boolean;
    begin
      aValid :=
        Assigned( ESpeakerVoices)
        and not (( aValue < 0) or ( aValue >= VoiceCount));

      if   aValid
      then FSelectedVoice := aValue
      else FSelectedVoice := -1;
    end;


    function    TESpeakSpeaker.GetLanguageCount: Integer;
    // The number of languages of the selected voice, zero when no valid voice is selected.
    begin
      Result := 0;

      if   not Assigned( ESpeakerVoices)
      then Exit;

      if   ( SelectedVoice < 0)
      or   ( SelectedVoice >= VoiceCount)
      then Exit;

      Result := ESpeakerVoices.LanguageCount[ SelectedVoice];
    end;


    function    TESpeakSpeaker.GetLanguage( anIndex: Integer): string;
    // Language anIndex of the selected voice, or a fixed message when the voice selection
    // or anIndex is not valid.
    begin
      Result := 'Invalid language selection';

      if   not Assigned( ESpeakerVoices)
      then Exit;

      if   ( SelectedVoice < 0)
      or   ( SelectedVoice >= VoiceCount)
      then Exit;

      if   ( anIndex < 0)
      or   ( anIndex >= ESpeakerVoices.LanguageCount[ SelectedVoice])
      then Exit;

      Result := ESpeakerVoices.Language[ SelectedVoice, anIndex];
    end;


    procedure   TESpeakSpeaker.SetSelectedLanguage( aValue: Integer);
    // Selects language aValue of the selected voice; results in -1 (no language selected) when
    // there is no valid voice selection or when aValue is not a valid language index for that voice.
    var
      aValid : Boolean;
    begin
      aValid := not (( SelectedVoice < 0) or ( SelectedVoice >= VoiceCount));

      if   aValid
      then aValid := not (( aValue < 0) or ( aValue >= ESpeakerVoices.LanguageCount[ SelectedVoice]));

      if   aValid
      then FSelectedLanguage := aValue
      else FSelectedLanguage := -1;
    end;


    function    TESpeakSpeaker.GetVariationCount: Integer;
    // The number of entries in the fixed variation table.
    begin
      Exit( High( ESpeakerVariations) - Low( ESpeakerVariations) + 1);
    end;


    function    TESpeakSpeaker.GetVariation( anIndex: Integer): string;
    // The variation name at anIndex, or an empty string when anIndex is out of range.
    begin
      Result := '';

      if   ( anIndex < 0)
      or   ( anIndex >= VariationCount)
      then Exit;

      Result := ESpeakerVariations[ anIndex];
    end;


    procedure   TESpeakSpeaker.SetSelectedVariation( aValue: Integer);
    // Selects variation aValue; an out of range value results in -1.
    begin
      if   ( aValue < 0)
      or   ( aValue >= VariationCount)
      then FSelectedVariation := -1
      else FSelectedVariation := aValue;
    end;


    procedure   TESpeakSpeaker.SetRate( aValue: Integer);
    // Accepts ES_RATE_MINIMUM .. ES_RATE_MAXIMUM, other values leave the rate as it was.
    begin
      if   ( aValue < ES_RATE_MINIMUM)
      or   ( aValue > ES_RATE_MAXIMUM)
      then Exit;

      FRate := aValue;
    end;


    procedure   TESpeakSpeaker.SetVolume( aValue: Integer);
    // Accepts 0 .. 500, other values leave the volume as it was.
    begin
      if   ( aValue < 0)
      or   ( aValue > 500)
      then Exit;

      FVolume := aValue;
    end;


    procedure   TESpeakSpeaker.SetPitch( aValue: Integer);
    // Accepts 0 .. 100, other values leave the pitch as it was.
    begin
      if   ( aValue < 0)
      or   ( aValue > 100)
      then Exit;

      FPitch := aValue;
    end;


    procedure   TESpeakSpeaker.SetRange( aValue: Integer);
    // Accepts 0 .. 100, other values leave the range as it was.
    begin
      if   ( aValue < 0)
      or   ( aValue > 100)
      then Exit;

      FRange := aValue;
    end;


    procedure   TESpeakSpeaker.SetPunctuation( aValue: TESpeakPunctuation);
    // Stores the punctuation mode, every value of the type is accepted.
    begin
      Self.FPunctuation := aValue;
    end;


    procedure   TESpeakSpeaker.SetCapitals( aValue: Integer);
    // Accepts non-negative values only, negative ones leave the setting as it was.
    begin
      if   aValue < 0
      then Exit;

      FCapitals := aValue;
    end;


    procedure   TESpeakSpeaker.SetWordGap( aValue: Integer);
    // Accepts non-negative values only, negative ones leave the setting as it was.
    begin
      if   aValue < 0
      then Exit;

      FWordGap := aValue;
    end;


//  private

    function    TESpeakSpeaker.HandleEvent( aWaveData: PEShort; aNumSamples: Integer; const anEvent: TESpeakEvent): Integer;
    // Dispatches one eSpeak event, and the audio that came with it, to the assigned handlers.
    //
    //   aWaveData   - the audio buffer of the callback; nil is taken as 'synthesis completed'
    //   aNumSamples - the number of samples in aWaveData
    //   anEvent     - the event to dispatch
    //
    // With wave data present the event handler matching the event type is called, or a debug message is
    // issued when that handler is not assigned (a list terminator is silently skipped in that case). After
    // that, when there are samples, they are passed to OnWaveData.
    //
    // Returns the value of the wave data handler when that is non-zero, the value of the event handler
    // otherwise; so a non-zero (quit) request of either handler is passed on. An unknown event type
    // results in 1. For the completed case the result is always 0, the OnCompleted result is not used.
    //
    // NOTE(review): the common callback calls this once for every event in the list while passing the same
    // buffer each time, so OnWaveData seems to get the same samples once per event - confirm that this is
    // as intended.
    var
      aPhoneme    : string;
      aUtf8       : UTF8String;
      aLength     : Integer;
      i           : Integer;
      aWaveResult : Integer;
    begin
      Result := 0;

      if Assigned( aWaveData)
      then begin
        case anEvent.EventType of

          EV_LIST_TERMINATED : // = 0;  // Retrieval mode: terminates the event list.

            begin
              if Assigned( FOnListTerminated)
              then Result := FOnListTerminated( Self);
            end;

          EV_WORD            : // = 1,  // Start of word

            begin
              if Assigned( FOnWord)
              then Result := FOnWord( Self, anEvent.ID.Number)
              else DebugFmt( 'WORD %d', [ anEvent.ID.Number]);
            end;

          EV_SENTENCE        : // = 2,  // Start of sentence

            begin
              if Assigned( FOnSentence)
              then Result := FOnSentence( Self, anEvent.ID.Number)
              else DebugFmt( 'SENTENCE %d', [ anEvent.ID.Number]);
            end;

          EV_MARK            : // = 3,  // Mark

            begin
              if Assigned( FOnMark)
              then Result := FOnMark( Self, string( anEvent.ID.Name))
              else DebugFmt( 'MARK "%s"', [ string( anEvent.ID.Name)]);
            end;

          EV_PLAY            : // = 4,  // Audio element

            begin
              if Assigned( FOnPlay)
              then Result := FOnPlay( Self, string( anEvent.ID.Name))
              else DebugFmt( 'PLAY "%s"', [ string( anEvent.ID.Name)]);
            end;

          EV_END             : // = 5,  // End of sentence or clause

            begin
              if Assigned( FOnEnd)
              then Result := FOnEnd( Self)
              else Debug( 'END');
            end;

          EV_MSG_TERMINATED  : // = 6,  // End of message

            begin
              if Assigned( FOnMsgTerminated)
              then Result := FOnMsgTerminated( Self)
              else Debug( 'MSG_TERMINATED');
            end;

          EV_PHONEME         : // = 7,  // Phoneme, if enabled in espeak_Initialize()

            begin
              // The name is at most 8 bytes long, zero terminated unless all 8 bytes are used. eSpeak
              // delivers it as UTF-8, so the bytes are collected first and then decoded as a whole;
              // widening them one by one would garble multi byte (IPA) characters.
              aLength := 0;

              while ( aLength <= 7) and ( anEvent.ID._string[ aLength] <> 0)
              do Inc( aLength);

              SetLength( aUtf8, aLength);

              for i := 1 to aLength
              do aUtf8[ i] := AnsiChar( anEvent.ID._string[ i - 1]);

              aPhoneme := string( aUtf8);

              if Assigned( FOnPhoneme)
              then Result := FOnPhoneme( Self, aPhoneme)
              else DebugFmt( 'PHONEME "%s"', [ aPhoneme]);
            end;

          EV_SAMPLERATE      : // = 8   // internal use, set sample rate

            begin
              ESpeakSampleRate := anEvent.ID.Number;

              if Assigned( FOnSampleRate)
              then Result := FOnSampleRate( Self, SampleRate)
              else DebugFmt( 'SAMPLERATE %d', [ SampleRate]);
            end;

          else begin
            DebugFmt( 'Unhandled event type %d', [ Ord( anEvent.EventType)]);
            Result := 1;
          end;
        end;

        if   aNumSamples > 0
        then begin
          if Assigned( FOnWaveData)
          then begin
            aWaveResult := FOnWaveData( Self, PESpeakSamples( aWaveData), aNumSamples);

            if   aWaveResult <> 0            // Do not wipe out a quit request made by the event handler
            then Result := aWaveResult;
          end
          else begin
            if   SampleRate > 0
            then DebugFmt( 'WAVE_DATA %d samples, %g s', [ aNumSamples, aNumSamples / SampleRate])
            else DebugFmt( 'WAVE_DATA %d samples (no sample rate set yet)', [ aNumSamples]);
          end;
        end;
      end
      else begin
        if   Assigned( FOnCompleted)
        then FOnCompleted( Self)
        else Debug( 'COMPLETED');
      end;
    end;


    procedure   TESpeakSpeaker.Debug( const aMsg: string);
    // Passes aMsg on to the OnDebug handler, does nothing when no handler is assigned.
    begin
      if   not Assigned( FOnDebug)
      then Exit;

      FOnDebug( Self, aMsg);
    end;


    procedure   TESpeakSpeaker.DebugFmt( const aFmt: string; anArgs: array of const);
    // Formatted variant of Debug.
    // This gets called from the audio callback for about every event, so the message is only
    // built when there actually is an OnDebug handler to receive it.
    begin
      if   Assigned( FOnDebug)
      then Debug( Format( aFmt, anArgs));
    end;


    function    TESpeakSpeaker.Translate( const aMsg: string): string;
    // Translate shorthand markers into ones understood by eSpeak, and
    // later on by a valued <mark> tag interpreter.
    // {!}         -> <mark name="1"/>
    // {!<number>} -> <mark name="user1=<number>"/>
    //
    // Returns aMsg unchanged when UseTranslation is False. Anything that does not form a complete valid
    // marker is copied to the result as it is; this includes a '{!...}' holding something that is not a
    // number and an unfinished marker at the end of the message.
    // The number is parsed and formatted using the default (locale dependent) format settings.
    type
      TTranslationState = (
        tsNone       ,    // Copying plain text
        tsInBracket  ,    // Seen '{'
        tsExclamation,    // Seen '{!'
        tsInNumber        // Seen '{!' and at least one more character, collecting up to '}'
      );
    var
      TranslationState : TTranslationState;
      C                : Char;
      Num              : string;   // All characters seen after '{!' so far
      aNumber          : Double;
    begin
      if UseTranslation
      then begin
        Result           := '';
        Num              := '';
        TranslationState := tsNone;

        for C in aMsg
        do begin
          case TranslationState of

            tsNone :
              begin
                if   C = '{'
                then TranslationState := tsInBracket
                else Result := Result + C;
              end;

            tsInBracket :
              begin
                if   C = '!'
                then TranslationState := tsExclamation
                else begin
                  Result           := Result + '{' + C;
                  TranslationState := tsNone;
                end;
              end;

            tsExclamation :
              begin
                if   C = '}'
                then begin
                  Result           := Result + '<mark name="1"/>';
                  TranslationState := tsNone;
                end
                else begin
                  Num              := C;   // This character is part of the number already
                  TranslationState := tsInNumber;
                end;
              end;

            tsInNumber :
              begin
                if   C = '}'
                then begin
                  aNumber := StrToFloatDef( Num, NaN);

                  if   IsNan( aNumber)
                  then Result := Result + '{!' + Num + '}'
                  else Result := Result + Format( '<mark name="user1=%g"/>', [ aNumber]);

                  TranslationState := tsNone;
                end
                else Num := Num + C;
              end;

          end;
        end;

        // The message ended inside a marker, hand out what was held back

        case TranslationState of
          tsInBracket   : Result := Result + '{';
          tsExclamation : Result := Result + '{!';
          tsInNumber    : Result := Result + '{!' + Num;
        end;
      end
      else Result := aMsg;
    end;


//  public

    // Creates a speaker and gives every speech setting its default value.
    //
    // ESpeakInitialize is called first; when it reports failure this is only logged through Debug and construction
    // carries on.
    constructor TESpeakSpeaker.Create;
    begin
      inherited Create;

      if not ESpeakInitialize
      then Debug( 'Could not initialize eSpeak');

      // Selection state : no voice and no language chosen yet, message translation switched off.
      FSelectedVoice    := -1;
      FSelectedLanguage := -1;
      FUseTranslation   := False;

      // Speech parameters, handed to eSpeak on every call to Speak.
      FPunctuation := esPunctNone;
      FCapitals    := 0;
      FWordGap     := 5;              // in 10 ms units .. so 5 would mean 50 ms
      FRange       := 50;
      FPitch       := 50;
      FVolume      := 100;
      FRate        := ES_RATE_NORMAL;
    end;


    // Releases this speaker : calls ESpeakUnInitialize( False) and then runs the inherited destructor.
    //
    // NOTE(review): presumably this balances the ESpeakInitialize call made in Create; the exact meaning of the
    // False argument is defined by ESpeakUnInitialize, which is not visible here - confirm.
    destructor  TESpeakSpeaker.Destroy; // override;
    begin
      ESpeakUnInitialize( False);
      Inherited;
    end;


    // Stores anIndex as the selected voice index. No range check is done and eSpeak is not touched here; the
    // stored value is only remembered in FSelectedVoice.
    procedure   TESpeakSpeaker.SelectVoice( anIndex: Integer);
    begin
      FSelectedVoice := anIndex;
    end;


    // Stores anIndex as the selected language index. No range check is done and eSpeak is not touched here; the
    // stored value is only remembered in FSelectedLanguage.
    procedure   TESpeakSpeaker.SelectLanguage( anIndex: Integer);
    begin
      FSelectedLanguage := anIndex;
    end;


    // Speaks aText with the current voice and speech settings.
    //
    //   aText : the text to speak, it is passed through Translate before being handed to eSpeak.
    //
    // The voice name is taken from the selected language when SelectedLanguage >= 0, otherwise from the selected
    // voice; a non-empty variation is appended as '+variation'. All speech parameters are (re)applied on every call
    // and the text is then synthesized as wide character SSML with phoneme support, passing Self as the user data.
    //
    // Failures to select the voice or to synthesize are logged only; a failed voice selection does not stop the
    // synthesis from being attempted.
    procedure   TESpeakSpeaker.Speak( const aText: string);
    var
      anError    : TESpeakError;
      aVoiceName : string;
      aVariation : string;
      aMessage   : string;
      aByteSize  : Integer;
    begin
      aMessage := Translate( aText);

      if SelectedLanguage >= 0
      then aVoiceName := Language [ SelectedLanguage]
      else aVoiceName := VoiceName[ SelectedVoice   ];

      aVariation := Variation[ SelectedVariation];

      if aVariation <> ''
      then aVoiceName := aVoiceName + '+' + aVariation;

      anError := espeak_SetVoiceByName( PEChar( AnsiString( aVoiceName)));

      // Report the name that was actually requested (it may stem from a language and carry a variation) along with
      // the eSpeak error, the bare voice index would be misleading in those cases.
      if anError <> EE_OK
      then DebugFmt( 'Error in selecting voice ''%s'': %s', [ aVoiceName, ESpeakErrorToStr( anError)]);

      espeak_SetParameter( ES_PUNCTUATION, Ord( FPunctuation), 0);
      espeak_SetParameter( ES_RATE       , FRate             , 0);
      espeak_SetParameter( ES_VOLUME     , FVolume           , 0);
      espeak_SetParameter( ES_PITCH      , FPitch            , 0);
      espeak_SetParameter( ES_RANGE      , FRange            , 0);
      espeak_SetParameter( ES_CAPITALS   , FCapitals         , 0);
      espeak_SetParameter( ES_WORDGAP    , FWordGap          , 0);

      // espeak_Synth wants the size of the text data in BYTES (equal or greater is fine). The message is passed as
      // wide characters, so the character count has to be scaled by the character size; the terminating zero
      // is included as well.
      aByteSize := ( Length( aMessage) + 1) * SizeOf( Char);

      anError := espeak_Synth( PChar( aMessage), aByteSize, 0, POS_CHARACTER, 0, EF_CHARS_WCHAR or EF_SSML or EF_PHONEMES, nil, Self);

      if anError <> EE_OK
      then DebugFmt( 'Error speaking text: %s', [ ESpeakErrorToStr( anError)]);
    end;


    // Asks eSpeak to cancel by calling espeak_Cancel; the value returned by that call, if any, is not looked at.
    procedure   TESpeakSpeaker.Stop;
    begin
      espeak_Cancel;
    end;


    // Builds a new string list holding VoiceName[ 0] .. VoiceName[ VoiceCount - 1], in index order.
    //
    // The caller becomes the owner of the returned list and must free it.
    function    TESpeakSpeaker.CreateVoiceNames: TStringList;
    var
      aVoiceIndex : Integer;
      aNames      : TStringList;
    begin
      aNames := TStringList.Create;
      Result := aNames;

      for aVoiceIndex := 0 to VoiceCount - 1
      do begin
        aNames.Add( VoiceName[ aVoiceIndex]);
      end;
    end;


    // Builds a new string list holding Language[ 0] .. Language[ LanguageCount - 1], in index order.
    //
    //   aVoiceIndex : assigned to SelectedVoice before the languages are read, so calling this function changes
    //                 the speaker's selected voice as a side effect.
    //
    // The caller becomes the owner of the returned list and must free it.
    function    TESpeakSpeaker.CreateLanguageNames( aVoiceIndex: Integer): TStringList;
    var
      aLanguageIndex : Integer;
      aNames         : TStringList;
    begin
      aNames        := TStringList.Create;
      Result        := aNames;
      SelectedVoice := aVoiceIndex;

      for aLanguageIndex := 0 to LanguageCount - 1
      do begin
        aNames.Add( Language[ aLanguageIndex]);
      end;
    end;


  // ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

{ ========
  TESpeakerVoices = class
  private
    FVoices    : TStringList;
    FVoiceList : array of TSpeakerVoice;
  public
    property    VoiceCount                                      : Integer read GetVoiceCount;
    property    VoiceName      [ anIndex             : Integer] : string  read GetVoiceName;
    property    VoiceIdentifier[ anIndex             : Integer] : string  read GetVoiceIdentifier;
    property    LanguageCount  [ aVoiceIndex         : Integer] : Integer read GetLanguageCount;
    property    Language       [ aVoiceIndex, anIndex: Integer] : string  read GetLanguage;
  private
}

    // Returns the number of entries currently held in FVoiceList.
    function    TESpeakerVoices.GetVoiceCount : Integer;
    begin
      Result := Length( FVoiceList);
    end;


    // Returns the VoiceName of the voice at anIndex, or the text 'Invalid voice index' when anIndex lies outside
    // 0 .. VoiceCount - 1.
    function    TESpeakerVoices.GetVoiceName( anIndex: Integer): string;
    begin
      if   ( anIndex < 0)
      or   ( anIndex >= VoiceCount)
      then Exit( 'Invalid voice index');

      Result := FVoiceList[ anIndex].VoiceName;
    end;


    // Returns the VoiceIdentifier of the voice at anIndex, or the text 'Invalid voice index' when anIndex lies
    // outside 0 .. VoiceCount - 1.
    function    TESpeakerVoices.GetVoiceIdentifier( anIndex: Integer): string;
    begin
      if   ( anIndex < 0)
      or   ( anIndex >= VoiceCount)
      then Exit( 'Invalid voice index');

      Result := FVoiceList[ anIndex].VoiceIdentifier;
    end;


    // Returns the LanguageCount of the voice at aVoiceIndex, or -1 when aVoiceIndex lies outside
    // 0 .. VoiceCount - 1.
    function    TESpeakerVoices.GetLanguageCount( aVoiceIndex: Integer): Integer;
    begin
      if   ( aVoiceIndex < 0)
      or   ( aVoiceIndex >= VoiceCount)
      then Exit( -1);

      Result := FVoiceList[ aVoiceIndex].LanguageCount;
    end;


    // Returns language anIndex of the voice at aVoiceIndex.
    //
    // The voice index is validated first : outside 0 .. VoiceCount - 1 the text 'Invalid voice index' is returned.
    // With a valid voice, a language index outside 0 .. LanguageCount[ aVoiceIndex] - 1 results in the text
    // 'Invalid language index'.
    function    TESpeakerVoices.GetLanguage( aVoiceIndex, anIndex: Integer): string;
    begin
      if   ( aVoiceIndex < 0)
      or   ( aVoiceIndex >= VoiceCount)
      then Exit( 'Invalid voice index');

      if   ( anIndex < 0)
      or   ( anIndex >= LanguageCount[ aVoiceIndex])
      then Exit( 'Invalid language index');

      Result := FVoiceList[ aVoiceIndex].Language[ anIndex];
    end;


//  public

    // Builds the voice list from what eSpeak reports.
    //
    // All voices returned by espeak_ListVoices( nil) are wrapped into TESpeakerVoice objects and collected in a
    // temporary, case insensitively sorted string list keyed on the voice name (duplicate names are accepted). The
    // objects are then copied, in that sorted order, into FVoiceList - which owns them from there on - and the
    // temporary list is freed again.
    //
    // When eSpeak hands back no list at all (a nil pointer) the result is an empty voice list.
    constructor TESpeakerVoices.Create;
    var
      aVoice : PPESpeakVoice;
      i      : Integer;
    begin
      inherited Create;
      FVoices := TStringList.Create;
      FVoices.Sorted        := True;
      FVoices.CaseSensitive := False;
      FVoices.Duplicates    := dupAccept;
      FVoices.OwnsObjects   := False;      // Ownership of the voice objects moves on to FVoiceList

      aVoice := espeak_ListVoices( nil);     // List all voices

      // The returned array is terminated by a nil entry, but the array pointer itself may be nil as well, so guard
      // against that before dereferencing it.
      if Assigned( aVoice)
      then begin
        while Assigned( aVoice^)
        do begin
          FVoices.AddObject( string( aVoice^^.Name), TESpeakerVoice.Create( aVoice^^));
          Inc( aVoice);
        end;
      end;

      // The final size is known here, so size the array once instead of growing it for every single voice.
      SetLength( FVoiceList, FVoices.Count);

      for i := 0 to FVoices.Count - 1
      do FVoiceList[ i] := TESpeakerVoice( FVoices.Objects[ i]);

      FreeAndNil( FVoices);
    end;


    // Frees the temporary name list in case it is still around, then frees every voice object held in FVoiceList,
    // empties the array and runs the inherited destructor.
    destructor  TESpeakerVoices.Destroy; // override;
    var
      aVoiceIndex : Integer;
    begin
      // Normally freed at the end of Create already, FreeAndNil copes with that.
      FreeAndNil( FVoices);

      for aVoiceIndex := Low( FVoiceList) to High( FVoiceList)
      do begin
        FreeAndNil( FVoiceList[ aVoiceIndex]);
      end;

      FVoiceList := nil;
      inherited;
    end;


initialization

  // Nothing to do on unit startup.

finalization

  // Unit shutdown : calls ESpeakUnInitialize with True, where the speaker destructor passes False.
  // NOTE(review): presumably True forces eSpeak to be shut down regardless of speakers that were never freed;
  // confirm against ESpeakUnInitialize.
  ESpeakUnInitialize( True);

end.

