crystal/tcpsocket: add aws and gcs voice rendering to tcp protocol

author: Joe Rayhawk <jrayhawk+git@omgwallhack.org> 2024-01-29 12:17:47 -0800
committer: Joe Rayhawk <jrayhawk@fairlystable.org> 2024-01-29 13:42:43 -0800
commit: b0a607fc32d12a8195743d402d1bd69664526b3c (patch)
tree: 1be36acbbafc60d0076a682d63d0666e1ab90d15 /crystal
parent: d89da9fdc2dfad70d80304da728bcc57438a0a63 (diff)
download: twitchtools-b0a607fc32d12a8195743d402d1bd69664526b3c.tar.gz
twitchtools-b0a607fc32d12a8195743d402d1bd69664526b3c.zip
1 files changed, 134 insertions, 60 deletions
diff --git a/crystal/tcpsocket.cr b/crystal/tcpsocket.cr
index a73b37c..67e036c 100644
--- a/crystal/tcpsocket.cr
+++ b/crystal/tcpsocket.cr
@@ -22,6 +22,23 @@ struct Nil
   end
 end
 
+class OpenSSL::SSL::Socket::Client
+  # Fill `slice` completely from the TLS stream, bypassing the IO::Buffered read buffer.
+  # OpenSSL only unbuffered_read's TLS records of max size 16384, so we may have to reassemble.
+  # Raises IO::EOFError if the peer closes the connection before `slice` is full.
+  # NOTE(review): bytes already pulled into the buffered layer by a prior `gets` would be skipped here — confirm.
+  def fill_read( slice : Bytes )
+    datarcvdtotal = 0
+    while datarcvdtotal < slice.size
+      # Read straight into the unfilled tail: no temporary buffers, and never more than is still missing
+      datarcvd = self.unbuffered_read( slice[datarcvdtotal, slice.size - datarcvdtotal] )
+      # A read of 0 bytes means the stream ended; looping on it would never terminate
+      raise IO::EOFError.new( "fill_read: got #{datarcvdtotal} of #{slice.size} bytes before EOF" ) if datarcvd <= 0
+      datarcvdtotal += datarcvd
+    end
+  end
+end
+
 macro testrefuser2uid( path )
   {% if flag?(:windows) %}
     File.exists?( {{path}} ) && ( File.read( {{ path }} ) =~ /^[0-9]+$/ )
@@ -175,10 +192,6 @@ else
   end
 end
 
-# FIXME: might need twitch_channel_id eventually ...?
-if twitchapi
-end
-
 # enable direct gcloud api?
 if secrets.gcloud_token
   gcloud = true
@@ -192,14 +205,15 @@ if ! File.exists?( Path.home./("/.aws/credentials") )
   # FIXME: work out where this is on Windows
   STDERR.puts "Warning: #{Path.home}/.aws/credentials is missing; AWS voices disabled."
   aws = false
-elsif ! Process.find_executable( "aws.exe" )
-  STDERR.puts "Warning: aws.exe is missing; AWS voices disabled."
+elsif ! Process.find_executable( "aws.exe" ) && ! Process.find_executable( "aws" )
+  STDERR.puts "Warning: aws CLI executable is missing; AWS voices disabled."
   aws = false
 else
   aws = true
 end
 
 # enable microsoft speech services?
+# TODO: download and msiexec /i https://www.microsoft.com/en-us/download/details.aspx?id=27224
 mss : Bool
 {% if flag?(:windows) %}
     mss = true
@@ -234,7 +248,6 @@ bbscliipc    = Channel( String ).new
 bbssrvipc    = Channel( String ).new
 waitgroup    = Channel( String ).new
 
-
 fiberipc   = Channel( Fiber  ).new
 fibers = Hash( String, Fiber ).new
 
@@ -365,7 +378,7 @@ def generatevoicelistaws( )
   return voices
 end
 
-def generatevoicelistgcs( gcloud_token )
+def generatevoicelistgcs( gcloud_token : String )
   voices = Array(String).new
   ssl_context = OpenSSL::SSL::Context::Client.new
   headers = HTTP::Headers.new
@@ -421,6 +434,14 @@ def regeneratevoicelist( defaultsettings : Hash( String, String ), aws : Bool, g
   return voices
 end
 
+# Play in-memory audio by writing it to a timestamp-named .mp3 under tempdir. TODO: add piping into mpv on POSIX
+def playaudiodata( tempdir, data : Bytes )
+  filepath="#{tempdir}#{Time.utc.to_unix_ms}.mp3" # tempdir is concatenated as-is; presumably it ends with a path separator — confirm
+  File.write( filepath, data )
+  playaudiofile( filepath ) # assumed to return only once playback has finished — its body is not fully shown here
+  File.delete( filepath ) # not reached if playaudiofile raises, which leaves the temp file behind
+end
+
 def playaudiofile( filepath : String )
   p = Process.new(
     "powershell.exe",
@@ -643,6 +664,7 @@ spawn name: "command_dispatch" do
                     end
                     if text2speech
                       if ( exec.func == "text_to_speech" )
+                        puts "Exec-ing text_to_speech"
                         if ( t2sreturn = t2s( t2sipc, config, userdir, chatuser, message.params[1] ) )
                           lastvoice.insert( 0, t2sreturn )
                           lastvoice = lastvoice[0..4]
@@ -1002,6 +1024,40 @@ end
 fiber = fiberipc.receive
 fibers[fiber.name.not_nil!] = fiber
 
+# Synthesize `text` with Google Cloud Text-to-Speech voice `voice` (language `languagecode`) using API key `gcskey`.
+# Returns the decoded MP3 bytes; raises if the API answers with a non-2xx status.
+def ttsgcs( languagecode : String, voice : String, text : String, gcskey : String ) : Bytes
+  request = Hash( String, Hash( String, String ) ){
+    "input" => { "text" => text },
+    "audioConfig" => { "audioEncoding" => "MP3" },
+    "voice" => {
+      "name" => voice,
+      "languageCode" => languagecode,
+    },
+  }
+  ssl_context = OpenSSL::SSL::Context::Client.new
+  #ssl_context.verify_mode = OpenSSL::SSL::VerifyMode::NONE
+  headers = HTTP::Headers.new
+  headers["Content-Type"] = "application/json; charset=utf-8"
+  response = HTTP::Client.exec( "POST", "https://texttospeech.googleapis.com/v1/text:synthesize?key=#{gcskey}", headers, request.to_json, tls: ssl_context )
+  raise Exception.new( "ttsgcs: HTTP #{response.status_code}: #{response.body}" ) unless response.success? # error replies carry no "audioContent"
+  json=JSON.parse(response.body)
+  return Base64.decode( json["audioContent"].as_s )
+end
+
+def ttsaws( filepath : String, voice : String, text : String ) # runs `aws polly synthesize-speech` for text with the given voice id, writing to filepath
+  p = Process.new(
+    "aws", [
+      "polly", "synthesize-speech",
+      "--output-format", "mp3",
+      "--voice-id", voice,
+      "--text", text, # passed as its own argv entry, so no shell quoting is involved
+      filepath # final positional argument: the output file
+    ], output: STDOUT, error: STDERR # the CLI's stdout/stderr are forwarded to this process's own
+  )
+  p.wait # blocks until the CLI exits; the exit status is not checked
+end
+
 
 # Put tts stuff into the same thread so each playback blocks the next
 spawn name: "text2speech" do
@@ -1030,46 +1086,30 @@ spawn name: "text2speech" do
           p.input.puts text
           p.input.close
           p.wait
-        elsif gcloud && ( match = voice.match( /^([a-zA-Z]{2,3}-[a-zA-Z]{2})/ ) ) # Google cloud voice
-          request = Hash( String, Hash( String, String ) ){
-            "input" => { "text" => text },
-            "audioConfig" => { "audioEncoding" => "MP3" },
-            "voice" => {
-              "name" => voice,
-              "languageCode" => match[1],
-            },
-          }
-          ssl_context = OpenSSL::SSL::Context::Client.new
-          #ssl_context.verify_mode = OpenSSL::SSL::VerifyMode::NONE
-
-          headers = HTTP::Headers.new
-          headers["Content-Type"] = "application/json; charset=utf-8"
-
-          response = HTTP::Client.exec( "POST", "https://texttospeech.googleapis.com/v1/text:synthesize?key=#{secrets.gcloud_token}", headers, request.to_json, tls: ssl_context )
-
-          response.body
-
-          filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3"
-          json=JSON.parse(response.body)
-          File.write( filepath, Base64.decode_string( json["audioContent"].as_s ) )
-          playaudiofile( filepath )
-          File.delete( filepath )
-        elsif aws # AWS polly voices
-          filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3"
-          p = Process.new(
-            "aws.exe", [
-              "polly", "synthesize-speech",
-              "--output-format", "mp3",
-              "--voice-id", voice,
-              "--text", text,
-              filepath
-            ], output: STDOUT, error: STDERR
-          )
-          p.wait
-          playaudiofile( filepath )
-          File.delete( filepath )
-        else # unknown
-          STDERR.puts "Voice not recognized or available."
+        elsif ( match = voice.match( /^([a-zA-Z]{2,3}-[a-zA-Z]{2})/ ) )
+          if ( gcloud_token = secrets.gcloud_token ).is_a?( String ) # Google cloud voice
+            mp3data = ttsgcs( match[1], voice, text, gcloud_token )
+            playaudiodata( config.tempdir, mp3data )
+          elsif fibers["BungmoBott::Socket client"]?
+            bbscliipc.send( "gcst2s #{voice} #{text}" )
+            # The rest of this is dealt with in the BungmoBott::Socket client
+          else
+            STDERR.puts( "ERROR: google cloud voice requested, but no gcloud_token or BungmoBott::Socket client is available" )
+          end
+        else
+          if aws # AWS polly voices
+            filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3"
+            ttsaws( filepath, voice, text )
+            playaudiofile( filepath )
+            File.delete( filepath )
+          elsif fibers["BungmoBott::Socket client"]?
+            bbscliipc.send( "awst2s #{voice} #{text}" )
+            # The rest of this is dealt with in the BungmoBott::Socket client
+          else
+            STDERR.puts( "ERROR: aws polly voice requested, but no aws CLI executable or BungmoBott::Socket client is available" )
+          end
+        #else # unknown
+        #  STDERR.puts "Voice not recognized or available."
         end
       end
     rescue ex
@@ -1248,14 +1288,23 @@ if config.bungmobott_connect
       spawn do
         while message = ssl_socket.gets
           puts "BungmoBott::Socket cli recv: " + message.gsub( bungmobott_key, "CENSORED" )
-          case message
-          when /^error/
+          if    message =~ /^error/i
             raise Exception.new("BungmoBott::Socket Error: #{message}")
-          when /^authed/
+          elsif message =~ /^authed/
             negotiated = true
             ssl_socket.puts( "say twitch #{user} test" )
-          when /^msg twitch/
+          elsif message =~ /^msg twitch/
             commandircipc.send( { "twitch", FastIRC.parse_line( message.split(" ")[2..].join(" ") ) } )
+          elsif ( match = message.match( /^awst2s ([0-9]+)/ ) )
+            datasize = match[1].to_u32
+            audiodata = Bytes.new( datasize )
+            ssl_socket.fill_read( audiodata )
+            playaudiodata( config.tempdir, audiodata )
+          elsif ( match = message.match( /^gcst2s ([0-9]+)/ ) )
+            datasize = match[1].to_u32
+            audiodata = Bytes.new( datasize )
+            ssl_socket.fill_read( audiodata )
+            playaudiodata( config.tempdir, audiodata )
           end
         end
       end
@@ -1264,6 +1313,7 @@ if config.bungmobott_connect
         # ssl_socket gets redefined in the event of I/O errors, so we deal with it here.
         #if ( climsg = input.match( /^irc +twitch +JOIN \#+([a-zA-Z0-9_]+) *$/ ) )
         #end
+        puts( "bbscli tx: " + input )
         ssl_socket.puts( input )
       end
     end
@@ -1327,14 +1377,38 @@ if config.bungmobott_listen
                 channelsubs[ { ircservice, "#" + ircchannel } ].push( client )
                 pp channelsubs
               end
-            elsif ( match = message.match( /^awst2s (#{regexvoice}) (.+)/i ) )
-              # FIXME # powershell doesn't take bytestreams; maybe use http file hosting for this?
-            elsif ( match = message.match( /^gcst2s (#{regexvoice}) (.+)/i ) )
-              # FIXME
-            elsif ( match = message.match( /^awsvoicelist$/i ) )
-              client.puts "awsvoicelist " + generatevoicelistaws().join(" ")
-            elsif ( match = message.match( /^gcsvoicelist$/i ) )
-              client.puts "gcsvoicelist " + generatevoicelistgcs( secrets.gcloud_token ).join(" ")
+            elsif ( message =~ /^aws/i)
+              if aws
+                if ( match = message.match( /^awsvoicelist$/i ) )
+                  client.puts "awsvoicelist " + generatevoicelistaws().join(" ")
+                elsif ( match = message.match( /^awst2s ([a-zA-Z-]+) (.+)/i ) )
+                  filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3"
+                  ttsaws( filepath, match[1], match[2] )
+                  mp3datasize = File.size( filepath )
+                  mp3data = Bytes.new( mp3datasize )
+                  content = File.open( filepath ) do |file|
+                    file.read( mp3data )
+                  end
+                  client.puts "awst2s #{mp3data.size}"
+                  STDOUT.puts "SENT: awst2s #{mp3data.size}"
+                  client.unbuffered_write( mp3data ) # Normal writes create TLS records of 4608 bytes. Unbuffered_writes create maximum 16384 byte records that need to be reassembled on the read end.
+                  STDOUT.puts "SENT: mp3data"
+                end
+              end
+            elsif ( message =~ /^gcs/i)
+              if ( gcloud_token = secrets.gcloud_token ).is_a?( String )
+                if ( match = message.match( /^gcsvoicelist$/i ) )
+                  client.puts "gcsvoicelist " + generatevoicelistgcs( ( gcloud_token ) ).join(" ")
+                elsif ( match = message.match( /^gcst2s (([a-zA-Z]{2,3}-[a-zA-Z]{2})[a-zA-Z-]+) (.+)/i ) )
+                  mp3data = ttsgcs( match[2], match[1], match[3], gcloud_token )
+                  client.puts "gcst2s #{mp3data.size}"
+                  STDOUT.puts "SENT: gcst2s #{mp3data.size}"
+                  client.unbuffered_write( mp3data )
+                  STDOUT.puts "SENT: mp3data"
+                end
+              else
+                client.puts "ERROR: gcloud_token missing, gcs commands disabled."
+              end
             elsif ( match = message.match( /^say twitch (.+)/i ) )
               say( twitchircipc, connections[client]["user"], match[1] )
             elsif ( message =~ /testchannelsubs/ )
author	Joe Rayhawk <jrayhawk+git@omgwallhack.org>	2024-01-29 12:17:47 -0800
committer	Joe Rayhawk <jrayhawk@fairlystable.org>	2024-01-29 13:42:43 -0800
commit	b0a607fc32d12a8195743d402d1bd69664526b3c (patch)
tree	1be36acbbafc60d0076a682d63d0666e1ab90d15 /crystal
parent	d89da9fdc2dfad70d80304da728bcc57438a0a63 (diff)
download	twitchtools-b0a607fc32d12a8195743d402d1bd69664526b3c.tar.gz twitchtools-b0a607fc32d12a8195743d402d1bd69664526b3c.zip