From b0a607fc32d12a8195743d402d1bd69664526b3c Mon Sep 17 00:00:00 2001 From: Joe Rayhawk Date: Mon, 29 Jan 2024 12:17:47 -0800 Subject: crystal/tcpsocket: add aws and gcs voice rendering to tcp protocol --- crystal/tcpsocket.cr | 194 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 134 insertions(+), 60 deletions(-) (limited to 'crystal/tcpsocket.cr') diff --git a/crystal/tcpsocket.cr b/crystal/tcpsocket.cr index a73b37c..67e036c 100644 --- a/crystal/tcpsocket.cr +++ b/crystal/tcpsocket.cr @@ -22,6 +22,23 @@ struct Nil end end +class OpenSSL::SSL::Socket::Client + def fill_read( slice : Bytes ) + datasize = slice.size + datarcvdtotal = UInt32.new( 0 ) + data = Bytes.new( 0 ) + while datarcvdtotal < datasize + # OpenSSL only unbuffered_read's TLS records of max size 16384, so we may have to reassemble + data_buffer = Bytes.new( datasize ) + datarcvd = self.unbuffered_read( data_buffer ) + datarcvdtotal = ( datarcvdtotal + datarcvd ) + data = data + data_buffer[0..datarcvd-1] + data_buffer = Bytes.new( datasize - datarcvdtotal ) + end + slice.copy_from( data ) + end +end + macro testrefuser2uid( path ) {% if flag?(:windows) %} File.exists?( {{path}} ) && ( File.read( {{ path }} ) =~ /^[0-9]+$/ ) @@ -175,10 +192,6 @@ else end end -# FIXME: might need twitch_channel_id eventually ...? -if twitchapi -end - # enable direct gcloud api? if secrets.gcloud_token gcloud = true @@ -192,14 +205,15 @@ if ! File.exists?( Path.home./("/.aws/credentials") ) # FIXME: work out where this is on Windows STDERR.puts "Warning: #{Path.home}/.aws/credentials is missing; AWS voices disabled." aws = false -elsif ! Process.find_executable( "aws.exe" ) - STDERR.puts "Warning: aws.exe is missing; AWS voices disabled." +elsif ! Process.find_executable( "aws.exe" ) && ! Process.find_executable( "aws" ) + STDERR.puts "Warning: aws CLI executable is missing; AWS voices disabled." aws = false else aws = true end # enable microsoft speech services? +# TODO: download and msiexec /i https://www.microsoft.com/en-us/download/details.aspx?id=27224 mss : Bool {% if flag?(:windows) %} mss = true @@ -234,7 +248,6 @@ bbscliipc = Channel( String ).new bbssrvipc = Channel( String ).new waitgroup = Channel( String ).new - fiberipc = Channel( Fiber ).new fibers = Hash( String, Fiber ).new @@ -365,7 +378,7 @@ def generatevoicelistaws( ) return voices end -def generatevoicelistgcs( gcloud_token ) +def generatevoicelistgcs( gcloud_token : String ) voices = Array(String).new ssl_context = OpenSSL::SSL::Context::Client.new headers = HTTP::Headers.new @@ -421,6 +434,14 @@ def regeneratevoicelist( defaultsettings : Hash( String, String ), aws : Bool, g return voices end +# TODO: add piping into mpv on POSIX +def playaudiodata( tempdir, data : Bytes ) + filepath="#{tempdir}#{Time.utc.to_unix_ms}.mp3" + File.write( filepath, data ) + playaudiofile( filepath ) + File.delete( filepath ) +end + def playaudiofile( filepath : String ) p = Process.new( "powershell.exe", @@ -643,6 +664,7 @@ spawn name: "command_dispatch" do end if text2speech if ( exec.func == "text_to_speech" ) + puts "Exec-ing text_to_speech" if ( t2sreturn = t2s( t2sipc, config, userdir, chatuser, message.params[1] ) ) lastvoice.insert( 0, t2sreturn ) lastvoice = lastvoice[0..4] @@ -1002,6 +1024,40 @@ end fiber = fiberipc.receive fibers[fiber.name.not_nil!] = fiber +def ttsgcs( languagecode : String, voice : String, text : String, gcskey : String ) : Bytes + request = Hash( String, Hash( String, String ) ){ + "input" => { "text" => text }, + "audioConfig" => { "audioEncoding" => "MP3" }, + "voice" => { + "name" => voice, + "languageCode" => languagecode, + }, + } + ssl_context = OpenSSL::SSL::Context::Client.new + #ssl_context.verify_mode = OpenSSL::SSL::VerifyMode::NONE + + headers = HTTP::Headers.new + headers["Content-Type"] = "application/json; charset=utf-8" + + response = HTTP::Client.exec( "POST", "https://texttospeech.googleapis.com/v1/text:synthesize?key=#{gcskey}", headers, request.to_json, tls: ssl_context ) + + json=JSON.parse(response.body) + return Base64.decode( json["audioContent"].as_s ) +end + +def ttsaws( filepath : String, voice : String, text : String ) + p = Process.new( + "aws", [ + "polly", "synthesize-speech", + "--output-format", "mp3", + "--voice-id", voice, + "--text", text, + filepath + ], output: STDOUT, error: STDERR + ) + p.wait +end + # Put tts stuff into the same thread so each playback blocks the next spawn name: "text2speech" do @@ -1030,46 +1086,30 @@ spawn name: "text2speech" do p.input.puts text p.input.close p.wait - elsif gcloud && ( match = voice.match( /^([a-zA-Z]{2,3}-[a-zA-Z]{2})/ ) ) # Google cloud voice - request = Hash( String, Hash( String, String ) ){ - "input" => { "text" => text }, - "audioConfig" => { "audioEncoding" => "MP3" }, - "voice" => { - "name" => voice, - "languageCode" => match[1], - }, - } - ssl_context = OpenSSL::SSL::Context::Client.new - #ssl_context.verify_mode = OpenSSL::SSL::VerifyMode::NONE - - headers = HTTP::Headers.new - headers["Content-Type"] = "application/json; charset=utf-8" - - response = HTTP::Client.exec( "POST", "https://texttospeech.googleapis.com/v1/text:synthesize?key=#{secrets.gcloud_token}", headers, request.to_json, tls: ssl_context ) - - response.body - - filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3" - json=JSON.parse(response.body) - File.write( filepath, Base64.decode_string( json["audioContent"].as_s ) ) - playaudiofile( filepath ) - File.delete( filepath ) - elsif aws # AWS polly voices - filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3" - p = Process.new( - "aws.exe", [ - "polly", "synthesize-speech", - "--output-format", "mp3", - "--voice-id", voice, - "--text", text, - filepath - ], output: STDOUT, error: STDERR - ) - p.wait - playaudiofile( filepath ) - File.delete( filepath ) - else # unknown - STDERR.puts "Voice not recognized or available." + elsif ( match = voice.match( /^([a-zA-Z]{2,3}-[a-zA-Z]{2})/ ) ) + if ( gcloud_token = secrets.gcloud_token ).is_a?( String ) # Google cloud voice + mp3data = ttsgcs( match[1], voice, text, gcloud_token ) + playaudiodata( config.tempdir, mp3data ) + elsif fibers["BungmoBott::Socket client"]? + bbscliipc.send( "gcst2s #{voice} #{text}" ) + # The rest of this is dealt with in the BungmoBott::Socket client + else + STDERR.puts( "ERROR: google cloud voice requested, but no gcloud_token or BungmoBott::Socket client is available" ) + end + else + if aws # AWS polly voices + filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3" + ttsaws( filepath, voice, text ) + playaudiofile( filepath ) + File.delete( filepath ) + elsif fibers["BungmoBott::Socket client"]? + bbscliipc.send( "awst2s #{voice} #{text}" ) + # The rest of this is dealt with in the BungmoBott::Socket client + else + STDERR.puts( "ERROR: aws polly voice requested, but no aws CLI executable or BungmoBott::Socket client is available" ) + end + #else # unknown + # STDERR.puts "Voice not recognized or available." end end rescue ex @@ -1248,14 +1288,23 @@ if config.bungmobott_connect spawn do while message = ssl_socket.gets puts "BungmoBott::Socket cli recv: " + message.gsub( bungmobott_key, "CENSORED" ) - case message - when /^error/ + if message =~ /^error/i raise Exception.new("BungmoBott::Socket Error: #{message}") - when /^authed/ + elsif message =~ /^authed/ negotiated = true ssl_socket.puts( "say twitch #{user} test" ) - when /^msg twitch/ + elsif message =~ /^msg twitch/ commandircipc.send( { "twitch", FastIRC.parse_line( message.split(" ")[2..].join(" ") ) } ) + elsif ( match = message.match( /^awst2s ([0-9]+)/ ) ) + datasize = match[1].to_u32 + audiodata = Bytes.new( datasize ) + ssl_socket.fill_read( audiodata ) + playaudiodata( config.tempdir, audiodata ) + elsif ( match = message.match( /^gcst2s ([0-9]+)/ ) ) + datasize = match[1].to_u32 + audiodata = Bytes.new( datasize ) + ssl_socket.fill_read( audiodata ) + playaudiodata( config.tempdir, audiodata ) end end end @@ -1264,6 +1313,7 @@ if config.bungmobott_connect # ssl_socket gets redefined in the event of I/O errors, so we deal with it here. #if ( climsg = input.match( /^irc +twitch +JOIN \#+([a-zA-Z0-9_]+) *$/ ) ) #end + puts( "bbscli tx: " + input ) ssl_socket.puts( input ) end end @@ -1327,14 +1377,38 @@ if config.bungmobott_listen channelsubs[ { ircservice, "#" + ircchannel } ].push( client ) pp channelsubs end - elsif ( match = message.match( /^awst2s (#{regexvoice}) (.+)/i ) ) - # FIXME # powershell doesn't take bytestreams; maybe use http file hosting for this? - elsif ( match = message.match( /^gcst2s (#{regexvoice}) (.+)/i ) ) - # FIXME - elsif ( match = message.match( /^awsvoicelist$/i ) ) - client.puts "awsvoicelist " + generatevoicelistaws().join(" ") - elsif ( match = message.match( /^gcsvoicelist$/i ) ) - client.puts "gcsvoicelist " + generatevoicelistgcs( secrets.gcloud_token ).join(" ") + elsif ( message =~ /^aws/i) + if aws + if ( match = message.match( /^awsvoicelist$/i ) ) + client.puts "awsvoicelist " + generatevoicelistaws().join(" ") + elsif ( match = message.match( /^awst2s ([a-zA-Z-]+) (.+)/i ) ) + filepath="#{config.tempdir}#{Time.utc.to_unix_ms}.mp3" + ttsaws( filepath, match[1], match[2] ) + mp3datasize = File.size( filepath ) + mp3data = Bytes.new( mp3datasize ) + content = File.open( filepath ) do |file| + file.read( mp3data ) + end + client.puts "awst2s #{mp3data.size}" + STDOUT.puts "SENT: awst2s #{mp3data.size}" + client.unbuffered_write( mp3data ) # Normal writes create TLS records of 4608 bytes. Unbuffered_writes create maximum 16384 byte records that need to be reassembled on the read end. + STDOUT.puts "SENT: mp3data" + end + end + elsif ( message =~ /^gcs/i) + if ( gcloud_token = secrets.gcloud_token ).is_a?( String ) + if ( match = message.match( /^gcsvoicelist$/i ) ) + client.puts "gcsvoicelist " + generatevoicelistgcs( ( gcloud_token ) ).join(" ") + elsif ( match = message.match( /^gcst2s (([a-zA-Z]{2,3}-[a-zA-Z]{2})[a-zA-Z-]+) (.+)/i ) ) + mp3data = ttsgcs( match[2], match[1], match[3], gcloud_token ) + client.puts "gcst2s #{mp3data.size}" + STDOUT.puts "SENT: gcst2s #{mp3data.size}" + client.unbuffered_write( mp3data ) + STDOUT.puts "SENT: mp3data" + end + else + client.puts "ERROR: gcloud_token missing, gcs commands disabled." + end elsif ( match = message.match( /^say twitch (.+)/i ) ) say( twitchircipc, connections[client]["user"], match[1] ) elsif ( message =~ /testchannelsubs/ ) -- cgit v1.2.3