// Adapted from https://github.com/Azure-Samples/AzureSpeechReactSample

import * as speechsdk from 'microsoft-cognitiveservices-speech-sdk'
import axios from 'axios'
import Cookie from 'universal-cookie'


// Credentials of the Azure Speech resource, used when requesting authorization tokens.
// NOTE(review): the subscription key is hard-coded in source, so anyone who can read
// this file can read the key — consider moving it to configuration or issuing
// tokens from a backend instead.
const azureSpeechKey = '444eb1042cf7422c8d66f169b2ff12bc'
// Azure region of the Speech resource; it is interpolated into the token endpoint host name.
const azureSpeechRegion = 'westeurope'


/**
 * Requests an authorization token from the Azure token service (`issueToken`
 * endpoint) of the configured region, authenticating with the subscription key.
 *
 * The token is deliberately never written to the console: it is a bearer
 * credential and must not end up in browser logs.
 *
 * @returns The token together with the region it was issued for, or `null`
 *          when the request fails or the response body is not a usable token.
 */
const retrieveToken = async (): Promise<{ token: string, region: string } | null> => {
    const region = azureSpeechRegion

    const requestConfig = {
        headers: {
            'Ocp-Apim-Subscription-Key': azureSpeechKey,
            'Content-Type': 'application/x-www-form-urlencoded'
        }
    }
    try {
        // The endpoint expects an empty POST body; the key travels in the header.
        const tokenResponse = await axios.post(`https://${region}.api.cognitive.microsoft.com/sts/v1.0/issueToken`, null, requestConfig)
        const token: unknown = tokenResponse.data
        if (typeof token !== 'string' || token.length === 0) {
            console.error('Unexpected response from the Azure token endpoint (status ' + tokenResponse.status + ')')
            return null
        }
        return { token: token, region: region }
    } catch (err) {
        console.error(err)
        return null
    }
}

/**
 * Returns a Speech authorization token, reusing the one cached in the
 * `azure-speech-token` cookie when present and fetching a new one otherwise.
 *
 * The cookie value has the form `<region>:<token>`. A cookie that is not a
 * string or does not have that form is treated as missing, so a fresh token is
 * fetched instead of returning an empty region or a garbage token.
 *
 * The token itself is never logged: it is a bearer credential.
 *
 * @returns The token and its region, or `null` when no token could be obtained.
 */
const getTokenOrRefresh = async (): Promise<{ authToken: string, region: string } | null> => {
    const tokenName = 'azure-speech-token'
    const cookie = new Cookie()
    const cachedValue: unknown = cookie.get(tokenName)

    if (typeof cachedValue === 'string') {
        const idx = cachedValue.indexOf(':')
        // Both the region (before ':') and the token (after ':') must be non-empty.
        if (idx > 0 && idx < cachedValue.length - 1) {
            console.log('Token fetched from cookie')
            return { authToken: cachedValue.slice(idx + 1), region: cachedValue.slice(0, idx) }
        }
    }

    const res = await retrieveToken()
    if (res === null) {
        return null
    }
    const token = res.token
    const region = res.region
    // The cookie expires after 540 seconds, which forces a refresh on the next call after that.
    // NOTE(review): presumably chosen to stay below the token's validity period — confirm.
    cookie.set(tokenName, region + ':' + token, {maxAge: 540, path: '/'})

    console.log('Token fetched from Azure')
    return { authToken: token, region: region }
}

/**
 * Progress notification delivered to the callback passed to `sttFromMic`.
 *
 * - `starting`: recognition is about to be requested.
 * - `started`: the recognizer reported that its session has started.
 * - `ended`: recognition finished; `result` holds the recognized text, or
 *   `null` when no speech was recognized or an error occurred.
 */
export type RecognitionEvent =
    | { type: 'starting' }
    | { type: 'started' }
    | { type: 'ended'; result: string | null }

/**
 * Performs a single speech recognition from the default microphone.
 *
 * Progress is reported through `callback`: a `starting` event right before
 * recognition is requested, a `started` event when the recognizer's session
 * starts, and exactly one `ended` event carrying the recognized text (or
 * `null` when nothing was recognized or an error occurred).
 *
 * The recognizer is closed once recognition has completed or failed, so it
 * does not stay allocated after each call.
 *
 * @param azureLanguage Recognition language, assigned to `speechRecognitionLanguage`.
 * @param callback Receiver of the recognition progress events.
 * @returns `false` when no authorization token could be obtained (no event is
 *          emitted in that case), `true` once recognition has been requested.
 */
export const sttFromMic = async (azureLanguage: string, callback: (event: RecognitionEvent) => void): Promise<boolean> => {
    const tokenObj = await getTokenOrRefresh()
    if (!tokenObj) {
        return false
    }

    const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region)
    speechConfig.speechRecognitionLanguage = azureLanguage

    const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput()
    const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig)

    // Emits the final event and always closes the recognizer, even if the callback throws.
    const finish = (text: string | null) => {
        try {
            callback({type: 'ended', result: text})
        } finally {
            recognizer.close()
        }
    }

    recognizer.sessionStarted = () => {
        callback({type: 'started'})
    }

    callback({type: 'starting'})
    recognizer.recognizeOnceAsync(result => {
        // Any reason other than RecognizedSpeech is reported as "nothing recognized".
        const recognized = result.reason === speechsdk.ResultReason.RecognizedSpeech
        finish(recognized ? result.text : null)
    }, err => {
        console.error(err)
        finish(null)
    })
    return true
}

// Player created by the most recent tts() call; each new call stops and replaces it.
let ttsPlayer: speechsdk.SpeakerAudioDestination | null = null;
// Pending timer that will fire the emulated "audio end" callback of the current utterance.
let playAudioTimeout: undefined | ReturnType<typeof setTimeout> = undefined;

/**
 * Synthesizes `text` to the speakers, stopping any utterance started by a
 * previous call.
 *
 * `audioStartCallback` is invoked when the synthesis result arrives, and
 * `audioEndCallback` is invoked by a timer derived from the reported audio
 * duration. Both receive the player created for this call.
 *
 * If a newer `tts` call replaces this call's player before its synthesis result
 * arrives, the result is ignored: no callback fires for the superseded
 * utterance and the newer utterance's end timer is left untouched.
 *
 * @param text Text to speak.
 * @param azureLanguage Synthesis language, assigned to `speechSynthesisLanguage`.
 * @param azureVoice Voice name, assigned to `speechSynthesisVoiceName`.
 * @param audioStartCallback Called with this call's player when the synthesis result arrives.
 * @param audioEndCallback Called with this call's player when the emulated end-of-audio timer fires.
 * @returns `false` when no authorization token could be obtained, `true` once
 *          synthesis has been requested.
 */
export const tts = async (
    text: string, azureLanguage: string, azureVoice: string,
    audioStartCallback: (audioPlayer: speechsdk.SpeakerAudioDestination) => void,
    audioEndCallback: (audioPlayer: speechsdk.SpeakerAudioDestination) => void
): Promise<boolean> => {
    const tokenObj = await getTokenOrRefresh()
    if (!tokenObj) {
        return false;
    }

    // Stop whatever the previous call was playing before starting a new utterance.
    if (ttsPlayer !== null) {
        ttsPlayer.pause()
        ttsPlayer.close()
    }
    // The player's own start/end events would be the natural hook, but the trailing
    // silence at the end of the audio could not be configured, so the end event is
    // emulated with a timeout instead.
    const newTtsPlayer = new speechsdk.SpeakerAudioDestination()
    //newTtsPlayer.onAudioStart = () => audioStartCallback(newTtsPlayer)
    //newTtsPlayer.onAudioEnd = () => audioEndCallback(newTtsPlayer)
    ttsPlayer = newTtsPlayer

    const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region)
    speechConfig.speechSynthesisLanguage = azureLanguage
    speechConfig.speechSynthesisVoiceName = azureVoice
    // Meant to remove the silence at the end of the audio, but it has no effect
    // (and it does not work through SSML either).
    //speechConfig.setProperty(speechsdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, "0")

    const audioConfig = speechsdk.AudioConfig.fromSpeakerOutput(newTtsPlayer)

    const synth = new speechsdk.SpeechSynthesizer(speechConfig, audioConfig)

    synth.speakTextAsync(
        text,
        result => {
            // Skip results for a player that a newer tts() call has already stopped and
            // replaced; otherwise a late result would cancel the newer utterance's end
            // timer and fire callbacks for audio that is no longer playing.
            if (result && ttsPlayer === newTtsPlayer) {
                // The duration is divided by 10000 to obtain milliseconds.
                const durationMs = result.audioDuration / 10000
                console.log("audio duration: " + durationMs + " ms")

                audioStartCallback(newTtsPlayer)
                clearTimeout(playAudioTimeout)
                // Fire the end callback 675 ms before the reported duration, never sooner than 1 ms.
                // NOTE(review): 675 ms is presumably the estimated trailing silence — confirm.
                playAudioTimeout = setTimeout(() => audioEndCallback(newTtsPlayer), Math.max(1, durationMs - 675))
            }
            synth.close()
        },
        error => {
            console.error(error)
            synth.close()
        }
    )

    return true;
}