Speech Tiles Developer Area, BZVoiceFlow Developer Guide

{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://speechtiles.com/draft/schemas/AudioPromptModuleList.schema.json",
    "copyrightNotice": "© Speech Tiles LLC. 2024, All rights reserved.",
    "title": "Audio Prompt Modules",
    "description": "Version 1.1; last updated 02/23/2024. This document contains schema for Audio Prompt Modules used for audio playback of multiple Audio Segments or for audio playback of other referenced multiple Audio Prompt Modules. Unless documented otherwise, value of properties that are of type \"string\" can be updated at runtime by an application if their value is entered as a field name wrapped by \"$[\" and \"]\". If a property is of a type other than \"string\", then an additional property of type \"string\" with the same property name appended with the string \"Runtime\" is added to have its value entered as a field name wrapped by \"$[\" and \"]\". This allows the application to set the value of the field name at runtime to be the value of the original non-runtime property. Valid values of runtime fields override the values of non-runtime properties.",
    
    "type": "array",
    "items": {
        "description": "Array of Audio Prompt Modules",
        "type": "object",
        "properties": {
            "_COMMENT_": {
                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "id": {
                "description": "Unique ID of an Audio Prompt Module. The value cannot be set at runtime.",
                "type": "string"
            },
            "style": {
                "description": "Style of selection of audio segments or Audio Prompt Modules. Values: \"single\", \"select\", \"combo\", or \"serial\". If Audio Prompt Module references a collection of other Audio Prompt Module IDs then Default: \"combo\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "_audioFile": {
                "description": "Placeholder for documenting or noting other audioFile options. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "audioFile": {
                "description": "The name of the audio file containing the recorded Audio Segment for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "_textString": {
                "description": "Placeholder for documenting or noting other textString options. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "textString": {
                "description": "The text string to be synthesized for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "_textFile": {
                "description": "Placeholder for documenting or noting other textFile options. Voice Flow processing ignores this property.",
                "type": "string"
            },
            "textFile": {
                "description": "The name of the text file containing the text to be synthesized for audio playback. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "textIsSSML": {
                "description": "Specifies whether the text to be synthesized contains Speech Synthesis Markup Language \"SSML\". Values: \"true\" or \"false\". Default: \"false\". The value can be set at runtime using the \"textIsSSMLRuntime\" property.",
                "type": "boolean"
            },
            "textIsSSMLRuntime": {
                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"textIsSSML\" property. Example: \"$[TextSSML]\".",
                "type": "string"
            },
            "ssEngine": {
                "description": "The Text To Speech or Speech Synthesizer engine to be used for Speech Synthesis. Values: \"apple\" or \"flite\". If both \"apple\" and \"flite\" are active then, if not specified, Default: \"flite\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "languageCode": {
                "description": "The language locale folder name set up to contain the recorded audio files. Default: \"en-US\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "textLanguageCode": {
                "description": "The language locale of the text to be speech synthesised. Default: \"en-US\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                "type": "string"
            },
            "promptCollection": {
                "description": "Specifies a reference to an array of other Audio Prompt Module IDs referenced from this Audio Prompt Module.",
                "type": "array",
                "items": {
                    "description": "Array of Audio Prompt Module IDs.",
                    "type": "object",
                    "properties": {
                        "_COMMENT_": {
                            "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                            "type": "string"
                        },
                        "_promptID": {
                            "description": "Placeholder for documenting or noting other promptID options. Voice Flow processing ignores this property.",
                            "type": "string"
                        },
                        "promptID": {
                            "description": "ID of another referenced Audio Prompt Module. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                            "type": "string"
                        }
                    },
                    "required": ["promptID"],
                    "additionalProperties": false
                }
            },
            "audioPlaybackParams": {
                "description": "Specifies audio playback parameters of the Audio Segment referenced in an Audio Prompt Module.",
                "type": "object",
                "properties": {
                    "_COMMENT_": {
                        "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                        "type": "string"
                    },
                    "loopPlay": {
                        "description": "Specifies whether to repeat audio playback of Audio Segment after audio playback ends.  The value can be set at runtime using the \"loopPlayRuntime\" property. Default: \"false\".",
                        "type": "boolean"
                    },
                    "loopPlayRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"loopPlay\" property. Example: \"$[LoopPlay]\".",
                        "type": "string"
                    },
                    "maxPlayCount": {
                        "description": "Automatic maximum count of audio playbacks of Audio Segment if \"loopPlay\" property is \"true\". The value can be set at runtime using the \"maxPlayCountRuntime\" property. Default: 1.",
                        "type": "integer"
                    },
                    "maxPlayCountRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"maxPlayCount\" property. Example: \"$[MaxPlayCount]\".",
                        "type": "string"
                    },
                    "audioFormat": {
                        "description": "Audio format of recorded audio in audio file. May be required if audio file does not contain header that specifies audio format of audio data. Values: \"pcm_8khz\", \"pcm_16khz\", \"pcm_32khz\", \"pcm_48khz\", \"wav\", \"compressed\" (experimental), \"preencoded\" (experimental), \"media\", or \"raw\". Default: \"pcm_16khz\". Default: \"pcm_16khz\" if audio format is not automatically detected. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                        "type": "string"
                    },
                    "volumeScale": {
                        "description": "Specifies a volume scale to apply to audio playback. The value can be set at runtime using the \"volumeScaleRuntime\" property. Values: minimum value > 0.0. Default: 1.0.",
                        "type": "number"
                    },
                    "volumeScaleRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"volumeScale\" property. Example: \"$[VolumeScale]\".",
                        "type": "string"
                    },
                    "startPosMs": {
                        "description": "Time position in milliseconds for when audio playback to start for the Audio Segment. The value can be set at runtime using the \"startPosMsRuntime\" property. Default: 0.",
                        "type": "integer"
                    },
                    "startPosMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"startPosMs\" property. Example: \"$[StartPosMs]\".",
                        "type": "string"
                    },
                    "stopPosMs": {
                        "description": "Time position in milliseconds for when audio playback to stop for the Audio Segment. The value can be set at runtime using the \"stopPosMsRuntime\" property. Default: 0 for audio playback to continue to the end of the audio.",
                        "type": "integer"
                    },
                    "stopPosMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"stopPosMs\" property. Example: \"$[StopPosMs]\".",
                        "type": "string"
                    },
                    "notifyPosMs": {
                        "description": "Time position in milliseconds for when audio playback is reached, a notification is sent to Client. The value can be set at runtime using the \"notifyPosMsRuntime\" property.",
                        "type": "integer"
                    },
                    "notifyPosMsRuntime": {
                        "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"notifyPosMs\" property. Example: \"$[NotifyPosMs]\".",
                        "type": "string"
                    },
                    "audioCodec": {
                        "description": "(Experimental) The audio codec parameters to use for audio playback if one is not specified in \"audioFormat\" parameter.",
                        "type": "object",
                        "properties": {
                            "_COMMENT_": {
                                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                "type": "string"
                            },
                            "payloadType": {
                                "description": "Type of payload. Examples: 0 for PCMU; 8 for PCMA; 9 for G&22; 120 foe OPUS, etc. The value can be set at runtime using the \"payloadTypeRuntime\" property.",
                                "type": "integer"
                            },
                            "payloadTypeRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"payloadType\" property. Example: \"$[PayloadType]\".",
                                "type": "string"
                            },
                            "payloadName": {
                                "description": "Name of audio codec payload. Examples: \"PCMU\", \"PCMA\", \"G722\", \"OPUS\". etc. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "payloadFrequency": {
                                "description": "Sampling rate: number of samples per second. Examples: 8000, 16000, 320000, 48000, etc. The value can be set at runtime using the \"payloadFrequencyRuntime\" property.",
                                "type": "integer"
                            },
                            "payloadFrequencyRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"payloadFrequency\" property. Example: \"$[PayloadFrequency]\".",
                                "type": "string"
                            },
                            "audioPacketSizeBytes": {
                                "description": "Size of an audio frame packet. Example: sampling rate divided by 100 for a 10ms frame and multiplied by number of bytes per sample. The value can be set at runtime using the \"audioPacketSizeBytesRuntime\" property.",
                                "type": "integer"
                            },
                            "audioPacketSizeBytesRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"audioPacketSizeBytes\" property. Example: \"$[AudioPacketSizeBytes]\".",
                                "type": "string"
                            },
                            "numberChannelsPerAudioSample": {
                                "description": "Number of channels. Values: 1 for mono or 2 for stereo. The value can be set at runtime using the \"numberChannelsPerAudioSampleRuntime\" property.",
                                "type": "integer"
                            },
                            "numberChannelsPerAudioSampleRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"numberChannelsPerAudioSample\" property. Example: \"$[NumberChannelsPerAudioSample]\".",
                                "type": "string"
                            },
                            "sampleRateBitsPerSecond": {
                                "description": "Bit rate per second. Example: sampling rate multiplied by number of bytes per sample multiplied by number of bits per byte. The value can be set at runtime using the \"sampleRateBitsPerSecondRuntime\" property.",
                                "type": "integer"
                            },
                            "sampleRateBitsPerSecondRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"sampleRateBitsPerSecond\" property. Example: \"$[SampleRateBitsPerSecond]\".",
                                "type": "string"
                            }
                        },
                        "required": ["payloadType", "payloadName", "payloadFrequency", "audioPacketSizeBytes", "numberChannelsPerAudioSample", "sampleRateBitsPerSecond"],
                        "additionalProperties": false
                    },
                    "ssEngineParams": {
                        "description": "Specifies the Speech Synthesizer parameters.",
                        "type": "object",
                        "properties": {
                            "_COMMENT_": {
                                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                "type": "string"
                            },
                            "ssVoiceParams": {
                                "description": "Specifies the parameters of the voice used for Speech Synthesis.",
                                "type": "object",
                                "properties": {
                                    "_COMMENT_": {
                                        "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                        "type": "string"
                                    },
                                    "id": {
                                        "description": "ID of the voice, if known. Apple Example: \"com.apple.ttsbundle.Samantha-compact\". Flite example: \"slt-hts\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    },
                                    "name": {
                                        "description": "Name of the voice, if known. Apple Example: \"Samantha\". Flite example: \"slt-hts\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    },
                                    "gender": {
                                        "description": "Gender of the voice. Values: \"male\" or \"female\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    },
                                    "quality": {
                                        "description": "Voice quality. Values: \"compact\", \"enhanced\", \"premium\" or an empty string. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    },
                                    "languageCode": {
                                        "description": "Language locale of the voice. Example: \"en-US\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                        "type": "string"
                                    }
                                },
                                "required": [],
                                "additionalProperties": false
                            },
                            "volume": {
                                "description": "Specifies volume of synthesized audio. Values: minimum value > 0.0. Default: 1.0. The value can be set at runtime using the \"volumeRuntime\" property.",
                                "type": "number"
                            },
                            "volumeRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"volume\" property. Example: \"$[Volume]\".",
                                "type": "string"
                            },
                            "inStreamMode": {
                                "description": "(Flite only) Specifies whether to stream synthesized audio blocks for audio playback in small packets to simulate real-time audio streaming. If false, audio blocks generated by a speech synthesizer are sent as whole to audio playback. Default: \"false\". The value can be set at runtime using the \"inStreamModeRuntime\" property.",
                                "type": "boolean"
                            },
                            "inStreamModeRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"inStreamMode\" property. Example: \"$[InStreamMode]\".",
                                "type": "string"
                            },
                            "startPlayFileTextPosBytes": {
                                "description": "(Flite only) Specifies the position in text in bytes where to start speech synthesis for audio playback. Default: 0. The value can be set at runtime using the \"startPlayFileTextPosBytesRuntime\" property.",
                                "type": "integer"
                            },
                            "startPlayFileTextPosBytesRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"startPlayFileTextPosBytes\" property. Example: \"$[StartPlayFileTextPosBytes]\".",
                                "type": "string"
                            },
                            "durationStretch": {
                                "description": "(Flite only) Specifies duration stretch for synthesized audio. Default: 1.0. The value can be set at runtime using the \"durationStretchRuntime\" property.",
                                "type": "number"
                            },
                            "durationStretchRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"durationStretch\" property. Example: \"$[DurationStretch]\".",
                                "type": "string"
                            },
                            "targetMean": {
                                "description": "(Flite only) Specifies target mean for synthesized audio. Default: 1.0. The value can be set at runtime using the \"targetMeanRuntime\" property.",
                                "type": "number"
                            },
                            "targetMeanRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"targetMean\" property. Example: \"$[TargetMean]\".",
                                "type": "string"
                            },
                            "targetStandardDeviation": {
                                "description": "(Flite only) Specifies target standard deviation for synthesized audio. Default: 1.0. The value can be set at runtime using the \"targetStandardDeviationRuntime\" property.",
                                "type": "number"
                            },
                            "targetStandardDeviationRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"targetStandardDeviation\" property. Example: \"$[TargetStandardDeviation]\".",
                                "type": "string"
                            },
                            "pitchMultiplier": {
                                "description": "(Apple only) Specifies pitch multiplier for synthesized audio. Default: 1.0. The value can be set at runtime using the \"pitchMultiplierRuntime\" property.",
                                "type": "number"
                            },
                            "pitchMultiplierRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"pitchMultiplier\" property. Example: \"$[PitchMultiplier]\".",
                                "type": "string"
                            },
                            "preUtteranceDelaySec": {
                                "description": "(Apple only) Specifies pre utterance delay in seconds to be inserted at start of synthesized audio. Default: 0. The value can be set at runtime using the \"preUtteranceDelaySecRuntime\" property.",
                                "type": "integer"
                            },
                            "preUtteranceDelaySecRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"preUtteranceDelaySec\" property. Example: \"$[PreUtteranceDelaySec]\".",
                                "type": "string"
                            },
                            "postUtteranceDelaySec": {
                                "description": "(Apple only) Specifies post utterance delay in seconds to be inserted at end of synthesized audio. Default: 0. The value can be set at runtime using the \"postUtteranceDelaySecRuntime\" property.",
                                "type": "integer"
                            },
                            "postUtteranceDelaySecRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose integer value can be set at runtime by an application. If the value of the field is a valid integer, then it will replace the value entered in the \"postUtteranceDelaySec\" property. Example: \"$[PostUtteranceDelaySec]\".",
                                "type": "string"
                            },
                            "rate": {
                                "description": "(Apple only) Specifies the speaking rate of synthesized audio. Default: 0.5. The value can be set at runtime using the \"rateRuntime\" property.",
                                "type": "number"
                            },
                            "rateRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose float value can be set at runtime by an application. If the value of the field is a valid float, then it will replace the value entered in the \"rate\" property. Example: \"$[Rate]\".",
                                "type": "string"
                            }
                        },
                        "required": [],
                        "additionalProperties": false
                    },
                    "recordSSParams": {
                        "description": "The parameters to control saving speech synthesis of an audio segment into a file",
                        "type": "object",
                        "properties": {
                            "_COMMENT_": {
                                "description": "Placeholder for entering a comment. Voice Flow processing ignores this property.",
                                "type": "string"
                            },
                            "enable": {
                                "description": "Enable or disable saving speech synthesis into a file. The value can be set at runtime using the \"enableRuntime\" property.",
                                "type": "boolean"
                            },
                            "enableRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"enable\" property. Example: \"$[Enable]\".",
                                "type": "string"
                            },
                            "offlineMode": {
                                "description": "If true, then audio playback will not occur on a device. Only speech synthesis is saved into a file. The value can be set at runtime using the \"offlineModeRuntime\" property.",
                                "type": "boolean"
                            },
                            "offlineModeRuntime": {
                                "description": "Contains the name of a field, as a string wrapped between \"$[\" and \"]\", whose boolean value can be set at runtime by an application. If the value of the field is a valid boolean, then it will replace the value entered in the \"offlineMode\" property. Example: \"$[OfflineMode]\".",
                                "type": "string"
                            },
                            "filetype": {
                                "description": "Type of audio file to contain the synthesized audip. options are \"wav\" and \"caf\".  The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "fileBaseName": {
                                "description": "File base name. \".wav\" or \".caf\" is appended. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "fileURLPath": {
                                "description": "Folder path where the synthesized audio file is saved. The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            },
                            "languageCode": {
                                "description": "if specified that \"languageCode\" will be appended ot the \"fileURLPath\". The value can be set at runtime using \"$[<FieldName>]\" entry convention.",
                                "type": "string"
                            }
                        },
                        "required": ["enable"],
                        "additionalProperties": false
                    }
                },
                "required": [],
                "additionalProperties": false
            }
        },
        "required": ["id"],
        "additionalProperties": false
    }
}

Speech Tiles Developers

Conversational Speech Frameworks

Speech Tiles
Developers

Conversational Speech Frameworks

Conversational Speech Frameworks

BZVoiceFlow Framework Reference
1.2.0
Audio Prompt Module List JSON Schema

BZVoiceFlow Framework Reference 1.2.0 Audio Prompt Module List JSON Schema

BZVoiceFlow Framework Reference
1.2.0
Audio Prompt Module List JSON Schema