Translucent Text-to-Speech Example

In [1]:
# Set GOOGLE_APPLICATION_CREDENTIALS for this kernel via the %env magic.
# NOTE(review): the value is an absolute path on one developer's machine and
# presumably points at a Google Cloud credentials JSON file -- confirm, and
# adjust the path before running this notebook anywhere else.
%env GOOGLE_APPLICATION_CREDENTIALS=/Users/patrykattc/projects/git/public/machine-learning/audio/tcinc-dev-6ad066d43323.json
env: GOOGLE_APPLICATION_CREDENTIALS=/Users/patrykattc/projects/git/public/machine-learning/audio/tcinc-dev-6ad066d43323.json
In [2]:
from bs4 import BeautifulSoup
import urllib.request, urllib.error, urllib.parse
from google.cloud import texttospeech
from google.cloud import translate_v2
import html
import six
In [3]:
# Code from Google's git repository
def synthesize_text(text, output_path='output.mp3'):
    """Synthesizes speech from the input string of text and saves it as MP3.

    The request uses the fixed voice 'en-US-Standard-C' (language code
    'en-US', female SSML gender) and MP3 audio encoding.

    Args:
        text: Plain-text string to be spoken.
        output_path: File that receives the binary MP3 content. Defaults to
            'output.mp3', the location this function has always written to,
            so existing callers are unaffected.
    """
    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.types.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        name='en-US-Standard-C',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(input_text, voice, audio_config)

    # The response's audio_content is binary.
    with open(output_path, 'wb') as out:
        out.write(response.audio_content)

    # Report success only once the file has been flushed and closed.
    print('Audio content written to file "{}"'.format(output_path))
        
def text_to_ssml(text):
    """Convert plain text into an SSML document string.

    The text is HTML-escaped, a two-second ``<break/>`` is inserted after
    every newline, and the result is wrapped in a ``<speak>`` element.

    Args:
        text: Plain-text string to convert.

    Returns:
        The SSML script as a single string.
    """
    # Pause marker that follows each line break in the escaped text.
    pause_after_newline = '\n<break time="2s"/>'

    # Escape first so characters such as '<' and '&' cannot break the markup.
    safe_text = html.escape(text)

    # Splitting on newlines and re-joining with the pause marker is the same
    # as replacing every newline with "newline + break tag".
    spoken_body = pause_after_newline.join(safe_text.split('\n'))

    return '<speak>{}</speak>'.format(spoken_body)

def synthesize_ssml(ssml, code, name, output_path='output.mp3'):
    """Synthesizes speech from the input string of ssml and saves it as MP3.

    Note: ssml must be well-formed according to:
        https://www.w3.org/TR/speech-synthesis/
    Example: <speak>Hello there.</speak>

    Args:
        ssml: SSML document string to be spoken.
        code: Language code passed to the voice selection, e.g. 'en-US'.
        name: Voice name passed to the voice selection,
            e.g. 'en-US-Wavenet-C'.
        output_path: File that receives the binary MP3 content. Defaults to
            'output.mp3', the location this function has always written to,
            so existing callers are unaffected.
    """
    # Relies on the notebook-level `from google.cloud import texttospeech`
    # import, the same way synthesize_text does.
    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.types.SynthesisInput(ssml=ssml)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.types.VoiceSelectionParams(
        language_code=code,
        name=name,
        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(input_text, voice, audio_config)

    # The response's audio_content is binary.
    with open(output_path, 'wb') as out:
        out.write(response.audio_content)

    # Report success only once the file has been flushed and closed.
    print('Audio content written to file "{}"'.format(output_path))
        
def list_voices():
    """Print a description of every voice the service reports.

    For each voice returned by ``client.list_voices()`` this prints the
    voice name, one line per supported language code, the SSML gender name,
    and the natural sample rate in hertz followed by a blank line.
    """
    from google.cloud import texttospeech
    from google.cloud.texttospeech import enums

    tts_client = texttospeech.TextToSpeechClient()

    # Ask the service for its voice catalogue.
    catalogue = tts_client.list_voices()

    for entry in catalogue.voices:
        # Voice name. Example: tpc-vocoded
        name_line = 'Name: {}'.format(entry.name)
        print(name_line)

        # One line per supported language code. Example: "en-US"
        for supported in entry.language_codes:
            print('Supported language: {}'.format(supported))

        # Turn the raw gender value into its enum member to get a readable name.
        gender_label = enums.SsmlVoiceGender(entry.ssml_gender).name
        print('SSML Voice Gender: {}'.format(gender_label))

        # Natural sample rate. Example: 24000
        sample_rate = entry.natural_sample_rate_hertz
        print('Natural Sample Rate Hertz: {}\n'.format(sample_rate))
In [4]:
# Get the webpage content
url = 'https://www.translucentcomputing.com/2020/01/strategies-for-managing-kubernetes-cloud-cost-part-1/'

# Open the response as a context manager so the underlying HTTP connection
# is closed as soon as the body has been read, even if read() fails.
with urllib.request.urlopen(url) as response:
    webContent = response.read()

# Parse the raw bytes with Python's built-in HTML parser.
soup = BeautifulSoup(webContent, 'html.parser')
In [5]:
# find the text we need from the webpage
divs = soup.find_all('div', attrs={'class':'row'})

# The third <div class="row"> is the element selected below. Check that it
# exists so a change in the page layout produces an explanatory error instead
# of a bare "list index out of range".
if len(divs) < 3:
    raise IndexError(
        'expected at least 3 <div class="row"> elements, found {}'.format(len(divs)))
tag = divs[2]
In [6]:
# Create SSML text file
ssml_text = text_to_ssml(tag.text)

# The context manager closes the file even if the write raises, which the
# previous manual open()/close() pair did not guarantee.
with open("kube_cloud_cost_part_1_blog.txt","w+") as f:
    f.write(ssml_text)
In [7]:
# Load the SSML script back from the text file written earlier
with open("kube_cloud_cost_part_1_blog.txt", 'r') as ssml_file:
    ssml_text = ssml_file.read()

# Turn the SSML script into audio using the en-US Wavenet-C voice
synthesize_ssml(ssml_text, code='en-US', name='en-US-Wavenet-C')
Audio content written to file "output.mp3"