// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/memory/raw_ptr.h"

#import "content/browser/speech/tts_mac.h"

#include <string>

#include "base/bind.h"
#include "base/strings/sys_string_conversions.h"
#include "base/values.h"
#include "content/public/browser/tts_controller.h"

namespace {

// Returns a mutable reference to the function-local static vector that
// Voices() uses as its cache of voice data.
//
// The vector starts out empty. As part of its one-time initialization a block
// observer for NSApplicationWillBecomeActiveNotification is registered that
// empties the vector again. The observer token returned by
// -addObserverForName:object:queue:usingBlock: is discarded, so the observer
// is never removed.
std::vector<content::VoiceData>& VoicesRef() {
  // The immediately-invoked lambda lets the observer registration happen as
  // part of the static's initializer.
  static base::NoDestructor<std::vector<content::VoiceData>> voices([]() {
    [[NSNotificationCenter defaultCenter]
        addObserverForName:NSApplicationWillBecomeActiveNotification
                    object:nil
                     queue:nil
                usingBlock:^(NSNotification* notification) {
                  // The user might have switched to Settings or some other app
                  // to change voices or locale settings. Avoid a stale cache by
                  // forcing a rebuild of the voices vector after the app
                  // becomes active.
                  //
                  // NOTE(review): this re-enters VoicesRef(); that presumes the
                  // notification is only delivered after the static above has
                  // finished initializing.
                  VoicesRef().clear();
                }];
    return std::vector<content::VoiceData>();
  }());

  return *voices;
}

// Returns the cached list of voices reported by NSSpeechSynthesizer, building
// it first if the cache is currently empty. When the system reports a default
// voice, that voice is listed first.
std::vector<content::VoiceData>& Voices() {
  std::vector<content::VoiceData>& cache = VoicesRef();
  if (!cache.empty())
    return cache;

  // Work on a mutable copy so the default voice can be moved to the front.
  base::scoped_nsobject<NSMutableArray> identifiers(
      [NSSpeechSynthesizer.availableVoices mutableCopy]);
  if (NSString* preferred = NSSpeechSynthesizer.defaultVoice) {
    [identifiers removeObject:preferred];
    [identifiers insertObject:preferred atIndex:0];
  }

  // Every voice in the list advertises the same set of events.
  const content::TtsEventType kSupportedEvents[] = {
      content::TTS_EVENT_START,     content::TTS_EVENT_END,
      content::TTS_EVENT_WORD,      content::TTS_EVENT_ERROR,
      content::TTS_EVENT_CANCELLED, content::TTS_EVENT_INTERRUPTED,
      content::TTS_EVENT_PAUSE,     content::TTS_EVENT_RESUME,
  };

  cache.reserve([identifiers count]);

  for (NSString* identifier in identifiers.get()) {
    cache.emplace_back();
    content::VoiceData& entry = cache.back();

    NSDictionary* voiceAttributes =
        [NSSpeechSynthesizer attributesForVoice:identifier];
    NSString* voiceName = voiceAttributes[NSVoiceName];
    NSString* voiceLocale = voiceAttributes[NSVoiceLocaleIdentifier];

    entry.native = true;
    entry.native_voice_identifier = base::SysNSStringToUTF8(identifier);
    entry.name = base::SysNSStringToUTF8(voiceName);

    // Build a "language-COUNTRY" tag when both parts are known; otherwise
    // fall back to the bare language code.
    NSDictionary* localeParts =
        [NSLocale componentsFromLocaleIdentifier:voiceLocale];
    NSString* languageCode = localeParts[NSLocaleLanguageCode];
    NSString* countryCode = localeParts[NSLocaleCountryCode];
    NSString* languageTag =
        (languageCode && countryCode)
            ? [NSString stringWithFormat:@"%@-%@", languageCode, countryCode]
            : languageCode;
    entry.lang = base::SysNSStringToUTF8(languageTag);

    for (content::TtsEventType event : kSupportedEvents)
      entry.events.insert(event);
  }

  return cache;
}

}  // namespace

// static
// On this platform the generic accessor hands back the TtsPlatformImplMac
// singleton.
content::TtsPlatformImpl* content::TtsPlatformImpl::GetInstance() {
  TtsPlatformImplMac* mac_impl = TtsPlatformImplMac::GetInstance();
  return mac_impl;
}

// Defaulted out of line; no explicit teardown is performed here.
TtsPlatformImplMac::~TtsPlatformImplMac() = default;

// Unconditionally reports that this platform implementation is supported.
bool TtsPlatformImplMac::PlatformImplSupported() {
  return true;
}

// Unconditionally reports that this platform implementation is initialized.
bool TtsPlatformImplMac::PlatformImplInitialized() {
  return true;
}

// Entry point for speaking |utterance|. The text is handed to
// TtsController::StripSSML(); the remaining arguments are bound into a
// callback that continues in ProcessSpeech() with the text StripSSML()
// produces.
void TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished) {
  // The callback holds an unretained pointer to this object.
  auto continue_with_parsed_text = base::BindOnce(
      &TtsPlatformImplMac::ProcessSpeech, base::Unretained(this), utterance_id,
      lang, voice, params, std::move(on_speak_finished));

  // Parse SSML and process speech.
  content::TtsController::GetInstance()->StripSSML(
      utterance, std::move(continue_with_parsed_text));
}

// Speaks |parsed_utterance| on a freshly created synthesizer, applying |voice|
// and the rate/pitch/volume from |params| (negative values leave the
// synthesizer's own setting untouched).
//
// |on_speak_finished| is run exactly once: with false if the text cannot be
// converted to an NSString, otherwise with the result of starting speech. When
// speech starts, a TTS_EVENT_START is reported to the TtsController.
// |lang| is currently unused (see the TODO below).
void TtsPlatformImplMac::ProcessSpeech(
    int utterance_id,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished,
    const std::string& parsed_utterance) {
  utterance_ = parsed_utterance;
  paused_ = false;

  // +stringWithUTF8String: returns nil when the bytes are not valid UTF-8.
  NSString* utterance_nsstring =
      [NSString stringWithUTF8String:utterance_.c_str()];
  if (!utterance_nsstring) {
    std::move(on_speak_finished).Run(false);
    return;
  }

  // Deliberately construct a new speech synthesizer every time Speak is
  // called, otherwise there's no way to know whether calls to the delegate
  // apply to the current utterance or a previous utterance. In
  // experimentation, the overhead of constructing and destructing a
  // NSSpeechSynthesizer is minimal.
  speech_synthesizer_.reset([[SingleUseSpeechSynthesizer alloc]
      initWithUtterance:utterance_nsstring]);
  [speech_synthesizer_ setDelegate:delegate_];

  if (!voice.native_voice_identifier.empty()) {
    NSString* native_voice_identifier =
        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    [speech_synthesizer_ setVoice:native_voice_identifier];
  }

  utterance_id_ = utterance_id;
  // Forget the position reached in the previous utterance. Without this, a
  // Pause() or Resume() issued before the first word callback of the new
  // utterance would report a stale character index.
  last_char_index_ = 0;

  // TODO: support languages other than the default: crbug.com/88059

  if (params.rate >= 0.0) {
    // The TTS api defines rate via words per minute. Let 200 be the default.
    [speech_synthesizer_ setObject:[NSNumber numberWithInt:params.rate * 200]
                       forProperty:NSSpeechRateProperty
                             error:nil];
  }

  if (params.pitch >= 0.0) {
    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    // Get the default pitch for this voice and modulate it by 50% - 150%.
    // The error is not inspected; a nil result falls back to a pitch of 48.
    NSNumber* defaultPitchObj =
        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
                                         error:nil];
    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    [speech_synthesizer_ setObject:[NSNumber numberWithInt:newPitch]
                       forProperty:NSSpeechPitchBaseProperty
                             error:nil];
  }

  if (params.volume >= 0.0) {
    [speech_synthesizer_ setObject:[NSNumber numberWithFloat:params.volume]
                       forProperty:NSSpeechVolumeProperty
                             error:nil];
  }

  bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
  if (success) {
    content::TtsController* controller = content::TtsController::GetInstance();
    controller->OnTtsEvent(utterance_id_, content::TTS_EVENT_START, 0, -1, "");
  }
  std::move(on_speak_finished).Run(success);
}

// Stops the current synthesizer, if there is one, and drops our reference to
// it. Also clears the paused flag. Always returns true.
bool TtsPlatformImplMac::StopSpeaking() {
  if (SingleUseSpeechSynthesizer* active = speech_synthesizer_.get()) {
    [active stopSpeaking];
    speech_synthesizer_.reset(nil);
  }

  paused_ = false;
  return true;
}

// Pauses speech immediately and reports TTS_EVENT_PAUSE at the last recorded
// character index. Does nothing unless there is a synthesizer, a non-zero
// utterance id, and speech is not already paused.
void TtsPlatformImplMac::Pause() {
  const bool can_pause = speech_synthesizer_.get() && utterance_id_ && !paused_;
  if (!can_pause)
    return;

  [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
  paused_ = true;

  content::TtsController* controller = content::TtsController::GetInstance();
  controller->OnTtsEvent(utterance_id_, content::TTS_EVENT_PAUSE,
                         last_char_index_, -1, "");
}

// Continues paused speech and reports TTS_EVENT_RESUME at the last recorded
// character index. Does nothing unless there is a synthesizer, a non-zero
// utterance id, and speech is currently paused.
void TtsPlatformImplMac::Resume() {
  const bool can_resume = speech_synthesizer_.get() && utterance_id_ && paused_;
  if (!can_resume)
    return;

  [speech_synthesizer_ continueSpeaking];
  paused_ = false;

  content::TtsController* controller = content::TtsController::GetInstance();
  controller->OnTtsEvent(utterance_id_, content::TTS_EVENT_RESUME,
                         last_char_index_, -1, "");
}

// Reports whether the current synthesizer says it is speaking; false when
// there is no synthesizer.
bool TtsPlatformImplMac::IsSpeaking() {
  if (!speech_synthesizer_.get())
    return false;

  return [speech_synthesizer_ isSpeaking];
}

// Copies the cached voice list (built on demand by Voices()) into
// |*outVoices|, replacing its previous contents.
void TtsPlatformImplMac::GetVoices(std::vector<content::VoiceData>* outVoices) {
  const std::vector<content::VoiceData>& cached_voices = Voices();
  *outVoices = cached_voices;
}

// Forwards a synthesizer event to the TtsController and remembers the
// character index that was reported. Events whose |sender| is not the current
// synthesizer are dropped.
void TtsPlatformImplMac::OnSpeechEvent(NSSpeechSynthesizer* sender,
                                       content::TtsEventType event_type,
                                       int char_index,
                                       int char_length,
                                       const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  // This depends on the fact that we construct a new NSSpeechSynthesizer
  // each time we call Speak.
  const bool from_current_synthesizer = (sender == speech_synthesizer_.get());
  if (!from_current_synthesizer)
    return;

  // An END event is always reported at the size of the stored utterance,
  // regardless of the index the caller supplied.
  const int reported_index = (event_type == content::TTS_EVENT_END)
                                 ? static_cast<int>(utterance_.size())
                                 : char_index;

  content::TtsController* controller = content::TtsController::GetInstance();
  controller->OnTtsEvent(utterance_id_, event_type, reported_index,
                         char_length, error_message);
  last_char_index_ = reported_index;
}

// Creates the delegate object that routes synthesizer callbacks back to this
// instance; |delegate_| takes over the reference returned by alloc/init.
TtsPlatformImplMac::TtsPlatformImplMac() {
  ChromeTtsDelegate* callback_router =
      [[ChromeTtsDelegate alloc] initWithPlatformImplMac:this];
  delegate_.reset(callback_router);
}

// static
// Returns the single TtsPlatformImplMac, constructed on first use as a
// function-local static.
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  static base::NoDestructor<TtsPlatformImplMac> instance;
  TtsPlatformImplMac* const singleton = instance.get();
  return singleton;
}

// static
// Gives tests direct, mutable access to the voice cache returned by
// VoicesRef().
std::vector<content::VoiceData>& TtsPlatformImplMac::VoicesRefForTesting() {
  std::vector<content::VoiceData>& cache = VoicesRef();
  return cache;
}

// Receives speech synthesizer callbacks and forwards each one to
// TtsPlatformImplMac::OnSpeechEvent() as the matching TTS event.
@implementation ChromeTtsDelegate {
 @private
  raw_ptr<TtsPlatformImplMac> _ttsImplMac;  // weak.
}

// Stores the (non-owned) platform implementation that events are sent to.
- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  self = [super init];
  if (self) {
    _ttsImplMac = ttsImplMac;
  }
  return self;
}

// Reports TTS_EVENT_END. The |finished_speaking| flag is not consulted.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
        didFinishSpeaking:(BOOL)finished_speaking {
  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_END, 0, -1, "");
}

// Reports TTS_EVENT_WORD with the location and length of |word_range|.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
            willSpeakWord:(NSRange)word_range
                 ofString:(NSString*)string {
  // The Mac speech synthesizer is a bit buggy and occasionally returns a
  // number way out of range, so only forward ranges that start inside (or at
  // the end of) the string.
  const NSUInteger word_start = word_range.location;
  if (word_start <= [string length]) {
    _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_WORD, word_start,
                               word_range.length, "");
  }
}

// Reports TTS_EVENT_ERROR at |character_index| with |message| as UTF-8.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    didEncounterErrorAtIndex:(NSUInteger)character_index
                    ofString:(NSString*)string
                     message:(NSString*)message {
  // The Mac speech synthesizer is a bit buggy and occasionally returns a
  // number way out of range, so only forward indices that fall inside (or at
  // the end of) the string.
  if (character_index <= [string length]) {
    const std::string error_text = base::SysNSStringToUTF8(message);
    _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_ERROR,
                               character_index, -1, error_text);
  }
}

@end

// A synthesizer that is bound to one utterance at creation time and may speak
// it exactly once via -startSpeakingRetainedUtterance.
@implementation SingleUseSpeechSynthesizer {
 @private
  base::scoped_nsobject<NSString> _utterance;
  bool _didSpeak;
}

// Retains |utterance| for the later call to -startSpeakingRetainedUtterance.
- (id)initWithUtterance:(NSString*)utterance {
  if ((self = [super init])) {
    _didSpeak = false;
    _utterance.reset([utterance retain]);
  }
  return self;
}

// Starts speaking the retained utterance through the superclass and returns
// its result. CHECK-fails if called a second time or without an utterance.
- (bool)startSpeakingRetainedUtterance {
  CHECK(!_didSpeak);
  CHECK(_utterance);
  _didSpeak = true;

  NSString* retained_text = _utterance.get();
  return [super startSpeakingString:retained_text];
}

// Speaking arbitrary strings is not allowed on this class; any call is a
// CHECK failure.
- (bool)startSpeakingString:(NSString*)utterance {
  CHECK(false);
  return false;
}

@end