/*
* Copyright (C) 2020 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "SpeechRecognizer.h"
#include "SpeechRecognitionRequest.h"
#include "SpeechRecognitionUpdate.h"
#include <wtf/MediaTime.h>
#if PLATFORM(COCOA)
#include "MediaUtilities.h"
#include <pal/cf/CoreMediaSoftLink.h>
#endif
namespace WebCore {
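
// Typical call sequence (a sketch only; the owning object and the names updateCallback,
// audioSource and error below are illustrative, not part of this file):
//     auto recognizer = makeUnique<SpeechRecognizer>(WTFMove(updateCallback), WTFMove(request));
// #if ENABLE(MEDIA_STREAM)
//     recognizer->start(WTFMove(audioSource), mockSpeechRecognitionEnabled);
// #endif
//     ...
//     recognizer->stop();                  // finish and deliver pending results, or
//     recognizer->abort(WTFMove(error));   // tear down immediately
//     recognizer->prepareForDestruction(); // before the owner drops the recognizer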
SpeechRecognizer::SpeechRecognizer(DelegateCallback&& delegateCallback, UniqueRef<SpeechRecognitionRequest>&& request)
    : m_delegateCallback(WTFMove(delegateCallback))
    , m_request(WTFMove(request))
#if HAVE(SPEECHRECOGNIZER)
    , m_currentAudioSampleTime(PAL::kCMTimeZero)
#endif
{
}
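
// Aborts the session: report the error (if any) to the delegate, then stop audio capture and
// abort the underlying recognition.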
void SpeechRecognizer::abort(std::optional<SpeechRecognitionError>&& error)
{
    if (m_state == State::Aborting || m_state == State::Inactive)
        return;

    m_state = State::Aborting;
    if (error)
        m_delegateCallback(SpeechRecognitionUpdate::createError(clientIdentifier(), *error));

    stopCapture();
    abortRecognition();
}
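
// Stops capture and asks the recognizer to finish with the audio already delivered, so final
// results can still be reported before the session ends.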
void SpeechRecognizer::stop()
{
    if (m_state == State::Aborting || m_state == State::Inactive)
        return;

    m_state = State::Stopping;
    stopCapture();
    stopRecognition();
}

SpeechRecognitionConnectionClientIdentifier SpeechRecognizer::clientIdentifier() const
{
    return m_request->clientIdentifier();
}
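
// Called before the owner destroys the recognizer: deliver a final End update through the
// existing delegate, then swap in a no-op callback so nothing fires during teardown.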
void SpeechRecognizer::prepareForDestruction()
{
    if (m_state == State::Inactive)
        return;

    auto delegateCallback = std::exchange(m_delegateCallback, [](const SpeechRecognitionUpdate&) { });
    delegateCallback(SpeechRecognitionUpdate::create(clientIdentifier(), SpeechRecognitionUpdateType::End));
    m_state = State::Inactive;
}

#if ENABLE(MEDIA_STREAM)
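
// Starts a recognition session using the parameters carried by the request. If the platform
// recognizer cannot be started, a ServiceNotAllowed error is reported and capture never begins.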
void SpeechRecognizer::start(Ref<RealtimeMediaSource>&& source, bool mockSpeechRecognitionEnabled)
{
    if (!startRecognition(mockSpeechRecognitionEnabled, clientIdentifier(), m_request->lang(), m_request->continuous(), m_request->interimResults(), m_request->maxAlternatives())) {
        auto error = SpeechRecognitionError { SpeechRecognitionErrorType::ServiceNotAllowed, "Failed to start recognition"_s };
        m_delegateCallback(SpeechRecognitionUpdate::createError(clientIdentifier(), WTFMove(error)));
        return;
    }

    m_state = State::Running;
    m_delegateCallback(SpeechRecognitionUpdate::create(clientIdentifier(), SpeechRecognitionUpdateType::Start));
    startCapture(WTFMove(source));
}
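
// Creates the capture source wrapper. Both callbacks hold a WeakPtr to the recognizer, so audio
// data and state updates arriving after the recognizer is destroyed are silently dropped.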
void SpeechRecognizer::startCapture(Ref<RealtimeMediaSource>&& source)
{
    auto dataCallback = [weakThis = WeakPtr { *this }](const auto& time, const auto& data, const auto& description, auto sampleCount) {
        if (weakThis)
            weakThis->dataCaptured(time, data, description, sampleCount);
    };

    auto stateUpdateCallback = [weakThis = WeakPtr { *this }](const auto& update) {
        if (weakThis)
            weakThis->m_delegateCallback(update);
    };

    m_source = makeUnique<SpeechRecognitionCaptureSource>(clientIdentifier(), WTFMove(dataCallback), WTFMove(stateUpdateCallback), WTFMove(source));
}

#endif
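
// Tears down the capture source and notifies the delegate that audio capture has ended.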
void SpeechRecognizer::stopCapture()
{
    if (!m_source)
        return;

    m_source = nullptr;
    m_delegateCallback(SpeechRecognitionUpdate::create(clientIdentifier(), SpeechRecognitionUpdateType::AudioEnd));
}

#if !HAVE(SPEECHRECOGNIZER)
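
// Stub implementations for platforms without a native recognizer backend: captured audio is
// ignored, recognition trivially "starts", and stopping or aborting immediately reports End.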
void SpeechRecognizer::dataCaptured(const MediaTime&, const PlatformAudioData&, const AudioStreamDescription&, size_t)
{
}

bool SpeechRecognizer::startRecognition(bool, SpeechRecognitionConnectionClientIdentifier, const String&, bool, bool, uint64_t)
{
    return true;
}

void SpeechRecognizer::abortRecognition()
{
    m_delegateCallback(SpeechRecognitionUpdate::create(clientIdentifier(), SpeechRecognitionUpdateType::End));
}

void SpeechRecognizer::stopRecognition()
{
    m_delegateCallback(SpeechRecognitionUpdate::create(clientIdentifier(), SpeechRecognitionUpdateType::End));
}

#endif

} // namespace WebCore