[MSE] Decode glitches when watching videos on CNN.com
https://bugs.webkit.org/show_bug.cgi?id=206412
<rdar://problem/55685630>

Reviewed by Xabier Rodriguez-Calvar.

Source/WebCore:

Test: media/media-source/media-source-samples-out-of-order.html

The "Coded frame processing" algorithm has a known shortcoming <https://github.com/w3c/media-source/issues/187>
when dealing appends of with "SAP Type 2" content, or in general terms, appending data where the resulting samples
have presentation times that do not increase monotonically. When this occurs, the ordering of samples in presentation
time will be different from the ordering of samples in decode time. The decoder requires samples to be enqueued in
decode time order, but the MSE specification only checks for overlapping samples in presentation time order. During
appends of out-of-order samples, this can lead to new samples being inserted between a previously appended sample and
the sample on which that sample depends.

To resolve this, add a new step in the implementation of the "coded frame processing" algorithm in
SourceBuffer::sourceBufferPrivateDidReceiveSample(). When the incoming frame is a sync sample, search forward
in the TrackBuffer for all previous samples in between the new sync sample, and the next sync sample. All the
samples found in this step would fail to decode correctly if enqueued after the new (possibly different resolution)
sync sample, so they are removed in this step.

* Modules/mediasource/SampleMap.cpp:
(WebCore::DecodeOrderSampleMap::findSampleAfterDecodeKey):
* Modules/mediasource/SampleMap.h:
* Modules/mediasource/SourceBuffer.cpp:
(WebCore::SourceBuffer::sourceBufferPrivateDidReceiveSample):

LayoutTests:

* media/media-source/media-source-samples-out-of-order-expected.txt: Added.
* media/media-source/media-source-samples-out-of-order.html: Added.


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@254761 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/LayoutTests/ChangeLog b/LayoutTests/ChangeLog
index 2888d13..3f49c80 100644
--- a/LayoutTests/ChangeLog
+++ b/LayoutTests/ChangeLog
@@ -1,3 +1,14 @@
+2020-01-17  Jer Noble  <jer.noble@apple.com>
+
+        [MSE] Decode glitches when watching videos on CNN.com
+        https://bugs.webkit.org/show_bug.cgi?id=206412
+        <rdar://problem/55685630>
+
+        Reviewed by Xabier Rodriguez-Calvar.
+
+        * media/media-source/media-source-samples-out-of-order-expected.txt: Added.
+        * media/media-source/media-source-samples-out-of-order.html: Added.
+
 2020-01-17  Said Abou-Hallawa  <sabouhallawa@apple.com>
 
         REGRESSION: [iOS 13] svg/custom/glyph-selection-arabic-forms.svg is failing
diff --git a/LayoutTests/media/media-source/media-source-samples-out-of-order-expected.txt b/LayoutTests/media/media-source/media-source-samples-out-of-order-expected.txt
new file mode 100644
index 0000000..7e5c117
--- /dev/null
+++ b/LayoutTests/media/media-source/media-source-samples-out-of-order-expected.txt
@@ -0,0 +1,20 @@
+
+RUN(video.src = URL.createObjectURL(source))
+EVENT(sourceopen)
+RUN(sourceBuffer = source.addSourceBuffer("video/mock; codecs=mock"))
+First segment has normal, monotonically increasing samples.
+RUN(sourceBuffer.appendBuffer(mediaSegment))
+EVENT(updateend)
+EXPECTED (bufferedSamples.length == '3') OK
+{PTS({1/1 = 1.000000}), DTS({1/1 = 1.000000}), duration({1/1 = 1.000000}), flags(1), generation(0)}
+{PTS({2/1 = 2.000000}), DTS({2/1 = 2.000000}), duration({1/1 = 1.000000}), flags(0), generation(0)}
+{PTS({3/1 = 3.000000}), DTS({3/1 = 3.000000}), duration({1/1 = 1.000000}), flags(0), generation(0)}
+Second, overlapping segment has out-of-display-order samples. This append should replace the last sample from the previous append.
+RUN(sourceBuffer.appendBuffer(mediaSegment))
+EVENT(updateend)
+EXPECTED (bufferedSamples.length == '3') OK
+{PTS({1/1 = 1.000000}), DTS({1/1 = 1.000000}), duration({1/1 = 1.000000}), flags(1), generation(0)}
+{PTS({2/1 = 2.000000}), DTS({2/1 = 2.000000}), duration({1/1 = 1.000000}), flags(0), generation(0)}
+{PTS({4/1 = 4.000000}), DTS({2/1 = 2.000000}), duration({1/1 = 1.000000}), flags(1), generation(1)}
+END OF TEST
+
diff --git a/LayoutTests/media/media-source/media-source-samples-out-of-order.html b/LayoutTests/media/media-source/media-source-samples-out-of-order.html
new file mode 100644
index 0000000..aa8b65e
--- /dev/null
+++ b/LayoutTests/media/media-source/media-source-samples-out-of-order.html
@@ -0,0 +1,57 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>media-source-samples-out-of-order</title>
+    <script src="mock-media-source.js"></script>
+    <script src="../video-test.js"></script>
+    <script>
+    var source;
+    var sourceBuffer;
+    var initSegment;
+    var mediaSegment;
+
+    if (window.internals)
+        internals.initializeMockMediaSource();
+
+    window.addEventListener('load', async event => {
+        findMediaElement();
+
+        source = new MediaSource();
+        run('video.src = URL.createObjectURL(source)');
+        await waitFor(source, 'sourceopen');
+
+        run('sourceBuffer = source.addSourceBuffer("video/mock; codecs=mock")');
+        consoleWrite('First segment has normal, monotonically increasing samples.')
+        mediaSegment = concatenateSamples([
+            makeAInit(2, [makeATrack(1, 'mock', TRACK_KIND.AUDIO)]), 
+            makeASample(1, 1, 1, 1, 1, SAMPLE_FLAG.SYNC, 0),
+            makeASample(2, 2, 1, 1, 1, SAMPLE_FLAG.NONE, 0),
+            makeASample(3, 3, 1, 1, 1, SAMPLE_FLAG.NONE, 0),
+        ]);
+        run('sourceBuffer.appendBuffer(mediaSegment)');
+        await waitFor(sourceBuffer, 'updateend');
+
+        bufferedSamples = internals.bufferedSamplesForTrackID(sourceBuffer, 1);
+        testExpected("bufferedSamples.length", 3);
+        bufferedSamples.forEach(consoleWrite);
+
+        consoleWrite('Second, overlapping segment has out-of-display-order samples. This append should replace the last sample from the previous append.')
+        mediaSegment = concatenateSamples([
+            makeAInit(2, [makeATrack(1, 'mock', TRACK_KIND.AUDIO)]), 
+            makeASample(4, 2, 1, 1, 1, SAMPLE_FLAG.SYNC, 1),
+        ]);
+        run('sourceBuffer.appendBuffer(mediaSegment)');
+        await waitFor(sourceBuffer, 'updateend');
+
+        bufferedSamples = internals.bufferedSamplesForTrackID(sourceBuffer, 1);
+        testExpected("bufferedSamples.length", 3);
+        bufferedSamples.forEach(consoleWrite);
+
+        endTest();
+    });
+    </script>
+</head>
+<body>
+    <video></video>
+</body>
+</html>
diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog
index 45fb9c6..708675d 100644
--- a/Source/WebCore/ChangeLog
+++ b/Source/WebCore/ChangeLog
@@ -1,3 +1,33 @@
+2020-01-17  Jer Noble  <jer.noble@apple.com>
+
+        [MSE] Decode glitches when watching videos on CNN.com
+        https://bugs.webkit.org/show_bug.cgi?id=206412
+        <rdar://problem/55685630>
+
+        Reviewed by Xabier Rodriguez-Calvar.
+
+        Test: media/media-source/media-source-samples-out-of-order.html
+
+        The "Coded frame processing" algorithm has a known shortcoming <https://github.com/w3c/media-source/issues/187>
+        when dealing appends of with "SAP Type 2" content, or in general terms, appending data where the resulting samples
+        have presentation times that do not increase monotonically. When this occurs, the ordering of samples in presentation
+        time will be different from the ordering of samples in decode time. The decoder requires samples to be enqueued in
+        decode time order, but the MSE specification only checks for overlapping samples in presentation time order. During
+        appends of out-of-order samples, this can lead to new samples being inserted between a previously appended sample and
+        the sample on which that sample depends.
+
+        To resolve this, add a new step in the implementation of the "coded frame processing" algorithm in 
+        SourceBuffer::sourceBufferPrivateDidReceiveSample(). When the incoming frame is a sync sample, search forward
+        in the TrackBuffer for all previous samples in between the new sync sample, and the next sync sample. All the
+        samples found in this step would fail to decode correctly if enqueued after the new (possibly different resolution)
+        sync sample, so they are removed in this step.
+
+        * Modules/mediasource/SampleMap.cpp:
+        (WebCore::DecodeOrderSampleMap::findSampleAfterDecodeKey):
+        * Modules/mediasource/SampleMap.h:
+        * Modules/mediasource/SourceBuffer.cpp:
+        (WebCore::SourceBuffer::sourceBufferPrivateDidReceiveSample):
+
 2020-01-17  Antti Koivisto  <antti@apple.com>
 
         [LFC][IFC] Use Optional for partialLeading/TrailingTextItem
diff --git a/Source/WebCore/Modules/mediasource/SampleMap.cpp b/Source/WebCore/Modules/mediasource/SampleMap.cpp
index 07cf5d7..b71b337 100644
--- a/Source/WebCore/Modules/mediasource/SampleMap.cpp
+++ b/Source/WebCore/Modules/mediasource/SampleMap.cpp
@@ -188,6 +188,11 @@
     return m_samples.find(key);
 }
 
+DecodeOrderSampleMap::iterator DecodeOrderSampleMap::findSampleAfterDecodeKey(const KeyType& key)
+{
+    return m_samples.upper_bound(key);
+}
+
 PresentationOrderSampleMap::reverse_iterator PresentationOrderSampleMap::reverseFindSampleContainingPresentationTime(const MediaTime& time)
 {
     auto range = std::equal_range(rbegin(), rend(), time, SampleIsGreaterThanMediaTimeComparator<MapType>());
diff --git a/Source/WebCore/Modules/mediasource/SampleMap.h b/Source/WebCore/Modules/mediasource/SampleMap.h
index f783082..eaf4931 100644
--- a/Source/WebCore/Modules/mediasource/SampleMap.h
+++ b/Source/WebCore/Modules/mediasource/SampleMap.h
@@ -93,6 +93,7 @@
     const_reverse_iterator rend() const { return m_samples.rend(); }
 
     WEBCORE_EXPORT iterator findSampleWithDecodeKey(const KeyType&);
+    WEBCORE_EXPORT iterator findSampleAfterDecodeKey(const KeyType&);
     WEBCORE_EXPORT reverse_iterator reverseFindSampleWithDecodeKey(const KeyType&);
     WEBCORE_EXPORT reverse_iterator findSyncSamplePriorToPresentationTime(const MediaTime&, const MediaTime& threshold = MediaTime::positiveInfiniteTime());
     WEBCORE_EXPORT reverse_iterator findSyncSamplePriorToDecodeIterator(reverse_iterator);
diff --git a/Source/WebCore/Modules/mediasource/SourceBuffer.cpp b/Source/WebCore/Modules/mediasource/SourceBuffer.cpp
index 2f4e598..b7698b6 100644
--- a/Source/WebCore/Modules/mediasource/SourceBuffer.cpp
+++ b/Source/WebCore/Modules/mediasource/SourceBuffer.cpp
@@ -1683,6 +1683,29 @@
                 erasedSamples.addRange(iter_pair.first, iter_pair.second);
         }
 
+        // When appending media containing B-frames (media whose samples' presentation timestamps
+        // do not increase monotonically, the prior erase steps could leave a sample in the trackBuffer
+        // which will be disconnected from its previous I-frame. If the incoming frame is an I-frame,
+        // remove all samples in decode order between the incoming I-frame's decode timestamp and the
+        // next I-frame. See <https://github.com/w3c/media-source/issues/187> for a discussion of what
+        // the how the MSE specification should handlie this secnario.
+        do {
+            if (!sample.isSync())
+                break;
+
+            DecodeOrderSampleMap::KeyType decodeKey(sample.decodeTime(), sample.presentationTime());
+            auto nextSampleInDecodeOrder = trackBuffer.samples.decodeOrder().findSampleAfterDecodeKey(decodeKey);
+            if (nextSampleInDecodeOrder == trackBuffer.samples.decodeOrder().end())
+                break;
+
+            if (nextSampleInDecodeOrder->second->isSync())
+                break;
+
+            auto nextSyncSample = trackBuffer.samples.decodeOrder().findSyncSampleAfterDecodeIterator(nextSampleInDecodeOrder);
+            INFO_LOG(LOGIDENTIFIER, "Discovered out-of-order frames, from: ", *nextSampleInDecodeOrder->second, " to: ", (nextSyncSample == trackBuffer.samples.decodeOrder().end() ? "[end]"_s : toString(*nextSyncSample->second)));
+            erasedSamples.addRange(nextSampleInDecodeOrder, nextSyncSample);
+        } while (false);
+
         // There are many files out there where the frame times are not perfectly contiguous and may have small overlaps
         // between the beginning of a frame and the end of the previous one; therefore a tolerance is needed whenever
         // durations are considered.