Skip to content

Commit

Permalink
feat(TTML): Add support for IMSC1 (CMAF) image subtitle (#6968)
Browse files Browse the repository at this point in the history
  • Loading branch information
avelad authored Jul 2, 2024
1 parent 62881f6 commit 3b62296
Show file tree
Hide file tree
Showing 12 changed files with 117 additions and 13 deletions.
8 changes: 8 additions & 0 deletions demo/common/assets.js
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,14 @@ shakaAssets.testAssets = [
.addFeature(shakaAssets.Feature.DASH)
.addFeature(shakaAssets.Feature.MP4)
.addFeature(shakaAssets.Feature.MPD_CHAINING),
// DASH-IF test stream (DASH, MP4) used to exercise IMSC1 (CMAF) image
// subtitles; tagged with the SUBTITLES feature so it can be filtered on.
new ShakaDemoAssetInfo(
/* name= */ 'DASH-IF IMSC1 (CMAF) Image Subtitle',
/* iconUri= */ 'https://storage.googleapis.com/shaka-asset-icons/dash_if_test_pattern.png',
/* manifestUri= */ 'https://livesim2.dashif.org/vod/testpic_2s/imsc1_img.mpd',
/* source= */ shakaAssets.Source.DASH_IF)
.addFeature(shakaAssets.Feature.DASH)
.addFeature(shakaAssets.Feature.SUBTITLES)
.addFeature(shakaAssets.Feature.MP4),
// End DASH-IF Assets }}}

// bitcodin assets {{{
Expand Down
3 changes: 2 additions & 1 deletion externs/shaka/text.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ shaka.extern.TextParser = class {
* for each cue.
* @param {?(string|undefined)} uri
* The media uri.
* @param {!Array.<string>} images
* The image URIs that cues may reference by index; empty if there are none.
* @return {!Array.<!shaka.text.Cue>}
*
* @exportDoc
*/
parseMedia(data, timeContext, uri) {}
parseMedia(data, timeContext, uri, images) {}

/**
* Notifies the stream if the manifest is in sequence mode or not.
Expand Down
5 changes: 3 additions & 2 deletions lib/player.js
Original file line number Diff line number Diff line change
Expand Up @@ -5787,7 +5787,7 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
vttOffset: 0,
};
const data = shaka.util.BufferUtils.toUint8(buffer);
const cues = TextParser.parseMedia(data, time, uri);
const cues = TextParser.parseMedia(data, time, uri, /* images= */ []);

const references = [];
for (const cue of cues) {
Expand Down Expand Up @@ -6054,7 +6054,8 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
vttOffset: 0,
};
const data = shaka.util.BufferUtils.toUint8(buffer);
const cues = obj.parseMedia(data, time, /* uri= */ null);
const cues = obj.parseMedia(
data, time, /* uri= */ null, /* images= */ []);
return shaka.text.WebVttGenerator.convert(cues, adCuePoints);
}
throw new shaka.util.Error(
Expand Down
54 changes: 53 additions & 1 deletion lib/text/mp4_ttml_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ goog.provide('shaka.text.Mp4TtmlParser');

goog.require('shaka.text.TextEngine');
goog.require('shaka.text.TtmlTextParser');
goog.require('shaka.util.BufferUtils');
goog.require('shaka.util.Error');
goog.require('shaka.util.Mp4Parser');
goog.require('shaka.util.Uint8ArrayUtils');


/**
Expand Down Expand Up @@ -81,12 +83,54 @@ shaka.text.Mp4TtmlParser = class {
let sawMDAT = false;
let payload = [];

/** @type {!Array.<number>} */
let subSizes = [];

const parser = new Mp4Parser()
.box('moof', Mp4Parser.children)
.box('traf', Mp4Parser.children)
// Collect the size of every sub-sample listed in a 'subs' box into subSizes,
// which the 'mdat' handler later uses to split the media data. Each 'subs'
// box starts from an empty list, replacing sizes from any earlier box.
.fullBox('subs', (box) => {
subSizes = [];
const reader = box.reader;
const entryCount = reader.readUint32();
for (let i = 0; i < entryCount; i++) {
reader.readUint32(); // sample_delta
const subsampleCount = reader.readUint16();
for (let j = 0; j < subsampleCount; j++) {
// Version 1 boxes store each size in 32 bits; other versions use 16 bits.
if (box.version == 1) {
subSizes.push(reader.readUint32());
} else {
subSizes.push(reader.readUint16());
}
// The remaining fields are read only to advance the reader; their values
// are discarded.
reader.readUint8(); // priority
reader.readUint8(); // discardable
reader.readUint32(); // codec_specific_parameters
}
}
})
.box('mdat', Mp4Parser.allData((data) => {
sawMDAT = true;
// Join this to any previous payload, in case the mp4 has multiple
// mdats.
payload = payload.concat(this.parser_.parseMedia(data, time, uri));
if (subSizes.length) {
const contentData =
shaka.util.BufferUtils.toUint8(data, 0, subSizes[0]);
const images = [];
let offset = subSizes[0];
for (let i = 1; i < subSizes.length; i++) {
const imageData =
shaka.util.BufferUtils.toUint8(data, offset, subSizes[i]);
const raw =
shaka.util.Uint8ArrayUtils.toStandardBase64(imageData);
images.push('data:image/png;base64,' + raw);
offset += subSizes[i];
}
payload = payload.concat(
this.parser_.parseMedia(contentData, time, uri, images));
} else {
payload = payload.concat(
this.parser_.parseMedia(data, time, uri, /* images= */ []));
}
}));
parser.parse(data, /* partialOkay= */ false);

Expand All @@ -97,6 +141,8 @@ shaka.text.Mp4TtmlParser = class {
shaka.util.Error.Code.INVALID_MP4_TTML);
}

console.log(payload);

return payload;
}
};
Expand All @@ -107,9 +153,15 @@ shaka.text.TextEngine.registerParser(
// Register Mp4TtmlParser for the "stpp.ttml" codec string and for each of its
// im1i / im1t / im2i / im2t variants. A new parser instance is created every
// time the registered factory is invoked.
for (const mimeType of [
  'application/mp4; codecs="stpp.ttml"',
  'application/mp4; codecs="stpp.ttml.im1i"',
  'application/mp4; codecs="stpp.ttml.im1t"',
  'application/mp4; codecs="stpp.ttml.im2i"',
  'application/mp4; codecs="stpp.ttml.im2t"',
]) {
  shaka.text.TextEngine.registerParser(
      mimeType, () => new shaka.text.Mp4TtmlParser());
}
Expand Down
2 changes: 1 addition & 1 deletion lib/text/srt_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ shaka.text.SrtTextParser = class {

const newData = BufferUtils.toUint8(StringUtils.toUTF8(vvtText));

return this.parser_.parseMedia(newData, time, uri);
return this.parser_.parseMedia(newData, time, uri, /* images= */ []);
}

/**
Expand Down
2 changes: 1 addition & 1 deletion lib/text/text_engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ shaka.text.TextEngine = class {

// Parse the buffer and add the new cues.
const allCues = this.parser_.parseMedia(
shaka.util.BufferUtils.toUint8(buffer), time, uri);
shaka.util.BufferUtils.toUint8(buffer), time, uri, /* images= */ []);
for (const cue of allCues) {
this.modifyCueCallback_(cue, uri || null, time);
}
Expand Down
18 changes: 14 additions & 4 deletions lib/text/ttml_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ shaka.text.TtmlTextParser = class {
* @override
* @export
*/
parseMedia(data, time, uri) {
parseMedia(data, time, uri, images) {
const TtmlTextParser = shaka.text.TtmlTextParser;
const TXml = shaka.util.TXml;
const ttpNs = TtmlTextParser.parameterNs_;
Expand Down Expand Up @@ -148,7 +148,7 @@ shaka.text.TtmlTextParser = class {
body, time, rateInfo, metadataElements, styles,
regionElements, cueRegions, collapseMultipleSpaces,
cellResolutionInfo, /* parentCueElement= */ null,
/* isContent= */ false, uri);
/* isContent= */ false, uri, images);
if (cue) {
// According to the TTML spec, backgrounds default to transparent.
// So default the background of the top-level element to transparent.
Expand Down Expand Up @@ -177,13 +177,14 @@ shaka.text.TtmlTextParser = class {
* @param {?shaka.extern.xml.Node} parentCueElement
* @param {boolean} isContent
* @param {?(string|undefined)} uri
* @param {!Array.<string>} images
* @return {shaka.text.Cue}
* @private
*/
static parseCue_(
cueNode, timeContext, rateInfo, metadataElements, styles, regionElements,
cueRegions, collapseMultipleSpaces, cellResolution, parentCueElement,
isContent, uri) {
isContent, uri, images) {
const TXml = shaka.util.TXml;
const StringUtils = shaka.util.StringUtils;
/** @type {shaka.extern.xml.Node} */
Expand Down Expand Up @@ -230,7 +231,15 @@ shaka.text.TtmlTextParser = class {
cueElement,
shaka.text.TtmlTextParser.smpteNsList_,
'backgroundImage');
if (uri && backgroundImage && !backgroundImage.startsWith('#')) {
const imsc1ImgUrnTester =
/^(urn:)(mpeg:[a-z0-9][a-z0-9-]{0,31}:)(subs:)([0-9]+)$/;
if (backgroundImage && imsc1ImgUrnTester.test(backgroundImage)) {
const index = parseInt(backgroundImage.split(':').pop(), 10) -1;
if (index >= images.length) {
return null;
}
imageUri = images[index];
} else if (uri && backgroundImage && !backgroundImage.startsWith('#')) {
const baseUri = new goog.Uri(uri);
const relativeUri = new goog.Uri(backgroundImage);
const newUri = baseUri.resolve(relativeUri).toString();
Expand Down Expand Up @@ -270,6 +279,7 @@ shaka.text.TtmlTextParser = class {
cueElement,
isContent,
uri,
images,
);

// This node may or may not generate a nested cue.
Expand Down
Binary file added test/test/assets/imsc-image-init.cmft
Binary file not shown.
Binary file added test/test/assets/imsc-image-segment.cmft
Binary file not shown.
28 changes: 27 additions & 1 deletion test/text/mp4_ttml_parser_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ describe('Mp4TtmlParser', () => {
const ttmlSegmentUri = '/base/test/test/assets/ttml-segment.mp4';
const ttmlSegmentMultipleMDATUri =
'/base/test/test/assets/ttml-segment-multiplemdat.mp4';
const imscImageInitSegmentUri =
'/base/test/test/assets/imsc-image-init.cmft';
const imscImageSegmentUri =
'/base/test/test/assets/imsc-image-segment.cmft';
const audioInitSegmentUri = '/base/test/test/assets/sintel-audio-init.mp4';

/** @type {!Uint8Array} */
Expand All @@ -18,19 +22,27 @@ describe('Mp4TtmlParser', () => {
/** @type {!Uint8Array} */
let ttmlSegmentMultipleMDAT;
/** @type {!Uint8Array} */
let imscImageInitSegment;
/** @type {!Uint8Array} */
let imscImageSegment;
/** @type {!Uint8Array} */
let audioInitSegment;

// Fetch every fixture in parallel once, before any spec runs, and keep each
// one as a Uint8Array for the parser under test.
beforeAll(async () => {
  // The order of the requests must match the indices used below.
  const responses = await Promise.all([
    shaka.test.Util.fetch(ttmlInitSegmentUri),
    shaka.test.Util.fetch(ttmlSegmentUri),
    shaka.test.Util.fetch(ttmlSegmentMultipleMDATUri),
    shaka.test.Util.fetch(imscImageInitSegmentUri),
    shaka.test.Util.fetch(imscImageSegmentUri),
    shaka.test.Util.fetch(audioInitSegmentUri),
  ]);
  ttmlInitSegment = shaka.util.BufferUtils.toUint8(responses[0]);
  ttmlSegment = shaka.util.BufferUtils.toUint8(responses[1]);
  ttmlSegmentMultipleMDAT = shaka.util.BufferUtils.toUint8(responses[2]);
  imscImageInitSegment = shaka.util.BufferUtils.toUint8(responses[3]);
  imscImageSegment = shaka.util.BufferUtils.toUint8(responses[4]);
  // The audio init segment is the last request (index 5); index 3 now
  // holds the IMSC image init segment.
  audioInitSegment = shaka.util.BufferUtils.toUint8(responses[5]);
});

it('parses init segment', () => {
Expand Down Expand Up @@ -168,4 +180,18 @@ describe('Mp4TtmlParser', () => {
shaka.test.TtmlUtils.verifyHelper(
cues, result, {startTime: 23, endTime: 53.5});
});

// Parses the IMSC1 image fixture and checks the resulting cue tree: one body
// cue containing one div cue whose backgroundImage is defined.
it('handles IMSC1 (CMAF) image subtitle', () => {
  const mp4TtmlParser = new shaka.text.Mp4TtmlParser();
  mp4TtmlParser.parseInit(imscImageInitSegment);

  const timeContext = {
    periodStart: 0,
    segmentStart: 0,
    segmentEnd: 60,
    vttOffset: 0,
  };
  const bodies =
      mp4TtmlParser.parseMedia(imscImageSegment, timeContext, null);

  // Bodies.
  expect(bodies.length).toBe(1);

  // Divs.
  const divs = bodies[0].nestedCues;
  expect(divs.length).toBe(1);

  // Cues.
  expect(divs[0].backgroundImage).toBeDefined();
});
});
6 changes: 6 additions & 0 deletions test/text/text_engine_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ describe('TextEngine', () => {
dummyData,
{periodStart: 0, segmentStart: 0, segmentEnd: 3, vttOffset: 0},
undefined,
[],
]);

expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([
Expand All @@ -122,6 +123,7 @@ describe('TextEngine', () => {
dummyData,
{periodStart: 0, segmentStart: 3, segmentEnd: 5, vttOffset: 0},
undefined,
[],
]);

expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([
Expand Down Expand Up @@ -289,6 +291,7 @@ describe('TextEngine', () => {
dummyData,
{periodStart: 0, segmentStart: 0, segmentEnd: 3, vttOffset: 0},
undefined,
[],
]);
expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([
[
Expand All @@ -304,6 +307,7 @@ describe('TextEngine', () => {
dummyData,
{periodStart: 4, segmentStart: 4, segmentEnd: 7, vttOffset: 4},
undefined,
[],
]);
expect(mockDisplayer.appendSpy).toHaveBeenCalledOnceMoreWith([
[
Expand Down Expand Up @@ -335,6 +339,7 @@ describe('TextEngine', () => {
dummyData,
{periodStart: 0, segmentStart: 0, segmentEnd: 3, vttOffset: 0},
undefined,
[],
]);

textEngine.setTimestampOffset(8);
Expand All @@ -345,6 +350,7 @@ describe('TextEngine', () => {
dummyData,
{periodStart: 8, segmentStart: 4, segmentEnd: 7, vttOffset: 4},
undefined,
[],
]);
});
});
Expand Down
4 changes: 2 additions & 2 deletions test/text/ttml_text_parser_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -2154,7 +2154,7 @@ describe('TtmlTextParser', () => {
const data =
shaka.util.BufferUtils.toUint8(shaka.util.StringUtils.toUTF8(text));
const result = new shaka.text.TtmlTextParser()
.parseMedia(data, time, 'foo://bar');
.parseMedia(data, time, 'foo://bar', /* images= */ []);
shaka.test.TtmlUtils.verifyHelper(
cues, result, bodyProperties, divProperties);
}
Expand Down Expand Up @@ -2182,7 +2182,7 @@ describe('TtmlTextParser', () => {
new shaka.text.TtmlTextParser().parseMedia(
shaka.util.BufferUtils.toUint8(data),
{periodStart: 0, segmentStart: 0, segmentEnd: 10, vttOffset: 0},
'foo://bar');
'foo://bar', /* images= */ []);
}).toThrow(error);
}
});

0 comments on commit 3b62296

Please sign in to comment.