You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1208 lines
38 KiB

/**
* mux.js
*
* Copyright (c) Brightcove
* Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
*
* A stream-based mp2t to mp4 converter. This utility can be used to
* deliver mp4s to a SourceBuffer on platforms that support native
* Media Source Extensions.
*/
'use strict';
var Stream = require('../utils/stream.js');
var mp4 = require('./mp4-generator.js');
var frameUtils = require('./frame-utils');
var audioFrameUtils = require('./audio-frame-utils');
var trackDecodeInfo = require('./track-decode-info');
var m2ts = require('../m2ts/m2ts.js');
var clock = require('../utils/clock');
var AdtsStream = require('../codecs/adts.js');
var H264Stream = require('../codecs/h264').H264Stream;
var AacStream = require('../aac');
var isLikelyAacData = require('../aac/utils').isLikelyAacData;
var ONE_SECOND_IN_TS = require('../utils/clock').ONE_SECOND_IN_TS;
var AUDIO_PROPERTIES = require('../constants/audio-properties.js');
var VIDEO_PROPERTIES = require('../constants/video-properties.js');
// object types
var VideoSegmentStream, AudioSegmentStream, Transmuxer, CoalesceStream;
/**
 * Compare two array-like values (plain or typed arrays) for same-ness.
 *
 * @param {Array|TypedArray} a the first sequence
 * @param {Array|TypedArray} b the second sequence
 * @return {boolean} true when both have the same length and every index
 * holds strictly equal (`===`) values, false otherwise
 */
var arrayEquals = function(a, b) {
  var
    count = a.length,
    index = 0;

  // sequences of different sizes can never match
  if (count !== b.length) {
    return false;
  }

  // walk both sequences in step and bail out on the first mismatch
  while (index < count) {
    if (a[index] !== b[index]) {
      return false;
    }
    index += 1;
  }

  return true;
};
/**
 * Describe a segment's timing relative to the player timeline.
 *
 * The PTS and DTS values passed in are the actual stream times of the segment,
 * but player times start from the baseMediaDecodeTime. The returned values are
 * therefore the baseMediaDecodeTime plus offsets/durations derived from the
 * stream times.
 *
 * @param {number} baseMediaDecodeTime decode time the segment starts at
 * @param {number} startDts stream DTS of the start of the segment
 * @param {number} startPts stream PTS of the start of the segment
 * @param {number} endDts stream DTS of the end of the segment
 * @param {number} endPts stream PTS of the end of the segment
 * @param {number} prependedContentDuration duration of content prepended to the
 * segment, passed through unchanged
 * @return {object} `{start: {dts, pts}, end: {dts, pts},
 * prependedContentDuration, baseMediaDecodeTime}`
 */
var generateSegmentTimingInfo = function(
  baseMediaDecodeTime,
  startDts,
  startPts,
  endDts,
  endPts,
  prependedContentDuration
) {
  var segmentStart = {
    dts: baseMediaDecodeTime,
    // keep the original distance between the first PTS and the first DTS
    pts: baseMediaDecodeTime + (startPts - startDts)
  };
  var segmentEnd = {
    // base time plus the decode duration of the segment
    dts: baseMediaDecodeTime + (endDts - startDts),
    // base time plus the presentation duration of the segment
    pts: baseMediaDecodeTime + (endPts - startPts)
  };

  return {
    start: segmentStart,
    end: segmentEnd,
    prependedContentDuration: prependedContentDuration,
    baseMediaDecodeTime: baseMediaDecodeTime
  };
};
/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 *
 * Frames are buffered by `push` and turned into one moof+mdat pair per
 * `flush`. A flush emits, in order: `segmentTimingInfo` and `timingInfo`
 * (only when frames remain after trimming), `data` and finally `done`.
 *
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 * in the source; false to adjust the first segment to start at 0.
 */
AudioSegmentStream = function(track, options) {
  var
    pendingFrames = [],
    segmentCount = 0,
    minimumDts = 0,
    appendStartTs = 0,
    videoBaseTime = Infinity;

  options = options || {};

  AudioSegmentStream.prototype.init.call(this);

  /**
   * Buffer one ADTS frame event and copy its audio properties onto the track.
   * @param data {object} a frame event from the ADTS stream
   */
  this.push = function(data) {
    trackDecodeInfo.collectDtsInfo(track, data);

    if (track) {
      AUDIO_PROPERTIES.forEach(function(prop) {
        track[prop] = data[prop];
      });
    }

    // hold on to the frame until the next flush
    pendingFrames.push(data);
  };

  /**
   * Set the DTS handed to trimAdtsFramesByEarliestDts on the next flush.
   */
  this.setEarliestDts = function(earliestDts) {
    minimumDts = earliestDts;
  };

  /**
   * Record the video track's baseMediaDecodeTime, which is handed to
   * prefixWithSilence on the next flush.
   */
  this.setVideoBaseMediaDecodeTime = function(baseMediaDecodeTime) {
    videoBaseTime = baseMediaDecodeTime;
  };

  /**
   * Record the audio append start timestamp, which is handed to
   * prefixWithSilence on the next flush.
   */
  this.setAudioAppendStart = function(timestamp) {
    appendStartTs = timestamp;
  };

  /**
   * Build a moof+mdat pair from the buffered frames and pass it down the
   * pipeline.
   */
  this.flush = function() {
    var
      frames,
      silenceDuration,
      mdat,
      moof,
      boxes,
      firstFrame,
      segmentDuration;

    // nothing was buffered: just signal completion
    if (pendingFrames.length === 0) {
      this.trigger('done', 'AudioSegmentStream');
      return;
    }

    frames = audioFrameUtils.trimAdtsFramesByEarliestDts(
      pendingFrames, track, minimumDts);
    track.baseMediaDecodeTime = trackDecodeInfo.calculateTrackBaseMediaDecodeTime(
      track, options.keepOriginalTimestamps);

    // amount of audio filled, expressed in video clock rather than audio clock
    silenceDuration = audioFrameUtils.prefixWithSilence(
      track, frames, appendStartTs, videoBaseTime);

    // build the index from byte locations to samples (that is, adts frames)
    // in the audio data
    track.samples = audioFrameUtils.generateSampleTable(frames);

    // concatenate the audio data to construct the mdat
    mdat = mp4.mdat(audioFrameUtils.concatenateFrameData(frames));

    pendingFrames = [];

    moof = mp4.moof(segmentCount, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // the next segment gets the next sequence number
    segmentCount += 1;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    trackDecodeInfo.clearDtsInfo(track);

    // TODO this check was added to maintain backwards compatibility (particularly with
    // tests) on adding the timingInfo event. However, it seems unlikely that there's a
    // valid use-case where an init segment/data should be triggered without associated
    // frames. Leaving for now, but should be looked into.
    if (frames.length) {
      firstFrame = frames[0];
      // every frame is assumed to span 1024 samples; round each frame up to a
      // whole clock tick before multiplying by the frame count
      segmentDuration =
        frames.length * Math.ceil(ONE_SECOND_IN_TS * 1024 / track.samplerate);

      this.trigger(
        'segmentTimingInfo',
        generateSegmentTimingInfo(
          // The audio track's baseMediaDecodeTime is in audio clock cycles, but the
          // frame info is in video clock cycles. Convert to match expectation of
          // listeners (that all timestamps will be based on video clock cycles).
          clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate),
          // frame times are already in video clock, as is segment duration
          firstFrame.dts,
          firstFrame.pts,
          firstFrame.dts + segmentDuration,
          firstFrame.pts + segmentDuration,
          silenceDuration || 0
        )
      );

      this.trigger('timingInfo', {
        start: firstFrame.pts,
        end: firstFrame.pts + segmentDuration
      });
    }

    this.trigger('data', {track: track, boxes: boxes});
    this.trigger('done', 'AudioSegmentStream');
  };

  /**
   * Drop buffered frames and collected DTS info, then emit `reset`.
   */
  this.reset = function() {
    trackDecodeInfo.clearDtsInfo(track);
    pendingFrames = [];
    this.trigger('reset');
  };
};
AudioSegmentStream.prototype = new Stream();
/**
 * Constructs a single-track, ISO BMFF media segment from H264 data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 *
 * NAL units are buffered by `push` and turned into one moof+mdat pair per
 * `flush`. A successful flush emits, in order: `processedGopsInfo`,
 * `segmentTimingInfo`, `timingInfo`, `baseMediaDecodeTime`,
 * `timelineStartInfo`, `data` and finally `done`.
 *
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.alignGopsAtEnd {boolean} If true, start from the end of the
 * gopsToAlignWith list when attempting to align gop pts
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 * in the source; false to adjust the first segment to start at 0.
 */
VideoSegmentStream = function(track, options) {
  var
    // Keep a maximum of 6 GOPs in the cache
    MAX_CACHED_GOPS = 6,
    sequenceNumber = 0,
    nalUnits = [],
    gopsToAlignWith = [],
    config,
    pps,
    cacheLastGop;

  options = options || {};

  VideoSegmentStream.prototype.init.call(this);

  delete track.minPTS;

  this.gopCache_ = [];

  // Remove the last GOP from `gops` and store it, together with the track's
  // current parameter sets, at the front of the stream's gop cache. The cache
  // is read through `stream` on every call because owners may replace the
  // `gopCache_` array.
  cacheLastGop = function(stream, gops) {
    stream.gopCache_.unshift({
      gop: gops.pop(),
      pps: track.pps,
      sps: track.sps
    });
    stream.gopCache_.length = Math.min(MAX_CACHED_GOPS, stream.gopCache_.length);
  };

  /**
   * Constructs a ISO BMFF segment given H264 nalUnits
   * @param {Object} nalUnit A data event representing a nalUnit
   * @param {String} nalUnit.nalUnitType
   * @param {Object} nalUnit.config Properties for a mp4 track
   * @param {Uint8Array} nalUnit.data The nalUnit bytes
   * @see lib/codecs/h264.js
   **/
  this.push = function(nalUnit) {
    trackDecodeInfo.collectDtsInfo(track, nalUnit);

    // record the track config from the first SPS seen since the last flush
    if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' && !config) {
      config = nalUnit.config;
      track.sps = [nalUnit.data];

      VIDEO_PROPERTIES.forEach(function(prop) {
        track[prop] = config[prop];
      }, this);
    }

    // likewise only the first PPS since the last flush is recorded
    if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' &&
        !pps) {
      pps = nalUnit.data;
      track.pps = [nalUnit.data];
    }

    // buffer video until flush() is called
    nalUnits.push(nalUnit);
  };

  /**
   * Pass constructed ISO BMFF track and boxes on to the
   * next stream in the pipeline
   **/
  this.flush = function() {
    var
      frames,
      gopForFusion,
      gops,
      alignedGops,
      moof,
      mdat,
      boxes,
      prependedContentDuration = 0,
      firstGop,
      lastGop;

    // Throw away nalUnits at the start of the byte stream until
    // we find the first AUD
    while (nalUnits.length) {
      if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') {
        break;
      }
      nalUnits.shift();
    }

    // Return early if no video data has been observed
    if (nalUnits.length === 0) {
      this.resetStream_();
      this.trigger('done', 'VideoSegmentStream');
      return;
    }

    // Organize the raw nal-units into arrays that represent
    // higher-level constructs such as frames and gops
    // (group-of-pictures)
    frames = frameUtils.groupNalsIntoFrames(nalUnits);
    gops = frameUtils.groupFramesIntoGops(frames);

    // If the first frame of this fragment is not a keyframe we have
    // a problem since MSE (on Chrome) requires a leading keyframe.
    //
    // We have two approaches to repairing this situation:
    // 1) GOP-FUSION:
    //    This is where we keep track of the GOPS (group-of-pictures)
    //    from previous fragments and attempt to find one that we can
    //    prepend to the current fragment in order to create a valid
    //    fragment.
    // 2) KEYFRAME-PULLING:
    //    Here we search for the first keyframe in the fragment and
    //    throw away all the frames between the start of the fragment
    //    and that keyframe. We then extend the duration and pull the
    //    PTS of the keyframe forward so that it covers the time range
    //    of the frames that were disposed of.
    //
    // #1 is far preferable over #2 which can cause "stuttering" but
    // requires more things to be just right.
    if (!gops[0][0].keyFrame) {
      // Search for a gop for fusion from our gopCache
      gopForFusion = this.getGopForFusion_(nalUnits[0]);

      if (gopForFusion) {
        // in order to provide more accurate timing information about the segment, save
        // the duration of the content prepended to the original segment due to GOP fusion
        prependedContentDuration = gopForFusion.duration;

        gops.unshift(gopForFusion);
        // Adjust Gops' metadata to account for the inclusion of the
        // new gop at the beginning
        gops.byteLength += gopForFusion.byteLength;
        gops.nalCount += gopForFusion.nalCount;
        gops.pts = gopForFusion.pts;
        gops.dts = gopForFusion.dts;
        gops.duration += gopForFusion.duration;
      } else {
        // If we didn't find a candidate gop fall back to keyframe-pulling
        gops = frameUtils.extendFirstKeyFrame(gops);
      }
    }

    // Trim gops to align with gopsToAlignWith
    if (gopsToAlignWith.length) {
      if (options.alignGopsAtEnd) {
        alignedGops = this.alignGopsAtEnd_(gops);
      } else {
        alignedGops = this.alignGopsAtStart_(gops);
      }

      if (!alignedGops) {
        // save all the nals in the last GOP into the gop cache
        cacheLastGop(this, gops);

        // Clear nalUnits
        nalUnits = [];

        // return early no gops can be aligned with desired gopsToAlignWith
        this.resetStream_();
        this.trigger('done', 'VideoSegmentStream');
        return;
      }

      // Some gops were trimmed. clear dts info so minSegmentDts and pts are correct
      // when recalculated before sending off to CoalesceStream
      trackDecodeInfo.clearDtsInfo(track);

      gops = alignedGops;
    }

    trackDecodeInfo.collectDtsInfo(track, gops);

    // First, we have to build the index from byte locations to
    // samples (that is, frames) in the video data
    track.samples = frameUtils.generateSampleTable(gops);

    // Concatenate the video data and construct the mdat
    mdat = mp4.mdat(frameUtils.concatenateNalData(gops));

    track.baseMediaDecodeTime = trackDecodeInfo.calculateTrackBaseMediaDecodeTime(
      track, options.keepOriginalTimestamps);

    this.trigger('processedGopsInfo', gops.map(function(gop) {
      return {
        pts: gop.pts,
        dts: gop.dts,
        byteLength: gop.byteLength
      };
    }));

    firstGop = gops[0];
    lastGop = gops[gops.length - 1];

    this.trigger(
      'segmentTimingInfo',
      generateSegmentTimingInfo(
        track.baseMediaDecodeTime,
        firstGop.dts,
        firstGop.pts,
        lastGop.dts + lastGop.duration,
        lastGop.pts + lastGop.duration,
        prependedContentDuration));

    this.trigger('timingInfo', {
      start: firstGop.pts,
      end: lastGop.pts + lastGop.duration
    });

    // save all the nals in the last GOP into the gop cache. This pops the GOP
    // off `gops`, which is safe because the sample table and mdat were
    // already generated above
    cacheLastGop(this, gops);

    // Clear nalUnits
    nalUnits = [];

    this.trigger('baseMediaDecodeTime', track.baseMediaDecodeTime);
    this.trigger('timelineStartInfo', track.timelineStartInfo);

    moof = mp4.moof(sequenceNumber, [track]);

    // it would be great to allocate this array up front instead of
    // throwing away hundreds of media segment fragments
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // Bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    this.trigger('data', {track: track, boxes: boxes});

    this.resetStream_();

    // Continue with the flush process now
    this.trigger('done', 'VideoSegmentStream');
  };

  /**
   * Drop all buffered NAL units, the gop cache and the alignment list, then
   * emit `reset`.
   */
  this.reset = function() {
    this.resetStream_();
    nalUnits = [];
    this.gopCache_.length = 0;
    // Replace rather than truncate: the current list is the array handed to
    // alignGopsWith and is owned by that caller, so it must not be emptied here
    gopsToAlignWith = [];
    this.trigger('reset');
  };

  /**
   * Clear per-segment state: collected DTS info and the recorded SPS
   * config/PPS markers.
   */
  this.resetStream_ = function() {
    trackDecodeInfo.clearDtsInfo(track);

    // reset config and pps because they may differ across segments
    // for instance, when we are rendition switching
    config = undefined;
    pps = undefined;
  };

  // Search for a candidate Gop for gop-fusion from the gop cache and
  // return it or return null if no good candidate was found
  this.getGopForFusion_ = function(nalUnit) {
    var
      halfSecond = 45000, // Half-a-second in a 90khz clock
      allowableOverlap = 10000, // About 3 frames @ 30fps
      nearestDistance = Infinity,
      dtsDistance,
      nearestGopObj,
      currentGop,
      currentGopObj,
      i;

    // Search for the GOP nearest to the beginning of this nal unit
    for (i = 0; i < this.gopCache_.length; i++) {
      currentGopObj = this.gopCache_[i];
      currentGop = currentGopObj.gop;

      // Reject Gops with different SPS or PPS. A cache entry stored before the
      // track had parameter sets carries undefined pps/sps; it cannot be
      // verified, so it is rejected as well instead of being dereferenced
      if (!(track.pps && currentGopObj.pps &&
            arrayEquals(track.pps[0], currentGopObj.pps[0])) ||
          !(track.sps && currentGopObj.sps &&
            arrayEquals(track.sps[0], currentGopObj.sps[0]))) {
        continue;
      }

      // Reject Gops that would require a negative baseMediaDecodeTime
      if (currentGop.dts < track.timelineStartInfo.dts) {
        continue;
      }

      // The distance between the end of the gop and the start of the nalUnit
      dtsDistance = (nalUnit.dts - currentGop.dts) - currentGop.duration;

      // Only consider GOPS that start before the nal unit and end within
      // a half-second of the nal unit
      if (dtsDistance >= -allowableOverlap &&
          dtsDistance <= halfSecond) {

        // Always use the closest GOP we found if there is more than
        // one candidate
        if (!nearestGopObj ||
            nearestDistance > dtsDistance) {
          nearestGopObj = currentGopObj;
          nearestDistance = dtsDistance;
        }
      }
    }

    if (nearestGopObj) {
      return nearestGopObj.gop;
    }
    return null;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the START of the list
  this.alignGopsAtStart_ = function(gops) {
    var alignIndex, gopIndex, align, gop, byteLength, nalCount, duration, alignedGops;

    // running totals, reduced as leading gops are dropped
    byteLength = gops.byteLength;
    nalCount = gops.nalCount;
    duration = gops.duration;
    alignIndex = gopIndex = 0;

    while (alignIndex < gopsToAlignWith.length && gopIndex < gops.length) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        break;
      }

      if (gop.pts > align.pts) {
        // this current gop starts after the current gop we want to align on, so increment
        // align index
        alignIndex++;
        continue;
      }

      // current gop starts before the current gop we want to align on. so increment gop
      // index
      gopIndex++;
      byteLength -= gop.byteLength;
      nalCount -= gop.nalCount;
      duration -= gop.duration;
    }

    if (gopIndex === 0) {
      // no gops to trim
      return gops;
    }

    if (gopIndex === gops.length) {
      // all gops trimmed, skip appending all gops
      return null;
    }

    alignedGops = gops.slice(gopIndex);
    alignedGops.byteLength = byteLength;
    alignedGops.duration = duration;
    alignedGops.nalCount = nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;

    return alignedGops;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the END of the list
  this.alignGopsAtEnd_ = function(gops) {
    var
      alignIndex,
      gopIndex,
      align,
      gop,
      alignEndIndex,
      matchFound,
      trimIndex,
      alignedGops,
      metadata;

    alignIndex = gopsToAlignWith.length - 1;
    gopIndex = gops.length - 1;
    alignEndIndex = null;
    matchFound = false;

    while (alignIndex >= 0 && gopIndex >= 0) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        matchFound = true;
        break;
      }

      if (align.pts > gop.pts) {
        alignIndex--;
        continue;
      }

      if (alignIndex === gopsToAlignWith.length - 1) {
        // gop.pts is greater than the last alignment candidate. If no match is found
        // by the end of this loop, we still want to append gops that come after this
        // point
        alignEndIndex = gopIndex;
      }

      gopIndex--;
    }

    if (!matchFound && alignEndIndex === null) {
      return null;
    }

    if (matchFound) {
      trimIndex = gopIndex;
    } else {
      trimIndex = alignEndIndex;
    }

    if (trimIndex === 0) {
      return gops;
    }

    alignedGops = gops.slice(trimIndex);
    // recompute the aggregate metadata for the gops that remain
    metadata = alignedGops.reduce(function(total, gop) {
      total.byteLength += gop.byteLength;
      total.duration += gop.duration;
      total.nalCount += gop.nalCount;
      return total;
    }, { byteLength: 0, duration: 0, nalCount: 0 });

    alignedGops.byteLength = metadata.byteLength;
    alignedGops.duration = metadata.duration;
    alignedGops.nalCount = metadata.nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;

    return alignedGops;
  };

  /**
   * Set the list of gops that flushed gops are trimmed to align with.
   * @param newGopsToAlignWith {Array} gop descriptions carrying a `pts`; a
   * missing list is treated as empty, which disables alignment
   */
  this.alignGopsWith = function(newGopsToAlignWith) {
    gopsToAlignWith = newGopsToAlignWith || [];
  };
};
VideoSegmentStream.prototype = new Stream();
/**
 * A Stream that can combine multiple streams (ie. audio & video)
 * into a single output segment for MSE. Also supports audio-only
 * and video-only streams.
 * @param options {object} transmuxer options object
 * @param options.remux {boolean} coerced to a boolean and stored as
 * `remuxTracks`; defaults to true when left undefined
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 * in the source; false to adjust the first segment to start at media timeline start.
 * @param metadataStream {object} stream whose `dispatchType` is attached to
 * the metadata of every emitted segment
 */
CoalesceStream = function(options, metadataStream) {
  options = options || {};

  // Number of Tracks per output segment
  // If greater than 1, we combine multiple
  // tracks into a single segment
  this.numberOfTracks = 0;
  this.metadataStream = metadataStream;

  this.remuxTracks =
    typeof options.remux === 'undefined' ? true : !!options.remux;

  // anything other than a literal boolean `true` leaves this switched off
  this.keepOriginalTimestamps = options.keepOriginalTimestamps === true;

  this.pendingTracks = [];
  this.videoTrack = null;
  this.pendingBoxes = [];
  this.pendingCaptions = [];
  this.pendingMetadata = [];
  this.pendingBytes = 0;
  this.emittedTracks = 0;

  CoalesceStream.prototype.init.call(this);

  // Take output from multiple upstream streams: captions (`text`), id3 tags
  // (`frames`) and media segments (`track` + `boxes`)
  this.push = function(output) {
    var track;

    // buffer incoming captions until the associated video segment
    // finishes
    if (output.text) {
      return this.pendingCaptions.push(output);
    }
    // buffer incoming id3 tags until the final flush
    if (output.frames) {
      return this.pendingMetadata.push(output);
    }

    // Add this track to the list of pending tracks and store
    // important information required for the construction of
    // the final segment
    track = output.track;
    this.pendingTracks.push(track);
    this.pendingBytes += output.boxes.byteLength;

    // TODO: is there an issue for this against chrome?
    // We unshift audio and push video because
    // as of Chrome 75 when switching from
    // one init segment to another if the video
    // mdat does not appear after the audio mdat
    // only audio will play for the duration of our transmux.
    switch (track.type) {
    case 'video':
      this.videoTrack = track;
      this.pendingBoxes.push(output.boxes);
      break;
    case 'audio':
      this.audioTrack = track;
      this.pendingBoxes.unshift(output.boxes);
      break;
    }
  };
};
CoalesceStream.prototype = new Stream();
/**
 * Combine everything pushed since the last emission into a single `data`
 * event (init segment, moof+mdat bytes, captions and id3 metadata), followed
 * by one `caption` event per caption and one `id3Frame` event per tag.
 * `done` is emitted once the number of emitted tracks reaches
 * `numberOfTracks`.
 *
 * @param flushSource {string} identifies the stream that triggered the flush;
 * only 'VideoSegmentStream' and 'AudioSegmentStream' count as data-generating
 */
CoalesceStream.prototype.flush = function(flushSource) {
  var
    segment = {
      captions: [],
      captionStreams: {},
      metadata: [],
      info: {}
    },
    fromSegmentStream =
      flushSource === 'VideoSegmentStream' || flushSource === 'AudioSegmentStream',
    // the video track, when present, takes precedence over the audio track
    infoTrack = this.videoTrack || this.audioTrack,
    infoProps = this.videoTrack ? VIDEO_PROPERTIES : AUDIO_PROPERTIES,
    timelineStartPts = 0,
    writeOffset = 0,
    initSegment,
    i;

  if (this.pendingTracks.length < this.numberOfTracks) {
    // Return when the flush did not come from a data-generating portion of
    // the segment (meaning that we have only received meta-data or captions),
    // or when remuxing and still waiting for the remaining tracks from the
    // pipeline
    if (!fromSegmentStream || this.remuxTracks) {
      return;
    }

    if (this.pendingTracks.length === 0) {
      // In the case where we receive a flush without any data having been
      // received we consider it an emitted track for the purposes of coalescing
      // `done` events.
      // We do this for the case where there is an audio and video track in the
      // segment but no audio data. (seen in several playlists with alternate
      // audio tracks and no audio present in the main TS segments.)
      this.emittedTracks += 1;

      if (this.emittedTracks >= this.numberOfTracks) {
        this.trigger('done');
        this.emittedTracks = 0;
      }
      return;
    }
  }

  if (infoTrack) {
    timelineStartPts = infoTrack.timelineStartInfo.pts;
    infoProps.forEach(function(prop) {
      segment.info[prop] = infoTrack[prop];
    });

    segment.type =
      this.pendingTracks.length === 1 ? this.pendingTracks[0].type : 'combined';

    this.emittedTracks += this.pendingTracks.length;

    // Build an init segment containing a moov and track definitions, then
    // copy it into a fresh typed array
    initSegment = mp4.initSegment(this.pendingTracks);
    segment.initSegment = new Uint8Array(initSegment.byteLength);
    segment.initSegment.set(initSegment);

    // Append each moof+mdat (one per track) together in a new typed array
    segment.data = new Uint8Array(this.pendingBytes);
    this.pendingBoxes.forEach(function(box) {
      segment.data.set(box, writeOffset);
      writeOffset += box.byteLength;
    });

    // Translate caption PTS times into second offsets to match the
    // video timeline for the segment, and add track info
    this.pendingCaptions.forEach(function(caption) {
      caption.startTime = clock.metadataTsToSeconds(
        caption.startPts, timelineStartPts, this.keepOriginalTimestamps);
      caption.endTime = clock.metadataTsToSeconds(
        caption.endPts, timelineStartPts, this.keepOriginalTimestamps);

      segment.captionStreams[caption.stream] = true;
      segment.captions.push(caption);
    }, this);

    // Translate ID3 frame PTS times into second offsets to match the
    // video timeline for the segment
    this.pendingMetadata.forEach(function(id3) {
      id3.cueTime = clock.metadataTsToSeconds(
        id3.pts, timelineStartPts, this.keepOriginalTimestamps);

      segment.metadata.push(id3);
    }, this);

    // We add this to every single emitted segment even though we only need
    // it for the first
    segment.metadata.dispatchType = this.metadataStream.dispatchType;

    // Reset stream state
    this.pendingTracks.length = 0;
    this.videoTrack = null;
    this.pendingBoxes.length = 0;
    this.pendingCaptions.length = 0;
    this.pendingBytes = 0;
    this.pendingMetadata.length = 0;

    // Emit the built segment
    // We include captions and ID3 tags for backwards compatibility,
    // ideally we should send only video and audio in the data event
    this.trigger('data', segment);

    // Emit each caption to the outside world
    // Ideally, this would happen immediately on parsing captions,
    // but we need to ensure that video data is sent back first
    // so that caption timing can be adjusted to match video timing
    for (i = 0; i < segment.captions.length; i++) {
      this.trigger('caption', segment.captions[i]);
    }

    // Emit each id3 tag to the outside world
    // Ideally, this would happen immediately on parsing the tag,
    // but we need to ensure that video data is sent back first
    // so that ID3 frame timing can be adjusted to match video timing
    for (i = 0; i < segment.metadata.length; i++) {
      this.trigger('id3Frame', segment.metadata[i]);
    }
  }

  // Only emit `done` if all tracks have been flushed and emitted
  if (this.emittedTracks >= this.numberOfTracks) {
    this.trigger('done');
    this.emittedTracks = 0;
  }
};
/**
 * Switch remuxing of pending tracks into a single segment on or off.
 *
 * The value is coerced to a boolean so that `remuxTracks` always holds the
 * same kind of value the constructor stores for `options.remux`.
 *
 * @param val {boolean} truthy to combine tracks into one segment, falsy to
 * emit each track on its own
 */
CoalesceStream.prototype.setRemux = function(val) {
  this.remuxTracks = !!val;
};
/**
 * A Stream that expects MP2T binary data as input and produces
 * corresponding media segments, suitable for use with Media Source
 * Extension (MSE) implementations that support the ISO BMFF byte
 * stream format, like Chrome.
 *
 * The parsing pipeline is built lazily on the first `push` after a flush:
 * an AAC pipeline when `isLikelyAacData` reports AAC data, a TS pipeline
 * otherwise.
 *
 * @param options {object} transmuxer options object; also handed to the
 * segment, caption and coalesce streams
 * @param options.baseMediaDecodeTime {number} initial base media decode time,
 * defaults to 0
 * @param options.keepOriginalTimestamps {boolean} If true,
 * setBaseMediaDecodeTime does not overwrite the stored base media decode time
 * and video timeline start info is not forwarded to the audio track
 */
Transmuxer = function(options) {
  var
    self = this,
    hasFlushed = true,
    videoTrack,
    audioTrack;

  Transmuxer.prototype.init.call(this);

  options = options || {};
  this.baseMediaDecodeTime = options.baseMediaDecodeTime || 0;
  this.transmuxPipeline_ = {};

  /**
   * Replace the current pipeline with one that parses raw AAC data.
   */
  this.setupAacPipeline = function() {
    var pipeline = {};

    this.transmuxPipeline_ = pipeline;

    pipeline.type = 'aac';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.aacStream = new AacStream();
    pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
    pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
    pipeline.adtsStream = new AdtsStream();
    pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.aacStream;

    pipeline.aacStream
      .pipe(pipeline.audioTimestampRolloverStream)
      .pipe(pipeline.adtsStream);
    pipeline.aacStream
      .pipe(pipeline.timedMetadataTimestampRolloverStream)
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    pipeline.metadataStream.on('timestamp', function(frame) {
      pipeline.aacStream.setTimestamp(frame.timeStamp);
    });

    pipeline.aacStream.on('data', function(data) {
      // only the first audio or timed-metadata event hooks up the segment stream
      if ((data.type !== 'timed-metadata' && data.type !== 'audio') || pipeline.audioSegmentStream) {
        return;
      }

      audioTrack = audioTrack || {
        timelineStartInfo: {
          baseMediaDecodeTime: self.baseMediaDecodeTime
        },
        codec: 'adts',
        type: 'audio'
      };
      // hook up the audio segment stream to the first track with aac data
      pipeline.coalesceStream.numberOfTracks++;
      pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack, options);

      pipeline.audioSegmentStream.on('timingInfo',
        self.trigger.bind(self, 'audioTimingInfo'));

      // Set up the final part of the audio pipeline
      pipeline.adtsStream
        .pipe(pipeline.audioSegmentStream)
        .pipe(pipeline.coalesceStream);

      // emit pmt info
      self.trigger('trackinfo', {
        hasAudio: !!audioTrack,
        hasVideo: !!videoTrack
      });
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));
    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
  };

  /**
   * Replace the current pipeline with one that parses MPEG2-TS data. The
   * segment streams are hooked up once track metadata is delivered.
   */
  this.setupTsPipeline = function() {
    var pipeline = {};

    this.transmuxPipeline_ = pipeline;

    pipeline.type = 'ts';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.packetStream = new m2ts.TransportPacketStream();
    pipeline.parseStream = new m2ts.TransportParseStream();
    pipeline.elementaryStream = new m2ts.ElementaryStream();
    pipeline.timestampRolloverStream = new m2ts.TimestampRolloverStream();
    pipeline.adtsStream = new AdtsStream();
    pipeline.h264Stream = new H264Stream();
    pipeline.captionStream = new m2ts.CaptionStream(options);
    pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.packetStream;

    // disassemble MPEG2-TS packets into elementary streams
    pipeline.packetStream
      .pipe(pipeline.parseStream)
      .pipe(pipeline.elementaryStream)
      .pipe(pipeline.timestampRolloverStream);

    // !!THIS ORDER IS IMPORTANT!!
    // demux the streams
    pipeline.timestampRolloverStream
      .pipe(pipeline.h264Stream);

    pipeline.timestampRolloverStream
      .pipe(pipeline.adtsStream);

    pipeline.timestampRolloverStream
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    // Hook up CEA-608/708 caption stream
    pipeline.h264Stream.pipe(pipeline.captionStream)
      .pipe(pipeline.coalesceStream);

    pipeline.elementaryStream.on('data', function(data) {
      var i;

      if (data.type === 'metadata') {
        i = data.tracks.length;

        // scan the tracks listed in the metadata
        while (i--) {
          if (!videoTrack && data.tracks[i].type === 'video') {
            videoTrack = data.tracks[i];
            videoTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          } else if (!audioTrack && data.tracks[i].type === 'audio') {
            audioTrack = data.tracks[i];
            audioTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          }
        }

        // hook up the video segment stream to the first track with h264 data
        if (videoTrack && !pipeline.videoSegmentStream) {
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.videoSegmentStream = new VideoSegmentStream(videoTrack, options);

          pipeline.videoSegmentStream.on('timelineStartInfo', function(timelineStartInfo) {
            // When video emits timelineStartInfo data after a flush, we forward that
            // info to the AudioSegmentStream, if it exists, because video timeline
            // data takes precedence. Do not do this if keepOriginalTimestamps is set,
            // because this is a particularly subtle form of timestamp alteration.
            if (audioTrack && !options.keepOriginalTimestamps) {
              audioTrack.timelineStartInfo = timelineStartInfo;
              // On the first segment we trim AAC frames that exist before the
              // very earliest DTS we have seen in video because Chrome will
              // interpret any video track with a baseMediaDecodeTime that is
              // non-zero as a gap.
              pipeline.audioSegmentStream.setEarliestDts(timelineStartInfo.dts - self.baseMediaDecodeTime);
            }
          });

          pipeline.videoSegmentStream.on('processedGopsInfo',
            self.trigger.bind(self, 'gopInfo'));
          pipeline.videoSegmentStream.on('segmentTimingInfo',
            self.trigger.bind(self, 'videoSegmentTimingInfo'));

          pipeline.videoSegmentStream.on('baseMediaDecodeTime', function(baseMediaDecodeTime) {
            if (audioTrack) {
              pipeline.audioSegmentStream.setVideoBaseMediaDecodeTime(baseMediaDecodeTime);
            }
          });

          pipeline.videoSegmentStream.on('timingInfo',
            self.trigger.bind(self, 'videoTimingInfo'));

          // Set up the final part of the video pipeline
          pipeline.h264Stream
            .pipe(pipeline.videoSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        if (audioTrack && !pipeline.audioSegmentStream) {
          // hook up the audio segment stream to the first track with aac data
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack, options);

          pipeline.audioSegmentStream.on('timingInfo',
            self.trigger.bind(self, 'audioTimingInfo'));
          pipeline.audioSegmentStream.on('segmentTimingInfo',
            self.trigger.bind(self, 'audioSegmentTimingInfo'));

          // Set up the final part of the audio pipeline
          pipeline.adtsStream
            .pipe(pipeline.audioSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        // emit pmt info
        self.trigger('trackinfo', {
          hasAudio: !!audioTrack,
          hasVideo: !!videoTrack
        });
      }
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));
    pipeline.coalesceStream.on('id3Frame', function(id3Frame) {
      id3Frame.dispatchType = pipeline.metadataStream.dispatchType;

      self.trigger('id3Frame', id3Frame);
    });
    pipeline.coalesceStream.on('caption', this.trigger.bind(this, 'caption'));
    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
  };

  /**
   * Update the base media decode time and clear the timeline start and DTS
   * info collected for the known tracks, the gop cache, the caption stream
   * and the timestamp rollover state.
   *
   * @param baseMediaDecodeTime {number} the new base media decode time;
   * ignored for `this.baseMediaDecodeTime` when keepOriginalTimestamps is set
   */
  this.setBaseMediaDecodeTime = function(baseMediaDecodeTime) {
    var pipeline = this.transmuxPipeline_;

    if (!options.keepOriginalTimestamps) {
      this.baseMediaDecodeTime = baseMediaDecodeTime;
    }

    if (audioTrack) {
      audioTrack.timelineStartInfo.dts = undefined;
      audioTrack.timelineStartInfo.pts = undefined;
      trackDecodeInfo.clearDtsInfo(audioTrack);
      if (pipeline.audioTimestampRolloverStream) {
        pipeline.audioTimestampRolloverStream.discontinuity();
      }
    }

    if (videoTrack) {
      if (pipeline.videoSegmentStream) {
        pipeline.videoSegmentStream.gopCache_ = [];
      }
      videoTrack.timelineStartInfo.dts = undefined;
      videoTrack.timelineStartInfo.pts = undefined;
      trackDecodeInfo.clearDtsInfo(videoTrack);
      // videoTrack outlives the pipeline it was discovered in, and only the
      // TS pipeline owns a caption stream, so check before resetting it
      if (pipeline.captionStream) {
        pipeline.captionStream.reset();
      }
    }

    if (pipeline.timestampRolloverStream) {
      pipeline.timestampRolloverStream.discontinuity();
    }
  };

  /**
   * Forward the audio append start timestamp to the audio segment stream.
   * Does nothing until the current pipeline has an audio segment stream.
   */
  this.setAudioAppendStart = function(timestamp) {
    var audioSegmentStream = this.transmuxPipeline_.audioSegmentStream;

    // audioTrack outlives the pipeline it was discovered in, so the current
    // pipeline may not have created its audio segment stream yet
    if (audioTrack && audioSegmentStream) {
      audioSegmentStream.setAudioAppendStart(timestamp);
    }
  };

  /**
   * Store the remux setting for pipelines created later and apply it to the
   * current coalesce stream, if there is one.
   */
  this.setRemux = function(val) {
    var pipeline = this.transmuxPipeline_;

    options.remux = val;

    if (pipeline && pipeline.coalesceStream) {
      pipeline.coalesceStream.setRemux(val);
    }
  };

  /**
   * Forward the gop alignment list to the video segment stream, if present.
   */
  this.alignGopsWith = function(gopsToAlignWith) {
    if (videoTrack && this.transmuxPipeline_.videoSegmentStream) {
      this.transmuxPipeline_.videoSegmentStream.alignGopsWith(gopsToAlignWith);
    }
  };

  // feed incoming data to the front of the parsing pipeline
  this.push = function(data) {
    if (hasFlushed) {
      // the kind of data is only re-detected on the first push after a flush
      var isAac = isLikelyAacData(data);

      if (isAac && this.transmuxPipeline_.type !== 'aac') {
        this.setupAacPipeline();
      } else if (!isAac && this.transmuxPipeline_.type !== 'ts') {
        this.setupTsPipeline();
      }
      hasFlushed = false;
    }
    this.transmuxPipeline_.headOfPipeline.push(data);
  };

  // flush any buffered data
  this.flush = function() {
    hasFlushed = true;
    // Start at the top of the pipeline and flush all pending work
    this.transmuxPipeline_.headOfPipeline.flush();
  };

  /**
   * Forward endTimeline to the head of the pipeline.
   */
  this.endTimeline = function() {
    this.transmuxPipeline_.headOfPipeline.endTimeline();
  };

  /**
   * Reset the pipeline from its head, if a pipeline has been set up.
   */
  this.reset = function() {
    if (this.transmuxPipeline_.headOfPipeline) {
      this.transmuxPipeline_.headOfPipeline.reset();
    }
  };

  // Caption data has to be reset when seeking outside buffered range
  this.resetCaptions = function() {
    if (this.transmuxPipeline_.captionStream) {
      this.transmuxPipeline_.captionStream.reset();
    }
  };
};
Transmuxer.prototype = new Stream();
// Public API of this module: the transmuxer, the two segment streams and the
// audio/video property name lists re-exported from ../constants
module.exports = {
Transmuxer: Transmuxer,
VideoSegmentStream: VideoSegmentStream,
AudioSegmentStream: AudioSegmentStream,
AUDIO_PROPERTIES: AUDIO_PROPERTIES,
VIDEO_PROPERTIES: VIDEO_PROPERTIES,
// exported for testing
generateSegmentTimingInfo: generateSegmentTimingInfo
};