Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract audio type from TS descriptors #973

Merged
merged 5 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
than if we ignore it and assume the file is CBR.
* MPEG2-TS: Add DTS, DTS-LBR and DTS:X Profile2 support
([#275](https://github.com/androidx/media/pull/275)).
* Extract audio types from TS descriptors and map them to role flags,
allowing users to make better-informed audio track selections
([#973](https://github.com/androidx/media/pull/973)).
* Audio:
* Video:
* Text:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,15 @@ private DtsHeader(
* @param frame The DTS Core frame to parse.
* @param trackId The track identifier to set on the format.
* @param language The language to set on the format.
* @param roleFlags The role flags to set on the format.
* @param drmInitData {@link DrmInitData} to be included in the format.
* @return The DTS format parsed from data in the header.
*/
public static Format parseDtsFormat(
byte[] frame,
@Nullable String trackId,
@Nullable String language,
@C.RoleFlags int roleFlags,
@Nullable DrmInitData drmInitData) {
ParsableBitArray frameBits = getNormalizedFrame(frame);
frameBits.skipBits(32 + 1 + 5 + 1 + 7 + 14); // SYNC, FTYPE, SHORT, CPF, NBLKS, FSIZE
Expand All @@ -287,6 +289,7 @@ public static Format parseDtsFormat(
.setSampleRate(sampleRate)
.setDrmInitData(drmInitData)
.setLanguage(language)
.setRoleFlags(roleFlags)
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ public final class Ac3Reader implements ElementaryStreamReader {
private final ParsableBitArray headerScratchBits;
private final ParsableByteArray headerScratchBytes;
@Nullable private final String language;
private final @C.RoleFlags int roleFlags;

private @MonotonicNonNull String formatId;
private @MonotonicNonNull TrackOutput output;
Expand All @@ -80,20 +81,22 @@ public final class Ac3Reader implements ElementaryStreamReader {

/** Constructs a new reader for (E-)AC-3 elementary streams. */
public Ac3Reader() {
this(null);
this(null, /* roleFlags= */ 0);
}

/**
* Constructs a new reader for (E-)AC-3 elementary streams.
*
* @param language Track language.
* @param roleFlags Track role flags.
*/
public Ac3Reader(@Nullable String language) {
public Ac3Reader(@Nullable String language, @C.RoleFlags int roleFlags) {
headerScratchBits = new ParsableBitArray(new byte[HEADER_SIZE]);
headerScratchBytes = new ParsableByteArray(headerScratchBits.data);
state = STATE_FINDING_SYNC;
timeUs = C.TIME_UNSET;
this.language = language;
this.roleFlags = roleFlags;
}

@Override
Expand Down Expand Up @@ -216,6 +219,7 @@ private void parseHeader() {
.setChannelCount(frameInfo.channelCount)
.setSampleRate(frameInfo.sampleRate)
.setLanguage(language)
.setRoleFlags(roleFlags)
.setPeakBitrate(frameInfo.bitrate);
// AC3 has constant bitrate, so averageBitrate = peakBitrate
if (MimeTypes.AUDIO_AC3.equals(frameInfo.mimeType)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public final class Ac4Reader implements ElementaryStreamReader {
private final ParsableBitArray headerScratchBits;
private final ParsableByteArray headerScratchBytes;
@Nullable private final String language;
private final @C.RoleFlags int roleFlags;

private @MonotonicNonNull String formatId;
private @MonotonicNonNull TrackOutput output;
Expand All @@ -78,15 +79,16 @@ public final class Ac4Reader implements ElementaryStreamReader {

/** Constructs a new reader for AC-4 elementary streams. */
public Ac4Reader() {
this(null);
this(null, /* roleFlags= */ 0);
}

/**
* Constructs a new reader for AC-4 elementary streams.
*
* @param language Track language.
* @param roleFlags Track role flags.
*/
public Ac4Reader(@Nullable String language) {
public Ac4Reader(@Nullable String language, @C.RoleFlags int roleFlags) {
headerScratchBits = new ParsableBitArray(new byte[Ac4Util.HEADER_SIZE_FOR_PARSER]);
headerScratchBytes = new ParsableByteArray(headerScratchBits.data);
state = STATE_FINDING_SYNC;
Expand All @@ -95,6 +97,7 @@ public Ac4Reader(@Nullable String language) {
hasCRC = false;
timeUs = C.TIME_UNSET;
this.language = language;
this.roleFlags = roleFlags;
}

@Override
Expand Down Expand Up @@ -217,6 +220,7 @@ private void parseHeader() {
.setChannelCount(frameInfo.channelCount)
.setSampleRate(frameInfo.sampleRate)
.setLanguage(language)
.setRoleFlags(roleFlags)
.build();
output.format(format);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ public final class AdtsReader implements ElementaryStreamReader {
private final ParsableBitArray adtsScratch;
private final ParsableByteArray id3HeaderBuffer;
@Nullable private final String language;
private final @C.RoleFlags int roleFlags;

private @MonotonicNonNull String formatId;
private @MonotonicNonNull TrackOutput output;
Expand Down Expand Up @@ -105,14 +106,15 @@ public final class AdtsReader implements ElementaryStreamReader {
* @param exposeId3 True if the reader should expose ID3 information.
*/
public AdtsReader(boolean exposeId3) {
this(exposeId3, null);
this(exposeId3, null, /* roleFlags= */ 0);
}

/**
* @param exposeId3 True if the reader should expose ID3 information.
* @param language Track language.
* @param roleFlags Track role flags.
*/
public AdtsReader(boolean exposeId3, @Nullable String language) {
public AdtsReader(boolean exposeId3, @Nullable String language, @C.RoleFlags int roleFlags) {
adtsScratch = new ParsableBitArray(new byte[HEADER_SIZE + CRC_SIZE]);
id3HeaderBuffer = new ParsableByteArray(Arrays.copyOf(ID3_IDENTIFIER, ID3_HEADER_SIZE));
setFindingSampleState();
Expand All @@ -122,6 +124,7 @@ public AdtsReader(boolean exposeId3, @Nullable String language) {
timeUs = C.TIME_UNSET;
this.exposeId3 = exposeId3;
this.language = language;
this.roleFlags = roleFlags;
}

/** Returns whether an integer matches an ADTS SYNC word. */
Expand Down Expand Up @@ -510,6 +513,7 @@ private void parseAdtsHeader() throws ParserException {
.setSampleRate(aacConfig.sampleRateHz)
.setInitializationData(Collections.singletonList(audioSpecificConfig))
.setLanguage(language)
.setRoleFlags(roleFlags)
.build();
// In this class a sample is an access unit, but the MediaFormat sample rate specifies the
// number of PCM audio samples per second.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,30 +151,32 @@ public TsPayloadReader createPayloadReader(int streamType, EsInfo esInfo) {
switch (streamType) {
case TsExtractor.TS_STREAM_TYPE_MPA:
case TsExtractor.TS_STREAM_TYPE_MPA_LSF:
return new PesReader(new MpegAudioReader(esInfo.language));
return new PesReader(new MpegAudioReader(esInfo.language, esInfo.getRoleFlags()));
case TsExtractor.TS_STREAM_TYPE_AAC_ADTS:
return isSet(FLAG_IGNORE_AAC_STREAM)
? null
: new PesReader(new AdtsReader(false, esInfo.language));
: new PesReader(new AdtsReader(false, esInfo.language, esInfo.getRoleFlags()));
case TsExtractor.TS_STREAM_TYPE_AAC_LATM:
return isSet(FLAG_IGNORE_AAC_STREAM)
? null
: new PesReader(new LatmReader(esInfo.language));
: new PesReader(new LatmReader(esInfo.language, esInfo.getRoleFlags()));
case TsExtractor.TS_STREAM_TYPE_AC3:
case TsExtractor.TS_STREAM_TYPE_E_AC3:
return new PesReader(new Ac3Reader(esInfo.language));
return new PesReader(new Ac3Reader(esInfo.language, esInfo.getRoleFlags()));
case TsExtractor.TS_STREAM_TYPE_AC4:
return new PesReader(new Ac4Reader(esInfo.language));
return new PesReader(new Ac4Reader(esInfo.language, esInfo.getRoleFlags()));
case TsExtractor.TS_STREAM_TYPE_HDMV_DTS:
if (!isSet(FLAG_ENABLE_HDMV_DTS_AUDIO_STREAMS)) {
return null;
}
// Fall through.
case TsExtractor.TS_STREAM_TYPE_DTS:
case TsExtractor.TS_STREAM_TYPE_DTS_HD:
return new PesReader(new DtsReader(esInfo.language, DtsReader.EXTSS_HEADER_SIZE_MAX));
return new PesReader(
new DtsReader(esInfo.language, esInfo.getRoleFlags(), DtsReader.EXTSS_HEADER_SIZE_MAX));
case TsExtractor.TS_STREAM_TYPE_DTS_UHD:
return new PesReader(new DtsReader(esInfo.language, DtsReader.FTOC_MAX_HEADER_SIZE));
return new PesReader(
new DtsReader(esInfo.language, esInfo.getRoleFlags(), DtsReader.FTOC_MAX_HEADER_SIZE));
case TsExtractor.TS_STREAM_TYPE_H262:
case TsExtractor.TS_STREAM_TYPE_DC2_H262:
return new PesReader(new H262Reader(buildUserDataReader(esInfo)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public final class DtsReader implements ElementaryStreamReader {
private final AtomicInteger uhdAudioChunkId;

@Nullable private final String language;
private final @C.RoleFlags int roleFlags;

private @MonotonicNonNull String formatId;
private @MonotonicNonNull TrackOutput output;
Expand All @@ -93,16 +94,18 @@ public final class DtsReader implements ElementaryStreamReader {
* Constructs a new reader for DTS elementary streams.
*
* @param language Track language.
* @param roleFlags Track role flags.
* @param maxHeaderSize Maximum size of the header in a frame.
*/
public DtsReader(@Nullable String language, int maxHeaderSize) {
public DtsReader(@Nullable String language, @C.RoleFlags int roleFlags, int maxHeaderSize) {
headerScratchBytes = new ParsableByteArray(new byte[maxHeaderSize]);
state = STATE_FINDING_SYNC;
timeUs = C.TIME_UNSET;
uhdAudioChunkId = new AtomicInteger();
extensionSubstreamHeaderSize = C.LENGTH_UNSET;
uhdHeaderSize = C.LENGTH_UNSET;
this.language = language;
this.roleFlags = roleFlags;
}

@Override
Expand Down Expand Up @@ -263,7 +266,7 @@ private boolean skipToNextSyncWord(ParsableByteArray pesBuffer) {
private void parseCoreHeader() {
byte[] frameData = headerScratchBytes.getData();
if (format == null) {
format = DtsUtil.parseDtsFormat(frameData, formatId, language, null);
format = DtsUtil.parseDtsFormat(frameData, formatId, language, roleFlags, null);
output.format(format);
}
sampleSize = DtsUtil.getDtsFrameSize(frameData);
Expand Down Expand Up @@ -314,6 +317,7 @@ private void updateFormatWithDtsHeaderInfo(DtsUtil.DtsHeader dtsHeader) {
.setChannelCount(dtsHeader.channelCount)
.setSampleRate(dtsHeader.sampleRate)
.setLanguage(language)
.setRoleFlags(roleFlags)
.build();
output.format(format);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public final class LatmReader implements ElementaryStreamReader {
private static final int SYNC_BYTE_SECOND = 0xE0;

@Nullable private final String language;
private final @C.RoleFlags int roleFlags;
private final ParsableByteArray sampleDataBuffer;
private final ParsableBitArray sampleBitArray;

Expand Down Expand Up @@ -78,9 +79,11 @@ public final class LatmReader implements ElementaryStreamReader {

/**
* Constructs a new reader for LATM elementary streams.
*
* @param language Track language.
* @param roleFlags Track role flags.
*/
public LatmReader(@Nullable String language) {
public LatmReader(@Nullable String language, @C.RoleFlags int roleFlags) {
this.language = language;
this.roleFlags = roleFlags;
sampleDataBuffer = new ParsableByteArray(INITIAL_BUFFER_SIZE);
sampleBitArray = new ParsableBitArray(sampleDataBuffer.getData());
timeUs = C.TIME_UNSET;
Expand Down Expand Up @@ -217,6 +220,7 @@ private void parseStreamMuxConfig(ParsableBitArray data) throws ParserException
.setSampleRate(sampleRateHz)
.setInitializationData(Collections.singletonList(initData))
.setLanguage(language)
.setRoleFlags(roleFlags)
.build();
if (!format.equals(this.format)) {
this.format = format;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public final class MpegAudioReader implements ElementaryStreamReader {
private final ParsableByteArray headerScratch;
private final MpegAudioUtil.Header header;
@Nullable private final String language;
private final @C.RoleFlags int roleFlags;

private @MonotonicNonNull TrackOutput output;
private @MonotonicNonNull String formatId;
Expand All @@ -63,17 +64,18 @@ public final class MpegAudioReader implements ElementaryStreamReader {
private long timeUs;

public MpegAudioReader() {
this(null);
this(null, /* roleFlags= */ 0);
}

public MpegAudioReader(@Nullable String language) {
public MpegAudioReader(@Nullable String language, @C.RoleFlags int roleFlags) {
state = STATE_FINDING_HEADER;
// The first byte of an MPEG Audio frame header is always 0xFF.
headerScratch = new ParsableByteArray(4);
headerScratch.getData()[0] = (byte) 0xFF;
header = new MpegAudioUtil.Header();
timeUs = C.TIME_UNSET;
this.language = language;
this.roleFlags = roleFlags;
}

@Override
Expand Down Expand Up @@ -200,6 +202,7 @@ private void readHeaderRemainder(ParsableByteArray source) {
.setChannelCount(header.channels)
.setSampleRate(header.sampleRate)
.setLanguage(language)
.setRoleFlags(roleFlags)
.build();
output.format(format);
hasOutputFormat = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package androidx.media3.extractor.ts;

import static androidx.media3.extractor.ts.TsPayloadReader.EsInfo.AUDIO_TYPE_UNDEFINED;
import static androidx.media3.extractor.ts.TsPayloadReader.FLAG_PAYLOAD_UNIT_START_INDICATOR;
import static java.lang.annotation.ElementType.TYPE_USE;

Expand Down Expand Up @@ -752,7 +753,8 @@ public void consume(ParsableByteArray sectionData) {
if (mode == MODE_HLS && id3Reader == null) {
// Set up an ID3 track regardless of whether there's a corresponding entry, in case one
// appears intermittently during playback. See [Internal: b/20261500].
EsInfo id3EsInfo = new EsInfo(TS_STREAM_TYPE_ID3, null, null, Util.EMPTY_BYTE_ARRAY);
EsInfo id3EsInfo =
new EsInfo(TS_STREAM_TYPE_ID3, null, AUDIO_TYPE_UNDEFINED, null, Util.EMPTY_BYTE_ARRAY);
id3Reader = payloadReaderFactory.createPayloadReader(TS_STREAM_TYPE_ID3, id3EsInfo);
if (id3Reader != null) {
id3Reader.init(
Expand Down Expand Up @@ -842,6 +844,7 @@ private EsInfo readEsInfo(ParsableByteArray data, int length) {
int descriptorsEndPosition = descriptorsStartPosition + length;
int streamType = -1;
String language = null;
@EsInfo.AudioType int audioType = AUDIO_TYPE_UNDEFINED;
List<DvbSubtitleInfo> dvbSubtitleInfos = null;
while (data.getPosition() < descriptorsEndPosition) {
int descriptorTag = data.readUnsignedByte();
Expand Down Expand Up @@ -883,7 +886,7 @@ private EsInfo readEsInfo(ParsableByteArray data, int length) {
streamType = TS_STREAM_TYPE_DTS;
} else if (descriptorTag == TS_PMT_DESC_ISO639_LANG) {
language = data.readString(3).trim();
// Audio type is ignored.
audioType = data.readUnsignedByte();
} else if (descriptorTag == TS_PMT_DESC_DVBSUBS) {
streamType = TS_STREAM_TYPE_DVBSUBS;
dvbSubtitleInfos = new ArrayList<>();
Expand All @@ -905,6 +908,7 @@ private EsInfo readEsInfo(ParsableByteArray data, int length) {
return new EsInfo(
streamType,
language,
audioType,
dvbSubtitleInfos,
Arrays.copyOfRange(data.getData(), descriptorsStartPosition, descriptorsEndPosition));
}
Expand Down
Loading