Skip to content

Commit

Permalink
Merge pull request #313 from pengbins:fix_ts_h265reader_parse_sps
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 527259619
  • Loading branch information
icbaker committed Apr 26, 2023
2 parents 9128244 + 171c224 commit fab134f
Show file tree
Hide file tree
Showing 11 changed files with 317 additions and 202 deletions.
4 changes: 4 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
* Add `Builder` for `DeviceInfo` and deprecate existing constructor.
* Add `DeviceInfo.routingControllerId` to specify the routing controller
ID for remote playbacks.
* Extractors:
* Fix parsing of H.265 SPS in MPEG-TS files by re-using the parsing logic
already used by RTSP and MP4 extractors
([#303](https://github.com/androidx/media/issues/303)).
* Session:
* Deprecate 4 volume-controlling methods in `Player` and add overloaded
methods which allow users to specify volume flags:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public static ImmutableList<String> mediaSamples() {
"sample_h264_mpeg_audio.ts",
"sample_h264_no_access_unit_delimiters.ts",
"sample_h265.ts",
"sample_h265_rps_pred.ts",
"sample_latm.ts",
"sample_scte35.ts",
"sample_with_id3.adts",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,14 @@
*/
package androidx.media3.extractor.ts;

import static java.lang.Math.min;

import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.util.Assertions;
import androidx.media3.common.util.CodecSpecificDataUtil;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.NalUnitUtil;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.ParsableNalUnitBitArray;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import androidx.media3.extractor.ExtractorOutput;
Expand Down Expand Up @@ -246,216 +242,30 @@ private static Format parseMediaFormat(
System.arraycopy(sps.nalData, 0, csdData, vps.nalLength, sps.nalLength);
System.arraycopy(pps.nalData, 0, csdData, vps.nalLength + sps.nalLength, pps.nalLength);

// Parse the SPS NAL unit, as per H.265/HEVC (2014) 7.3.2.2.1.
ParsableNalUnitBitArray bitArray = new ParsableNalUnitBitArray(sps.nalData, 0, sps.nalLength);
bitArray.skipBits(40 + 4); // NAL header, sps_video_parameter_set_id
int maxSubLayersMinus1 = bitArray.readBits(3);
bitArray.skipBit(); // sps_temporal_id_nesting_flag
int generalProfileSpace = bitArray.readBits(2);
boolean generalTierFlag = bitArray.readBit();
int generalProfileIdc = bitArray.readBits(5);
int generalProfileCompatibilityFlags = 0;
for (int i = 0; i < 32; i++) {
if (bitArray.readBit()) {
generalProfileCompatibilityFlags |= (1 << i);
}
}
int[] constraintBytes = new int[6];
for (int i = 0; i < constraintBytes.length; ++i) {
constraintBytes[i] = bitArray.readBits(8);
}
int generalLevelIdc = bitArray.readBits(8);
int toSkip = 0;
for (int i = 0; i < maxSubLayersMinus1; i++) {
if (bitArray.readBit()) { // sub_layer_profile_present_flag[i]
toSkip += 89;
}
if (bitArray.readBit()) { // sub_layer_level_present_flag[i]
toSkip += 8;
}
}
bitArray.skipBits(toSkip);
if (maxSubLayersMinus1 > 0) {
bitArray.skipBits(2 * (8 - maxSubLayersMinus1));
}

bitArray.readUnsignedExpGolombCodedInt(); // sps_seq_parameter_set_id
int chromaFormatIdc = bitArray.readUnsignedExpGolombCodedInt();
if (chromaFormatIdc == 3) {
bitArray.skipBit(); // separate_colour_plane_flag
}
int picWidthInLumaSamples = bitArray.readUnsignedExpGolombCodedInt();
int picHeightInLumaSamples = bitArray.readUnsignedExpGolombCodedInt();
if (bitArray.readBit()) { // conformance_window_flag
int confWinLeftOffset = bitArray.readUnsignedExpGolombCodedInt();
int confWinRightOffset = bitArray.readUnsignedExpGolombCodedInt();
int confWinTopOffset = bitArray.readUnsignedExpGolombCodedInt();
int confWinBottomOffset = bitArray.readUnsignedExpGolombCodedInt();
// H.265/HEVC (2014) Table 6-1
int subWidthC = chromaFormatIdc == 1 || chromaFormatIdc == 2 ? 2 : 1;
int subHeightC = chromaFormatIdc == 1 ? 2 : 1;
picWidthInLumaSamples -= subWidthC * (confWinLeftOffset + confWinRightOffset);
picHeightInLumaSamples -= subHeightC * (confWinTopOffset + confWinBottomOffset);
}
bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_luma_minus8
bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_chroma_minus8
int log2MaxPicOrderCntLsbMinus4 = bitArray.readUnsignedExpGolombCodedInt();
// for (i = sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1; ...)
for (int i = bitArray.readBit() ? 0 : maxSubLayersMinus1; i <= maxSubLayersMinus1; i++) {
bitArray.readUnsignedExpGolombCodedInt(); // sps_max_dec_pic_buffering_minus1[i]
bitArray.readUnsignedExpGolombCodedInt(); // sps_max_num_reorder_pics[i]
bitArray.readUnsignedExpGolombCodedInt(); // sps_max_latency_increase_plus1[i]
}
bitArray.readUnsignedExpGolombCodedInt(); // log2_min_luma_coding_block_size_minus3
bitArray.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_luma_coding_block_size
bitArray.readUnsignedExpGolombCodedInt(); // log2_min_luma_transform_block_size_minus2
bitArray.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_luma_transform_block_size
bitArray.readUnsignedExpGolombCodedInt(); // max_transform_hierarchy_depth_inter
bitArray.readUnsignedExpGolombCodedInt(); // max_transform_hierarchy_depth_intra
// if (scaling_list_enabled_flag) { if (sps_scaling_list_data_present_flag) {...}}
boolean scalingListEnabled = bitArray.readBit();
if (scalingListEnabled && bitArray.readBit()) {
skipScalingList(bitArray);
}
bitArray.skipBits(2); // amp_enabled_flag (1), sample_adaptive_offset_enabled_flag (1)
if (bitArray.readBit()) { // pcm_enabled_flag
// pcm_sample_bit_depth_luma_minus1 (4), pcm_sample_bit_depth_chroma_minus1 (4)
bitArray.skipBits(8);
bitArray.readUnsignedExpGolombCodedInt(); // log2_min_pcm_luma_coding_block_size_minus3
bitArray.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_pcm_luma_coding_block_size
bitArray.skipBit(); // pcm_loop_filter_disabled_flag
}
// Skips all short term reference picture sets.
skipShortTermRefPicSets(bitArray);
if (bitArray.readBit()) { // long_term_ref_pics_present_flag
// num_long_term_ref_pics_sps
for (int i = 0; i < bitArray.readUnsignedExpGolombCodedInt(); i++) {
int ltRefPicPocLsbSpsLength = log2MaxPicOrderCntLsbMinus4 + 4;
// lt_ref_pic_poc_lsb_sps[i], used_by_curr_pic_lt_sps_flag[i]
bitArray.skipBits(ltRefPicPocLsbSpsLength + 1);
}
}
bitArray.skipBits(2); // sps_temporal_mvp_enabled_flag, strong_intra_smoothing_enabled_flag
float pixelWidthHeightRatio = 1;
if (bitArray.readBit()) { // vui_parameters_present_flag
if (bitArray.readBit()) { // aspect_ratio_info_present_flag
int aspectRatioIdc = bitArray.readBits(8);
if (aspectRatioIdc == NalUnitUtil.EXTENDED_SAR) {
int sarWidth = bitArray.readBits(16);
int sarHeight = bitArray.readBits(16);
if (sarWidth != 0 && sarHeight != 0) {
pixelWidthHeightRatio = (float) sarWidth / sarHeight;
}
} else if (aspectRatioIdc < NalUnitUtil.ASPECT_RATIO_IDC_VALUES.length) {
pixelWidthHeightRatio = NalUnitUtil.ASPECT_RATIO_IDC_VALUES[aspectRatioIdc];
} else {
Log.w(TAG, "Unexpected aspect_ratio_idc value: " + aspectRatioIdc);
}
}
if (bitArray.readBit()) { // overscan_info_present_flag
bitArray.skipBit(); // overscan_appropriate_flag
}
if (bitArray.readBit()) { // video_signal_type_present_flag
bitArray.skipBits(4); // video_format, video_full_range_flag
if (bitArray.readBit()) { // colour_description_present_flag
// colour_primaries, transfer_characteristics, matrix_coeffs
bitArray.skipBits(24);
}
}
if (bitArray.readBit()) { // chroma_loc_info_present_flag
bitArray.readUnsignedExpGolombCodedInt(); // chroma_sample_loc_type_top_field
bitArray.readUnsignedExpGolombCodedInt(); // chroma_sample_loc_type_bottom_field
}
bitArray.skipBit(); // neutral_chroma_indication_flag
if (bitArray.readBit()) { // field_seq_flag
// field_seq_flag equal to 1 indicates that the coded video sequence conveys pictures that
// represent fields, which means that frame height is double the picture height.
picHeightInLumaSamples *= 2;
}
}
// Skip the 3-byte NAL unit start code synthesised by the NalUnitTargetBuffer constructor.
NalUnitUtil.H265SpsData spsData =
NalUnitUtil.parseH265SpsNalUnit(sps.nalData, /* nalOffset= */ 3, sps.nalLength);

String codecs =
CodecSpecificDataUtil.buildHevcCodecString(
generalProfileSpace,
generalTierFlag,
generalProfileIdc,
generalProfileCompatibilityFlags,
constraintBytes,
generalLevelIdc);
spsData.generalProfileSpace,
spsData.generalTierFlag,
spsData.generalProfileIdc,
spsData.generalProfileCompatibilityFlags,
spsData.constraintBytes,
spsData.generalLevelIdc);

return new Format.Builder()
.setId(formatId)
.setSampleMimeType(MimeTypes.VIDEO_H265)
.setCodecs(codecs)
.setWidth(picWidthInLumaSamples)
.setHeight(picHeightInLumaSamples)
.setPixelWidthHeightRatio(pixelWidthHeightRatio)
.setWidth(spsData.width)
.setHeight(spsData.height)
.setPixelWidthHeightRatio(spsData.pixelWidthHeightRatio)
.setInitializationData(Collections.singletonList(csdData))
.build();
}

/** Skips scaling_list_data(). See H.265/HEVC (2014) 7.3.4. */
private static void skipScalingList(ParsableNalUnitBitArray bitArray) {
for (int sizeId = 0; sizeId < 4; sizeId++) {
for (int matrixId = 0; matrixId < 6; matrixId += sizeId == 3 ? 3 : 1) {
if (!bitArray.readBit()) { // scaling_list_pred_mode_flag[sizeId][matrixId]
// scaling_list_pred_matrix_id_delta[sizeId][matrixId]
bitArray.readUnsignedExpGolombCodedInt();
} else {
int coefNum = min(64, 1 << (4 + (sizeId << 1)));
if (sizeId > 1) {
// scaling_list_dc_coef_minus8[sizeId - 2][matrixId]
bitArray.readSignedExpGolombCodedInt();
}
for (int i = 0; i < coefNum; i++) {
bitArray.readSignedExpGolombCodedInt(); // scaling_list_delta_coef
}
}
}
}
}

/**
* Reads the number of short term reference picture sets in a SPS as ue(v), then skips all of
* them. See H.265/HEVC (2014) 7.3.7.
*/
private static void skipShortTermRefPicSets(ParsableNalUnitBitArray bitArray) {
int numShortTermRefPicSets = bitArray.readUnsignedExpGolombCodedInt();
boolean interRefPicSetPredictionFlag = false;
int numNegativePics;
int numPositivePics;
// As this method applies in a SPS, the only element of NumDeltaPocs accessed is the previous
// one, so we just keep track of that rather than storing the whole array.
// RefRpsIdx = stRpsIdx - (delta_idx_minus1 + 1) and delta_idx_minus1 is always zero in SPS.
int previousNumDeltaPocs = 0;
for (int stRpsIdx = 0; stRpsIdx < numShortTermRefPicSets; stRpsIdx++) {
if (stRpsIdx != 0) {
interRefPicSetPredictionFlag = bitArray.readBit();
}
if (interRefPicSetPredictionFlag) {
bitArray.skipBit(); // delta_rps_sign
bitArray.readUnsignedExpGolombCodedInt(); // abs_delta_rps_minus1
for (int j = 0; j <= previousNumDeltaPocs; j++) {
if (bitArray.readBit()) { // used_by_curr_pic_flag[j]
bitArray.skipBit(); // use_delta_flag[j]
}
}
} else {
numNegativePics = bitArray.readUnsignedExpGolombCodedInt();
numPositivePics = bitArray.readUnsignedExpGolombCodedInt();
previousNumDeltaPocs = numNegativePics + numPositivePics;
for (int i = 0; i < numNegativePics; i++) {
bitArray.readUnsignedExpGolombCodedInt(); // delta_poc_s0_minus1[i]
bitArray.skipBit(); // used_by_curr_pic_s0_flag[i]
}
for (int i = 0; i < numPositivePics; i++) {
bitArray.readUnsignedExpGolombCodedInt(); // delta_poc_s1_minus1[i]
bitArray.skipBit(); // used_by_curr_pic_s1_flag[i]
}
}
}
}

@EnsuresNonNull({"output", "sampleReader"})
private void assertTracksCreated() {
Assertions.checkStateNotNull(output);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ public void sampleWithH265() throws Exception {
ExtractorAsserts.assertBehavior(TsExtractor::new, "media/ts/sample_h265.ts", simulationConfig);
}

@Test
public void sampleWithH265RpsPred() throws Exception {
ExtractorAsserts.assertBehavior(
TsExtractor::new, "media/ts/sample_h265_rps_pred.ts", simulationConfig);
}

@Test
public void sampleWithScte35() throws Exception {
ExtractorAsserts.assertBehavior(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=0]]
getPosition(1) = [[timeUs=1, position=0]]
getPosition(500000) = [[timeUs=500000, position=7134]]
getPosition(1000000) = [[timeUs=1000000, position=14457]]
numberOfTracks = 1
track 256:
total output bytes = 10004
sample count = 15
format 0:
id = 1/256
sampleMimeType = video/hevc
codecs = hvc1.1.6.L63.90
width = 914
height = 686
pixelWidthHeightRatio = 1.0003651
initializationData:
data = length 146, hash 61554FEF
sample 0:
time = 266666
flags = 1
data = length 7464, hash EBF8518B
sample 1:
time = 1200000
flags = 0
data = length 1042, hash F69C93E1
sample 2:
time = 733333
flags = 0
data = length 465, hash 2B469969
sample 3:
time = 466666
flags = 0
data = length 177, hash 79777966
sample 4:
time = 333333
flags = 0
data = length 65, hash 63DA4886
sample 5:
time = 400000
flags = 0
data = length 33, hash EFE759C6
sample 6:
time = 600000
flags = 0
data = length 88, hash 98333D02
sample 7:
time = 533333
flags = 0
data = length 49, hash F9A023E1
sample 8:
time = 666666
flags = 0
data = length 58, hash 74F1E9D9
sample 9:
time = 933333
flags = 0
data = length 114, hash FA033C4D
sample 10:
time = 800000
flags = 0
data = length 87, hash 1A1C57E4
sample 11:
time = 866666
flags = 0
data = length 65, hash 59F937BE
sample 12:
time = 1066666
flags = 0
data = length 94, hash 5D02AC81
sample 13:
time = 1000000
flags = 0
data = length 57, hash 2750D207
sample 14:
time = 1133333
flags = 0
data = length 46, hash CE770A40
tracksEnded = true
Loading

0 comments on commit fab134f

Please sign in to comment.