Skip to content

Parse metadata from fMP4 files #2453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
* Transformer:
* Track Selection:
* Extractors:
* Parse metadata from fragmented MP4 files
([#2084](https://github.com/androidx/media/issues/2084)).
* JPEG: Support motion photos that don't have an Exif segment at the start
([#2552](https://github.com/androidx/media/issues/2552)).
* DataSource:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import androidx.media3.common.DrmInitData;
import androidx.media3.common.DrmInitData.SchemeData;
import androidx.media3.common.Format;
import androidx.media3.common.Metadata;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.Log;
Expand Down Expand Up @@ -483,6 +484,8 @@ public void init(ExtractorOutput output) {
enterReadingAtomHeaderState();
initExtraTracks();
if (sideloadedTrack != null) {
Format.Builder formatBuilder = sideloadedTrack.format.buildUpon();
formatBuilder.setContainerMimeType(getContainerMimeType(sideloadedTrack.format));
TrackBundle bundle =
new TrackBundle(
extractorOutput.track(0, sideloadedTrack.type),
Expand All @@ -499,7 +502,7 @@ public void init(ExtractorOutput output) {
/* duration= */ 0,
/* size= */ 0,
/* flags= */ 0),
getContainerMimeType(sideloadedTrack.format));
formatBuilder.build());
trackBundles.put(0, bundle);
extractorOutput.endTracks();
}
Expand Down Expand Up @@ -642,6 +645,9 @@ private boolean readAtomHeader(ExtractorInput input) throws IOException {

if (shouldParseContainerAtom(atomType)) {
long endPosition = input.getPosition() + atomSize - Mp4Box.HEADER_SIZE;
if (atomSize != atomHeaderBytesRead && atomType == Mp4Box.TYPE_meta) {
maybeSkipRemainingMetaAtomHeaderBytes(input);
}
containerAtoms.push(new ContainerBox(atomType, endPosition));
if (atomSize == atomHeaderBytesRead) {
processAtomEnded(endPosition);
Expand Down Expand Up @@ -674,6 +680,14 @@ private boolean readAtomHeader(ExtractorInput input) throws IOException {
return true;
}

/**
 * Peeks {@link Mp4Box#HEADER_SIZE} bytes from {@code input} into {@code scratch}, hands the
 * buffer to {@link BoxParser#maybeSkipRemainingMetaBoxHeaderBytes}, and then skips from {@code
 * input} exactly as many bytes as that call moved {@code scratch}'s position.
 *
 * <p>NOTE(review): presumably the helper advances past the extra full-box header fields of a
 * {@code meta} box only when they are present, leaving the position at zero otherwise — confirm
 * against {@code BoxParser}.
 *
 * @param input The {@link ExtractorInput} to peek from and skip in.
 * @throws IOException If peeking from or skipping in {@code input} fails.
 */
private void maybeSkipRemainingMetaAtomHeaderBytes(ExtractorInput input) throws IOException {
  final int peekLength = Mp4Box.HEADER_SIZE;
  scratch.reset(peekLength);
  byte[] peekedBytes = scratch.getData();
  input.peekFully(peekedBytes, /* offset= */ 0, peekLength);

  // The helper communicates how much to skip through the scratch buffer's position.
  BoxParser.maybeSkipRemainingMetaBoxHeaderBytes(scratch);
  int bytesToSkip = scratch.getPosition();
  input.skipFully(bytesToSkip);

  // Discard the peeked window so later peeks start from the new read position.
  input.resetPeekPosition();
}

private void readAtomPayload(ExtractorInput input) throws IOException {
int atomPayloadSize = (int) (atomSize - atomHeaderBytesRead);
@Nullable ParsableByteArray atomData = this.atomData;
Expand Down Expand Up @@ -743,11 +757,27 @@ private void onMoovContainerAtomRead(ContainerBox moov) throws ParserException {
}
}

@Nullable Metadata mdtaMetadata = null;
@Nullable Mp4Box.ContainerBox meta = moov.getContainerBoxOfType(Mp4Box.TYPE_meta);
if (meta != null) {
mdtaMetadata = BoxParser.parseMdtaFromMeta(meta);
}
GaplessInfoHolder gaplessInfoHolder = new GaplessInfoHolder();
@Nullable Metadata udtaMetadata = null;
@Nullable Mp4Box.LeafBox udta = moov.getLeafBoxOfType(Mp4Box.TYPE_udta);
if (udta != null) {
udtaMetadata = BoxParser.parseUdta(udta);
gaplessInfoHolder.setFromMetadata(udtaMetadata);
}
Metadata mvhdMetadata =
new Metadata(
BoxParser.parseMvhd(checkNotNull(moov.getLeafBoxOfType(Mp4Box.TYPE_mvhd)).data));

// Construction of tracks and sample tables.
List<TrackSampleTable> sampleTables =
parseTraks(
moov,
new GaplessInfoHolder(),
gaplessInfoHolder,
duration,
drmInitData,
/* ignoreEditLists= */ (flags & FLAG_WORKAROUND_IGNORE_EDIT_LISTS) != 0,
Expand All @@ -763,12 +793,22 @@ private void onMoovContainerAtomRead(ContainerBox moov) throws ParserException {
Track track = sampleTable.track;
TrackOutput output = extractorOutput.track(i, track.type);
output.durationUs(track.durationUs);
Format.Builder formatBuilder = track.format.buildUpon();
formatBuilder.setContainerMimeType(containerMimeType);
MetadataUtil.setFormatGaplessInfo(track.type, gaplessInfoHolder, formatBuilder);
MetadataUtil.setFormatMetadata(
track.type,
mdtaMetadata,
formatBuilder,
track.format.metadata,
udtaMetadata,
mvhdMetadata);
TrackBundle trackBundle =
new TrackBundle(
output,
sampleTable,
getDefaultSampleValues(defaultSampleValuesArray, track.id),
containerMimeType);
formatBuilder.build());
trackBundles.put(track.id, trackBundle);
durationUs = max(durationUs, track.durationUs);
}
Expand Down Expand Up @@ -1900,7 +1940,10 @@ private static boolean shouldParseLeafAtom(int atom) {
|| atom == Mp4Box.TYPE_sgpd
|| atom == Mp4Box.TYPE_elst
|| atom == Mp4Box.TYPE_mehd
|| atom == Mp4Box.TYPE_emsg;
|| atom == Mp4Box.TYPE_emsg
|| atom == Mp4Box.TYPE_udta
|| atom == Mp4Box.TYPE_keys
|| atom == Mp4Box.TYPE_ilst;
}

/** Returns whether the extractor should decode a container atom with type {@code atom}. */
Expand All @@ -1913,7 +1956,8 @@ private static boolean shouldParseContainerAtom(int atom) {
|| atom == Mp4Box.TYPE_moof
|| atom == Mp4Box.TYPE_traf
|| atom == Mp4Box.TYPE_mvex
|| atom == Mp4Box.TYPE_edts;
|| atom == Mp4Box.TYPE_edts
|| atom == Mp4Box.TYPE_meta;
}

/** Holds data corresponding to a metadata sample. */
Expand Down Expand Up @@ -1946,7 +1990,7 @@ private static final class TrackBundle {
public int currentTrackRunIndex;
public int firstSampleToOutputIndex;

private final String containerMimeType;
private final Format baseFormat;
private final ParsableByteArray encryptionSignalByte;
private final ParsableByteArray defaultInitializationVector;

Expand All @@ -1956,11 +2000,11 @@ public TrackBundle(
TrackOutput output,
TrackSampleTable moovSampleTable,
DefaultSampleValues defaultSampleValues,
String containerMimeType) {
Format baseFormat) {
this.output = output;
this.moovSampleTable = moovSampleTable;
this.defaultSampleValues = defaultSampleValues;
this.containerMimeType = containerMimeType;
this.baseFormat = baseFormat;
fragment = new TrackFragment();
scratch = new ParsableByteArray();
encryptionSignalByte = new ParsableByteArray(1);
Expand All @@ -1971,9 +2015,7 @@ public TrackBundle(
public void reset(TrackSampleTable moovSampleTable, DefaultSampleValues defaultSampleValues) {
this.moovSampleTable = moovSampleTable;
this.defaultSampleValues = defaultSampleValues;
Format format =
moovSampleTable.track.format.buildUpon().setContainerMimeType(containerMimeType).build();
output.format(format);
output.format(baseFormat);
resetFragmentInfo();
}

Expand All @@ -1984,14 +2026,7 @@ public void updateDrmInitData(DrmInitData drmInitData) {
castNonNull(fragment.header).sampleDescriptionIndex);
@Nullable String schemeType = encryptionBox != null ? encryptionBox.schemeType : null;
DrmInitData updatedDrmInitData = drmInitData.copyWithSchemeType(schemeType);
Format format =
moovSampleTable
.track
.format
.buildUpon()
.setContainerMimeType(containerMimeType)
.setDrmInitData(updatedDrmInitData)
.build();
Format format = baseFormat.buildUpon().setDrmInitData(updatedDrmInitData).build();
output.format(format);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1085,8 +1085,8 @@ private void processEndOfStreamReadingAtomHeader() {
}

private void maybeSkipRemainingMetaAtomHeaderBytes(ExtractorInput input) throws IOException {
scratch.reset(8);
input.peekFully(scratch.getData(), 0, 8);
scratch.reset(Mp4Box.HEADER_SIZE);
input.peekFully(scratch.getData(), 0, Mp4Box.HEADER_SIZE);
BoxParser.maybeSkipRemainingMetaBoxHeaderBytes(scratch);
input.skipFully(scratch.getPosition());
input.resetPeekPosition();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 27, hash 9F13E633
data = length 8, hash 94643657
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 27, hash 9F13E633
data = length 8, hash 94643657
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 27, hash 9F13E633
data = length 8, hash 94643657
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 27, hash 9F13E633
data = length 8, hash 94643657
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 2426, hash 25737613
sample 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 2426, hash 25737613
sample 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 2426, hash 25737613
sample 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ track 0:
lumaBitdepth = 8
chromaBitdepth = 8
language = und
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 2426, hash 25737613
sample 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 0
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 96000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 192000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 256000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 0
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 96000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 192000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 256000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 0
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ track 0:
channelCount = 6
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3664420004, modification time=3664420004, timescale=1000]
sample 0:
time = 0
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ track 0:
channelCount = 2
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3661133014, modification time=3661133014, timescale=1000]
sample 0:
time = 0
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ track 0:
channelCount = 2
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3661133014, modification time=3661133014, timescale=1000]
sample 0:
time = 240000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ track 0:
channelCount = 2
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3661133014, modification time=3661133014, timescale=1000]
sample 0:
time = 480000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ track 0:
channelCount = 2
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3661133014, modification time=3661133014, timescale=1000]
sample 0:
time = 720000
flags = 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ track 0:
channelCount = 2
sampleRate = 48000
language = und
metadata = entries=[Mp4AlternateGroup: 2]
metadata = entries=[Mp4AlternateGroup: 2, Mp4Timestamp: creation time=3661133014, modification time=3661133014, timescale=1000]
sample 0:
time = 0
flags = 1
Expand Down
Loading