Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 128 additions & 30 deletions audb/core/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def _add_attachment(
archive: str,
checksum: str,
):
r"""Add or update attachment.
r"""Add attachment.

Args:
file: relative path of attachment
Expand All @@ -443,18 +443,22 @@ def _add_attachment(
"""
format = audeer.file_extension(file).lower()

self._df.loc[file] = [
archive, # archive
0, # bit_depth
0, # channels
checksum, # checksum
0.0, # duration
format, # format
0, # removed
0, # sampling_rate
define.DEPENDENCY_TYPE["attachment"], # type
version, # version
values = [
(
file, # file
archive, # archive
0, # bit_depth
0, # channels
checksum, # checksum
0.0, # duration
format, # format
0, # removed
0, # sampling_rate
define.DEPENDENCY_TYPE["attachment"], # type
version, # version
)
]
self._add_rows(values)

def _add_media(
self,
Expand All @@ -481,20 +485,15 @@ def _add_media(
where each tuple holds the values of a new media entry

"""
df = pd.DataFrame.from_records(
values,
columns=["file"] + list(define.DEPENDENCY_TABLE.keys()),
).set_index("file")
df = self._set_dtypes(df)
self._df = pd.concat([self._df, df])
self._add_rows(values)

def _add_meta(
self,
file: str,
version: str,
checksum: str,
):
r"""Add or update table file.
r"""Add table file.

Args:
file: relative file path
Expand All @@ -508,18 +507,54 @@ def _add_meta(
else:
archive = os.path.splitext(file[3:])[0]

self._df.loc[file] = [
archive, # archive
0, # bit_depth
0, # channels
checksum, # checksum
0.0, # duration
format, # format
0, # removed
0, # sampling_rate
define.DEPENDENCY_TYPE["meta"], # type
version, # version
values = [
(
file, # file
archive, # archive
0, # bit_depth
0, # channels
checksum, # checksum
0.0, # duration
format, # format
0, # removed
0, # sampling_rate
define.DEPENDENCY_TYPE["meta"], # type
version, # version
)
]
self._add_rows(values)

def _add_rows(
self,
rows: Sequence[
tuple[
str, # file
str, # archive
int, # bit_depth
int, # channels
str, # checksum
float, # duration
str, # format
int, # removed
float, # sampling_rate
int, # type
str, # version
]
],
):
r"""Add new entries.

Args:
rows: list of tuples,
where each tuple holds the values of a new dependency table row

"""
df = pd.DataFrame.from_records(
rows,
columns=["file"] + list(define.DEPENDENCY_TABLE.keys()),
).set_index("file")
df = self._set_dtypes(df)
self._df = pd.concat([self._df, df])

def _column_loc(
self,
Expand Down Expand Up @@ -639,6 +674,36 @@ def _table_to_dataframe(self, table: pa.Table) -> pd.DataFrame:
df.index = df.index.astype(define.DEPENDENCY_INDEX_DTYPE)
return df

def _update_attachment(
    self,
    file: str,
    version: str,
    archive: str,
    checksum: str,
):
    r"""Update attachment.

    Assigns a complete row to the entry ``file`` of the dependency table.
    All media related columns
    (bit depth, channels, duration, sampling rate)
    and the removed flag are set to zero.

    Args:
        file: relative path of attachment
        version: version string
        archive: archive name without extension
        checksum: checksum of file

    """
    # Lower-cased file extension is stored as the format of the attachment
    extension = audeer.file_extension(file).lower()
    attachment_type = define.DEPENDENCY_TYPE["attachment"]
    # Order of values:
    # archive, bit_depth, channels, checksum, duration,
    # format, removed, sampling_rate, type, version
    row = [
        archive,
        0,
        0,
        checksum,
        0.0,
        extension,
        0,
        0,
        attachment_type,
        version,
    ]
    self._df.loc[file] = row

def _update_media(
self,
values: Sequence[
Expand Down Expand Up @@ -685,6 +750,39 @@ def _update_media_version(
"""
self._df.loc[files, "version"] = version

def _update_meta(
    self,
    file: str,
    version: str,
    checksum: str,
):
    r"""Update table file.

    Assigns a complete row to the entry ``file`` of the dependency table.
    For parquet files the archive is an empty string,
    for all other formats it is derived from the file name.
    All media related columns
    (bit depth, channels, duration, sampling rate)
    and the removed flag are set to zero.

    Args:
        file: relative file path
        version: version string
        checksum: checksum of file

    """
    extension = audeer.file_extension(file).lower()
    # For non-parquet tables the archive name is the file name
    # without its first three characters and without its extension
    # NOTE(review): the three characters are presumably the "db." prefix,
    # confirm against callers
    archive = "" if extension == "parquet" else os.path.splitext(file[3:])[0]
    meta_type = define.DEPENDENCY_TYPE["meta"]
    # Order of values:
    # archive, bit_depth, channels, checksum, duration,
    # format, removed, sampling_rate, type, version
    row = [
        archive,
        0,
        0,
        checksum,
        0.0,
        extension,
        0,
        0,
        meta_type,
        version,
    ]
    self._df.loc[file] = row


def error_message_missing_object(
object_type: str,
Expand Down
15 changes: 13 additions & 2 deletions audb/core/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,22 @@ def _find_attachments(
db.attachments[attachment_id].files
else:
checksum = utils.md5(audeer.path(db_root, path))
if path not in deps or checksum != deps.checksum(path):
if path not in deps:
deps._add_attachment(
file=path,
version=version,
archive=attachment_id,
checksum=checksum,
)
attachment_ids.append(attachment_id)
elif checksum != deps.checksum(path):
deps._update_attachment(
file=path,
version=version,
archive=attachment_id,
checksum=checksum,
)
attachment_ids.append(attachment_id)

return list(attachment_ids)

Expand Down Expand Up @@ -302,9 +310,12 @@ def _find_tables(
disable=not verbose,
):
checksum = utils.md5(os.path.join(db_root, file))
if file not in deps or checksum != deps.checksum(file):
if file not in deps:
deps._add_meta(file, version, checksum)
tables.append(table)
elif checksum != deps.checksum(file):
deps._update_meta(file, version, checksum)
tables.append(table)

return tables

Expand Down
Loading