Skip to content

Commit 2b2f312

Browse files
committed
Merge pull request tensorflow#2246 from zheng-xq/branch_121636618
Branch 121636618
2 parents f8eb1d7 + 0eb6a46 commit 2b2f312

File tree

69 files changed

+2567
-496
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+2567
-496
lines changed

tensorflow/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ filegroup(
7373
":all_files",
7474
"//tensorflow/cc:all_files",
7575
"//tensorflow/contrib:all_files",
76+
"//tensorflow/contrib/copy_graph:all_files",
7677
"//tensorflow/contrib/ctc:all_files",
7778
"//tensorflow/contrib/distributions:all_files",
7879
"//tensorflow/contrib/ffmpeg:all_files",

tensorflow/contrib/BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ py_library(
1414
visibility = ["//visibility:public"],
1515
deps = [
1616
"//tensorflow/contrib/bayesflow:bayesflow_py",
17+
"//tensorflow/contrib/copy_graph:copy_graph_py",
1718
"//tensorflow/contrib/ctc:ctc_py",
1819
"//tensorflow/contrib/distributions:distributions_py",
1920
"//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
@@ -31,7 +32,6 @@ py_library(
3132
"//tensorflow/contrib/tensor_forest:tensor_forest_py",
3233
"//tensorflow/contrib/testing:testing_py",
3334
"//tensorflow/contrib/util:util_py",
34-
"//tensorflow/contrib/copy_graph:copy_graph_py",
3535
],
3636
)
3737

tensorflow/contrib/copy_graph/python/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
# ==============================================================================
15+
"""Functions for copying elements from one graph to another.
16+
"""
1517

18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function

tensorflow/contrib/copy_graph/python/util/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
# ==============================================================================
15+
"""Functions for copying elements from one graph to another.
16+
"""
1517

18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function

tensorflow/contrib/ffmpeg/BUILD

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,25 @@ cc_library(
3131
alwayslink = 1,
3232
)
3333

34+
# Private library built from encode_audio_op.cc, linked against the ffmpeg
# helper library and the TensorFlow framework headers.
# NOTE(review): alwayslink = 1 presumably keeps the op/kernel registrations in
# encode_audio_op.cc from being dropped by the linker when nothing references
# them by symbol -- confirm against the sibling decode_audio_op_cc rule.
cc_library(
35+
name = "encode_audio_op_cc",
36+
srcs = ["encode_audio_op.cc"],
37+
copts = tf_copts(),
38+
linkstatic = 1,
39+
visibility = ["//visibility:private"],
40+
deps = [
41+
"//third_party/eigen3",
42+
"//tensorflow/contrib/ffmpeg/kernels:ffmpeg_lib",
43+
"//tensorflow/core:framework_headers_lib",
44+
],
45+
alwayslink = 1,
46+
)
47+
3448
tf_custom_op_library(
35-
name = "decode_audio_op.so",
49+
name = "ffmpeg.so",
3650
deps = [
3751
":decode_audio_op_cc",
52+
":encode_audio_op_cc",
3853
],
3954
)
4055

@@ -47,6 +62,15 @@ tf_gen_op_wrapper_py(
4762
],
4863
)
4964

65+
# Python op wrapper target for the ops defined in :encode_audio_op_cc.
# NOTE(review): ffmpeg_ops.py imports a module named gen_encode_audio_op_py,
# which presumably is the output of this rule -- confirm.
# NOTE(review): require_shape_functions = True presumably relies on the
# ops.RegisterShape('EncodeAudio') registration in ffmpeg_ops.py -- confirm.
tf_gen_op_wrapper_py(
66+
name = "encode_audio_op_py",
67+
require_shape_functions = True,
68+
visibility = ["//visibility:private"],
69+
deps = [
70+
":encode_audio_op_cc",
71+
],
72+
)
73+
5074
tf_py_test(
5175
name = "decode_audio_op_test",
5276
srcs = ["decode_audio_op_test.py"],
@@ -56,7 +80,25 @@ tf_py_test(
5680
"//tensorflow/python:platform",
5781
],
5882
data = [
59-
":decode_audio_op.so",
83+
":ffmpeg.so",
84+
":test_data",
85+
],
86+
tags = [
87+
"local",
88+
"manual",
89+
],
90+
)
91+
92+
tf_py_test(
93+
name = "encode_audio_op_test",
94+
srcs = ["encode_audio_op_test.py"],
95+
additional_deps = [
96+
":ffmpeg_ops_py",
97+
"//third_party/py/tensorflow",
98+
"//tensorflow/python:platform",
99+
],
100+
data = [
101+
":ffmpeg.so",
60102
":test_data",
61103
],
62104
tags = [
@@ -72,11 +114,12 @@ py_library(
72114
"ffmpeg_ops.py",
73115
],
74116
data = [
75-
":decode_audio_op.so",
117+
":ffmpeg.so",
76118
],
77119
srcs_version = "PY2AND3",
78120
deps = [
79121
":decode_audio_op_py",
122+
":encode_audio_op_py",
80123
],
81124
)
82125

tensorflow/contrib/ffmpeg/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@
1919
from __future__ import print_function
2020

2121
from tensorflow.contrib.ffmpeg.ffmpeg_ops import decode_audio
22+
from tensorflow.contrib.ffmpeg.ffmpeg_ops import encode_audio
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// Copyright 2016 Google Inc. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
// =============================================================================
15+
16+
#include <limits>
17+
18+
#include "tensorflow/contrib/ffmpeg/kernels/ffmpeg_lib.h"
19+
#include "tensorflow/core/framework/op.h"
20+
#include "tensorflow/core/framework/op_kernel.h"
21+
22+
namespace tensorflow {
23+
namespace ffmpeg {
24+
25+
class EncodeAudioOp : public OpKernel {
26+
public:
27+
explicit EncodeAudioOp(OpKernelConstruction* context)
28+
: OpKernel(context) {
29+
OP_REQUIRES_OK(context, context->GetAttr("file_format", &file_format_));
30+
file_format_ = str_util::Lowercase(file_format_);
31+
OP_REQUIRES(context, file_format_ == "wav",
32+
errors::InvalidArgument("file_format arg must be \"wav\"."));
33+
34+
OP_REQUIRES_OK(
35+
context, context->GetAttr("samples_per_second", &samples_per_second_));
36+
OP_REQUIRES(context, samples_per_second_ > 0,
37+
errors::InvalidArgument("samples_per_second must be > 0."));
38+
}
39+
40+
void Compute(OpKernelContext* context) override {
41+
// Get and verify the input data.
42+
OP_REQUIRES(context, context->num_inputs() == 1,
43+
errors::InvalidArgument(
44+
"EncodeAudio requires exactly one input."));
45+
const Tensor& contents = context->input(0);
46+
OP_REQUIRES(context, TensorShapeUtils::IsMatrix(contents.shape()),
47+
errors::InvalidArgument(
48+
"sampled_audio must be a rank 2 tensor but got shape ",
49+
contents.shape().DebugString()));
50+
OP_REQUIRES(
51+
context, contents.NumElements() <= std::numeric_limits<int32>::max(),
52+
errors::InvalidArgument(
53+
"sampled_audio cannot have more than 2^31 entries. Shape = ",
54+
contents.shape().DebugString()));
55+
56+
// Create the encoded audio file.
57+
std::vector<float> samples;
58+
samples.reserve(contents.NumElements());
59+
for (int32 i = 0; i < contents.NumElements(); ++i) {
60+
samples.push_back(contents.flat<float>()(i));
61+
}
62+
const int32 channel_count = contents.dim_size(1);
63+
string encoded_audio;
64+
OP_REQUIRES_OK(context,
65+
CreateAudioFile(file_format_, samples_per_second_,
66+
channel_count, samples, &encoded_audio));
67+
68+
// Copy the encoded audio file to the output tensor.
69+
Tensor* output = nullptr;
70+
OP_REQUIRES_OK(context,
71+
context->allocate_output(0, TensorShape(), &output));
72+
output->scalar<string>()() = encoded_audio;
73+
}
74+
75+
private:
76+
string file_format_;
77+
int32 samples_per_second_;
78+
};
79+
80+
REGISTER_KERNEL_BUILDER(Name("EncodeAudio").Device(DEVICE_CPU), EncodeAudioOp);
81+
82+
// Interface of the "EncodeAudio" op: one float input (the sampled audio), one
// string output (the encoded file), and two attrs selecting the file format
// and the sample rate.
REGISTER_OP("EncodeAudio")
    .Input("sampled_audio: float")
    .Output("contents: string")
    .Attr("file_format: string")
    .Attr("samples_per_second: int")
    .Doc(R"doc(
Processes a `Tensor` containing sampled audio with the number of channels
and length of the audio specified by the dimensions of the `Tensor`. The
audio is converted into a string that, when saved to disk, will be equivalent
to the audio in the specified audio format.

The input audio has one column of the tensor for each channel in the audio file.
Each channel contains audio samples starting at the beginning of the audio and
having `1/samples_per_second` time between them. The output file will contain
all of the audio channels contained in the tensor.

sampled_audio: A rank 2 tensor containing all tracks of the audio. Dimension 0
    is time and dimension 1 is the channel.
contents: The binary audio file contents.
file_format: A string describing the audio file format. This must be "wav".
samples_per_second: The number of samples per second that the audio should have.
)doc");
104+
105+
} // namespace ffmpeg
106+
} // namespace tensorflow
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2016 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# =============================================================================
15+
16+
"""Tests for third_party.tensorflow.contrib.ffmpeg.encode_audio_op."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
import os.path
23+
24+
import tensorflow as tf
25+
26+
from tensorflow.contrib import ffmpeg
27+
from tensorflow.python.platform import resource_loader
28+
29+
30+
class EncodeAudioOpTest(tf.test.TestCase):
  """Round-trip test for the `encode_audio` op."""

  def testRoundTrip(self):
    """Decodes a reference wav file, re-encodes it, and compares the bytes."""
    with self.test_session():
      path = os.path.join(
          resource_loader.get_data_files_path(), 'testdata/mono_10khz.wav')
      # A wav file is binary data: read it as bytes so that Python 3 does not
      # try to decode it as text or translate line endings.
      with open(path, 'rb') as f:
        original_contents = f.read()

      audio_op = ffmpeg.decode_audio(
          original_contents, file_format='wav', samples_per_second=10000,
          channel_count=1)
      encode_op = ffmpeg.encode_audio(
          audio_op, file_format='wav', samples_per_second=10000)
      encoded_contents = encode_op.eval()
      # Decoding and re-encoding with the same parameters is expected to
      # reproduce the original file exactly.
      self.assertEqual(original_contents, encoded_contents)


if __name__ == '__main__':
  tf.test.main()

tensorflow/contrib/ffmpeg/ffmpeg_ops.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from __future__ import print_function
2121

2222
from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
23+
from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
2324
from tensorflow.python.framework import load_library
2425
from tensorflow.python.framework import ops
2526
from tensorflow.python.framework import tensor_shape
@@ -75,6 +76,39 @@ def decode_audio(contents, file_format=None, samples_per_second=None,
7576
ops.NoGradient('DecodeAudio')
7677

7778

79+
@ops.RegisterShape('EncodeAudio')
def _encode_audio_shape(unused_op):
  """Shape function registered for the EncodeAudio op.

  Args:
    unused_op: The op whose output shapes are requested. It is not inspected,
        because the result does not depend on it.

  Returns:
    A one-element list holding the shape of the op's only output, the encoded
    audio file, which is a scalar (rank 0).
  """
  scalar_shape = tensor_shape.TensorShape([])
  return [scalar_shape]
88+
89+
90+
def encode_audio(audio, file_format=None, samples_per_second=None):
  """Builds an op that encodes sampled audio from a tensor into an audio file.

  Args:
    audio: A rank 2 tensor with time along dimension 0 and channels along
        dimension 1. Dimension 0 therefore holds
        `samples_per_second * length` entries, where `length` is the duration
        of the audio in seconds.
    file_format: The type of file to encode. "wav" is the only supported
        format.
    samples_per_second: The number of samples in the audio tensor per second
        of audio.

  Returns:
    A scalar tensor holding the encoded audio in the requested file format.
  """
  encoded_file = gen_encode_audio_op_py.encode_audio(
      audio,
      file_format=file_format,
      samples_per_second=samples_per_second)
  return encoded_file


# No gradient is registered for EncodeAudio.
ops.NoGradient('EncodeAudio')
110+
111+
78112
def _load_library(name, op_list=None):
79113
"""Loads a .so file containing the specified operators.
80114
@@ -97,4 +131,4 @@ def _load_library(name, op_list=None):
97131
(expected_op, name))
98132

99133

100-
_load_library('decode_audio_op.so', ['DecodeAudio'])
134+
_load_library('ffmpeg.so', ['DecodeAudio', 'EncodeAudio'])

tensorflow/contrib/ffmpeg/kernels/ffmpeg_lib.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ string GetTempFilename(const string& extension);
2929
// Reads an audio file using ffmpeg and converts it into an array of samples in
3030
// [-1.0, 1.0]. If there are multiple channels in the audio then each frame will
3131
// contain a separate sample for each channel. Frames are ordered by time.
32-
Status ReadAudioFile(const string& filename, const string& audio_format_id,
33-
int32 samples_per_second, int32 channel_count,
32+
Status ReadAudioFile(const string& filename,
33+
const string& audio_format_id,
34+
int32 samples_per_second,
35+
int32 channel_count,
3436
std::vector<float>* output_samples);
3537

3638
// Creates an audio file using ffmpeg in a specific format. The samples are in

0 commit comments

Comments
 (0)