TensorFlow Serving RESTful API support via grpc-gateway: {"error":"json: cannot unmarshal object into Go value of type []json.RawMessage","code":3} #444
I installed TensorFlow Serving to serve some models.
TensorFlow Serving natively exposes a gRPC interface, but I also want it to support a RESTful API, so I am using grpc-gateway in the hope that it can help.
I modified the TensorFlow Serving project file serving/tensorflow_serving/apis/prediction_service.proto as follows:
syntax = "proto3";
package pb;
option cc_enable_arenas = true;
import "google/api/annotations.proto";
import "google/protobuf/wrappers.proto";
// open source marker; do not remove
// PredictionService provides access to machine-learned models loaded by
// model_servers.
service PredictionService {
// Predict -- provides access to loaded TensorFlow model.
rpc Predict(PredictRequest) returns (PredictResponse){
option (google.api.http) = {
post: "/v1/predict"
body: "*"
message PredictRequest {
// Model Specification.
ModelSpec model_spec = 1;
// Input tensors.
// Names of input tensor are alias names. The mapping from aliases to real
// input tensor names is expected to be stored as named generic signature
// under the key "inputs" in the model export.
// Each alias listed in a generic signature named "inputs" should be provided
// exactly once in order to run the prediction.
map<string, TensorProto> inputs = 2;
repeated string output_filter = 3;
// Response for PredictRequest on successful run.
message PredictResponse {
// Output tensors.
map<string, TensorProto> outputs = 1;
// Metadata for an inference request such as the model name and version.
message ModelSpec {
// Required servable name.
string name = 1;
// Optional version. If unspecified, will use the latest (numerical) version.
// Typically not needed unless coordinating across multiple models that were
// co-trained and/or have inter-dependencies on the versions used at inference
// time.
google.protobuf.Int64Value version = 2;
// A named signature to evaluate. If unspecified, the default signature will
// be used.
string signature_name = 3;
message ResourceHandleProto {
// Unique name for the device containing the resource.
string device = 1;
// Container in which this resource is placed.
string container = 2;
// Unique name of this resource.
string name = 3;
// Hash code for the type of the resource. Is only valid in the same device
// and in the same execution.
uint64 hash_code = 4;
// For debug-only, the name of the type pointed to by this handle, if
// available.
string maybe_type_name = 5;
message TensorProto {
DataType dtype = 1;
// Shape of the tensor. TODO(touts): sort out the 0-rank issues.
TensorShapeProto tensor_shape = 2;
// Only one of the representations below is set, one of "tensor_contents" and
// the "xxx_val" attributes. We are not using oneof because as oneofs cannot
// contain repeated fields it would require another extra set of messages.
// Version number.
// In version 0, if the "repeated xxx" representations contain only one
// element, that element is repeated to fill the shape. This makes it easy
// to represent a constant Tensor with a single value.
int32 version_number = 3;
// Serialized raw tensor content from either Tensor::AsProtoTensorContent or
// memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation
// can be used for all tensor types. The purpose of this representation is to
// reduce serialization overhead during RPC call by avoiding serialization of
// many repeated small items.
bytes tensor_content = 4;
// Type specific representations that make it easy to create tensor protos in
// all languages. Only the representation corresponding to "dtype" can
// be set. The values hold the flattened representation of the tensor in
// row major order.
// DT_HALF. Note that since protobuf has no int16 type, we'll have some
// pointless zero padding for each value here.
repeated int32 half_val = 13 [packed = true];
repeated float float_val = 5 [packed = true];
repeated double double_val = 6 [packed = true];
repeated int32 int_val = 7 [packed = true];
repeated bytes string_val = 8;
// DT_COMPLEX64. scomplex_val(2i) and scomplex_val(2i+1) are real
// and imaginary parts of i-th single precision complex.
repeated float scomplex_val = 9 [packed = true];
// DT_INT64
repeated int64 int64_val = 10 [packed = true];
repeated bool bool_val = 11 [packed = true];
// DT_COMPLEX128. dcomplex_val(2i) and dcomplex_val(2i+1) are real
// and imaginary parts of i-th double precision complex.
repeated double dcomplex_val = 12 [packed = true];
repeated ResourceHandleProto resource_handle_val = 14;
// Dimensions of a tensor.
message TensorShapeProto {
// One dimension of the tensor.
message Dim {
// Size of the tensor in that dimension.
// This value must be >= -1, but values of -1 are reserved for "unknown"
// shapes (values of -1 mean "unknown" dimension). Certain wrappers
// that work with TensorShapeProto may fail at runtime when deserializing
// a TensorShapeProto containing a dim value of -1.
int64 size = 1;
// Optional name of the tensor dimension.
string name = 2;
// Dimensions of the tensor, such as {"input", 30}, {"output", 40}
// for a 30 x 40 2D tensor. If an entry has size -1, this
// corresponds to a dimension of unknown size. The names are
// optional.
// The order of entries in "dim" matters: It indicates the layout of the
// values in the tensor in-memory representation.
// The first entry in "dim" is the outermost dimension used to layout the
// values, the last entry is the innermost dimension. This matches the
// in-memory layout of RowMajor Eigen tensors.
// If "dim.size()" > 0, "unknown_rank" must be false.
repeated Dim dim = 2;
// If true, the number of dimensions in the shape is unknown.
// If true, "dim.size()" must be 0.
bool unknown_rank = 3;
// LINT.IfChange
enum DataType {
// Not a legal value for DataType. Used to indicate a DataType field
// has not been set.
// Data types that all computation devices are expected to be
// capable to support.
DT_INT32 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_INT8 = 6;
DT_COMPLEX64 = 8; // Single-precision complex
DT_INT64 = 9;
DT_BOOL = 10;
DT_QINT8 = 11; // Quantized int8
DT_QUINT8 = 12; // Quantized uint8
DT_QINT32 = 13; // Quantized int32
DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops.
DT_QINT16 = 15; // Quantized int16
DT_QUINT16 = 16; // Quantized uint16
DT_UINT16 = 17;
DT_COMPLEX128 = 18; // Double-precision complex
DT_HALF = 19;
// TODO(josh11b): DT_GENERIC_PROTO = ??;
// TODO(jeff,josh11b): DT_UINT64? DT_UINT32?
// Do not use! These are only for parameters. Every enum above
// should have a corresponding value below (verified by types_test).
DT_INT32_REF = 103;
DT_UINT8_REF = 104;
DT_INT16_REF = 105;
DT_INT8_REF = 106;
DT_INT64_REF = 109;
DT_BOOL_REF = 110;
DT_QINT8_REF = 111;
DT_QUINT8_REF = 112;
DT_QINT32_REF = 113;
DT_BFLOAT16_REF = 114;
DT_QINT16_REF = 115;
DT_QUINT16_REF = 116;
DT_UINT16_REF = 117;
DT_COMPLEX128_REF = 118;
DT_HALF_REF = 119;
// LINT.ThenChange(,
I installed grpc-gateway following its usage documentation and generated the reverse proxy. I first started TensorFlow Serving and then started the reverse proxy.
My reverse proxy code (proxy.go):
1 package main
3 import (
4 "flag"
5 "log"
6 "net/http"
8 ""
9 ""
10 ""
12 gw "tensorflow_serving/pb"
13 )
15 var (
16 predictionEndpoint = flag.String("prediction__endpoint", "localhost:9000", "endpoint of prediction gRPC Service")
17 )
19 func run() error {
20 ctx := context.Background()
21 ctx, cancel := context.WithCancel(ctx)
22 defer cancel()
24 mux := runtime.NewServeMux()
25 opts := []grpc.DialOption{grpc.WithInsecure()}
26 err := gw.RegisterPredictionServiceHandlerFromEndpoint(ctx, mux, *predictionEndpoint, opts)
27 if err != nil {
28 return err
29 }
31 log.Print("prediction gRPC Server gateway start at port 8093...")
33 return http.ListenAndServe(":8093", mux)
34 }
36 func main() {
37 flag.Parse()
39 if err := run(); err != nil {
40 log.Fatal(err)
41 }
42 }
go build -o proxy proxy.go  ----> generates the executable file: proxy
I can access TensorFlow Serving with gRPC client code. The code is as follows:
16 #!/usr/bin/env python2.7
18 """Send JPEG image to tensorflow_model_server loaded with inception model.
19 """
21 from future import print_function
23 # This is a placeholder for a Google-internal import.
25 from grpc.beta import implementations
26 import tensorflow as tf
28 from tensorflow_serving.apis import predict_pb2
29 from tensorflow_serving.apis import prediction_service_pb2
32'server', 'localhost:9000',
33 'PredictionService host:port')
34'image', '', 'path to image in JPEG format')
35 FLAGS =
38 def main(_):
39 host, port = FLAGS.server.split(':')
40 channel = implementations.insecure_channel(host, int(port))
41 stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
42 # Send request
43 with open(FLAGS.image, 'rb') as f:
44 # See prediction_service.proto for gRPC request/response details.
45 data =
46 request = predict_pb2.PredictRequest()
47 = 'inception'
48 request.model_spec.signature_name = 'predict_images'
49 request.inputs['images'].CopyFrom(
50 tf.contrib.util.make_tensor_proto(data, shape=[1]))
51 print
52 result = stub.Predict(request, 10.0) # 10 secs timeout
53 print(result)
56 if name == 'main':
In the gRPC client above, the return value of tf.contrib.util.make_tensor_proto(data, shape=[1]) is:
dtype: DT_STRING
tensor_shape {
dim {
size: 1
string_val: "\377\024005- ...... \024P\007\377\331"
When I use a REST client to access the reverse proxy:
url: 'http://ip:8093/v1/predict'
body: {"model_spec": {"name": "inception", "signature_name": "predict_images"}, "inputs": {"images": {"dtype": 7, "tensor_shape": {"dim":{"size": 1 }}, "string_val": "QB........//Z"}}}
The following error is reported:
{"error":"json: cannot unmarshal object into Go value of type []json.RawMessage","code":3}
I hope someone can help me. Thanks.