/** * Copyright (c) 2016-present, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include #include "caffe2/core/blob_serialization.h" #include "caffe2/core/init.h" #include "caffe2/core/logging.h" #include "caffe2/core/net.h" #include "caffe2/core/operator.h" #include "caffe2/utils/string_utils.h" #include "c10/util/string_utils.h" #include using std::map; using std::shared_ptr; using std::string; using std::vector; template void writeTextOutput( TensorType* tensor, const string& output_prefix, const string& name, int index, int num_blobs) { if (index >= num_blobs) { return; } string filename = name; std::replace(filename.begin(), filename.end(), '/', '_'); string output_name = output_prefix + "/" + filename + ".txt"; caffe2::TensorSerializer ser; caffe2::BlobProto blob_proto; ser.Serialize( *tensor, output_name, blob_proto.mutable_tensor(), 0, tensor->numel()); blob_proto.set_name(output_name); blob_proto.set_type("Tensor"); CAFFE_ENFORCE(blob_proto.has_tensor()); caffe2::TensorProto tensor_proto = blob_proto.tensor(); int dims_size = tensor_proto.dims_size(); long long elem_dim_size = dims_size > 1 ? tensor_proto.dims(1) : tensor_proto.dims(0); for (const auto i : c10::irange(2, dims_size)) { elem_dim_size *= tensor_proto.dims(i); } std::vector lines; std::string dims; for (const auto i : c10::irange(dims_size)) { int dim = tensor_proto.dims(i); if (i > 0) { dims += ", "; } dims += c10::to_string(dim); } lines.push_back(dims); std::stringstream line; if (tensor_proto.data_type() == caffe2::TensorProto::FLOAT) { auto start = tensor_proto.float_data().begin(); auto end = tensor_proto.float_data().end(); copy(start, end, std::ostream_iterator(line, ",")); } else if (tensor_proto.data_type() == caffe2::TensorProto::INT32) { auto start = tensor_proto.int32_data().begin(); auto end = tensor_proto.int32_data().end(); copy(start, end, std::ostream_iterator(line, ",")); } else { CAFFE_THROW("Unimplemented Blob type."); } // remove the last , string str = line.str(); if(str.length() != 0) { str.pop_back(); } lines.push_back(str); // static casts are workaround for MSVC build auto flags = static_cast(std::ios::out); if (index != 0) { flags |= static_cast(std::ios::app); } else { flags |= static_cast(std::ios::trunc); } std::ofstream output_file(output_name, flags); std::ostream_iterator output_iterator(output_file, "\n"); std::copy(lines.begin(), lines.end(), output_iterator); } void observerConfig(); bool backendCudaSet(const string&); void setDeviceType(caffe2::NetDef*, caffe2::DeviceType&); void setOperatorEngine(caffe2::NetDef*, const string&); int loadInput( shared_ptr workspace, const bool run_on_gpu, map& tensor_protos_map, const string& input, const string& input_file, const string& input_dims, const string& input_type); void fillInputBlob( shared_ptr workspace, map& tensor_protos_map, int iteration); void writeOutput( shared_ptr workspace, const bool run_on_gpu, const string& output, const string& output_folder, const bool text_output, const int index, const int num_blobs); void logBenchmarkResult( const std::string& type, const std::string& metric, const std::string& unit, const int value); long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory); void runNetwork( shared_ptr workspace, caffe2::NetBase* net, map& tensor_protos_map, const bool wipe_cache, const bool run_individual, const bool run_on_gpu, const bool text_output, const int warmup, const int iter, const int num_blobs, const int sleep_before_run, const int sleep_between_iteration, const int sleep_between_net_and_operator, const std::string& output, const std::string& output_folder); int benchmark( int argc, char* argv[], const string& FLAGS_backend, const string& FLAGS_init_net, const string& FLAGS_input, const string& FLAGS_input_dims, const string& FLAGS_input_file, const string& FLAGS_input_type, int FLAGS_iter, bool FLAGS_measure_memory, const string& FLAGS_net, const string& FLAGS_output, const string& FLAGS_output_folder, bool FLAGS_run_individual, int FLAGS_sleep_before_run, int FLAGS_sleep_between_iteration, int FLAGS_sleep_between_net_and_operator, bool FLAGS_text_output, int FLAGS_warmup, bool FLAGS_wipe_cache);