Hello everyone! I have faced the following problem. I am trying to launch YOLOv3 in PyACL. The model itself is from ModelZoo (https://www.hiascend.com/en/software/modelzoo/detail/1/5c3ec43f66ba455a9992fff905c6d687) However, it does not work: when i get results, all the scores are less then 0.2. This is wrong because i have launched original Tensorflow model and it works fine on the same input data. My code is as follows:
import acl
import time
import numpy as np
from PIL import Image
import cv2
import random
def get_color_table(class_num, seed=2):
random.seed(seed)
color_table = {}
for i in range(class_num):
color_table[i] = [random.randint(0, 255) for _ in range(3)]
return color_table
def letterbox_resize(img, new_width, new_height, interp=0):
'''
Letterbox resize. keep the original aspect ratio in the resized image.
'''
ori_height, ori_width = img.shape[:2]
resize_ratio = min(new_width / ori_width, new_height / ori_height)
resize_w = int(resize_ratio * ori_width)
resize_h = int(resize_ratio * ori_height)
img = cv2.resize(img, (resize_w, resize_h), interpolation=interp)
image_padded = np.full((new_height, new_width, 3), 128, np.uint8)
dw = int((new_width - resize_w) / 2)
dh = int((new_height - resize_h) / 2)
image_padded[dh: resize_h + dh, dw: resize_w + dw, :] = img
return image_padded, resize_ratio, dw, dh
return image_padded, resize_ratio, dw, dh
def py_nms(boxes, scores, max_boxes=50, iou_thresh=0.5):
"""
Pure Python NMS baseline.
Arguments: boxes: shape of [-1, 4], the value of '-1' means that dont know the
exact number of boxes
scores: shape of [-1,]
max_boxes: representing the maximum of boxes to be selected by non_max_suppression
iou_thresh: representing iou_threshold for deciding to keep boxes
"""
assert boxes.shape[1] == 4 and len(scores.shape) == 1
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
#print(areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= iou_thresh)[0]
order = order[inds + 1]
return keep[:max_boxes]
ACL_MEMCPY_DEVICE_TO_HOST = 2
ACL_MEM_MALLOC_NORMAL_ONLY = 2
ACL_MEMCPY_HOST_TO_DEVICE = 1
image_path = "./front-slide-6.jpg"
#Initialize resources
ret = acl.init()
device_id = 2
model_path = "./yolov3_framework_tensorflow_aipp_1_batch_1_input_fp16_output_FP32.om"
ret = acl.rt.set_device(device_id)
context, ret = acl.rt.create_context(device_id)
stream, ret = acl.rt.create_stream()
#Load model
model_id, ret = acl.mdl.load_from_file(model_path)
if ret != 0:
print("acl.mdl.load_from_file failed: ", ret)
model_desc = acl.mdl.create_desc()
ret = acl.mdl.get_desc(model_desc, model_id)
#Create data types that describe model input and output
input = acl.mdl.create_dataset()
output = acl.mdl.create_dataset()
#Obtain the number of inputs and outputs of the model
in_num = acl.mdl.get_num_inputs(model_desc)
#print ("Model inputs = ", in_num)
out_num = acl.mdl.get_num_outputs(model_desc)
#print ("Model outputs = ", out_num)
#Create input dataset
index = 0
input_size = acl.mdl.get_input_size_by_index(model_desc, index)
print ("Input size= ", input_size)
#Input memory allocation
in_dev_ptr, ret = acl.rt.malloc(input_size, 2)
image_file = Image.open(image_path)
image_file=image_file.resize((416, 416))
#img, resize_ratio, dw, dh = letterbox_resize(image_file, 416, 416)
print("Image format= ", image_file.format, "Image Size= ", image_file.size, "Image Mode= ", image_file.mode)
img = np.array(image_file)
print("img shape", img.shape)
print("pixel =", img[100, 100, 1])
img = img.astype("float32")
#Image normalisation
img[:, :, 0] /= 255.0
img[:, :, 1] /= 255.0
img[:, :, 2] /= 255.0
#Modify tensor for NHWC format
shape_t = img.shape
img = img.reshape([1] + list(shape_t))
img = img.transpose([0, 3, 1, 2])
print("img.shape after transpose= ", img.shape)
print("pixel after transpose= ", img[0, 1, 100, 100])
data = np.frombuffer(img.tobytes(), np.float32)
print("data.shape = ", data.shape)
print("pixel after np.frombuffer", data[608*608 + 99*608 + 100] )
#test
img = img.transpose([0, 2, 3, 1])
print("img.shape after transpose II = ", img.shape)
img[:, :, :, 0] *= 255.0
img[:, :, :, 1] *= 255.0
img[:, :, :, 2] *= 255.0
img.astype("byte")
img_ori = cv2.imread("front-slide-6.jpg")
img_ori= np.zeros((416,416,3), np.float32)
img, resize_ratio, dw, dh = letterbox_resize(img_ori, 416, 416)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.
#HERE DATA
for i in range(0,10):
print(img[0][i])
data = np.frombuffer(img.tobytes(), np.byte)
#img.tofile("test")
#test
img_ptr = acl.util.numpy_to_ptr(data) # host ptr
# memcopy host to device
image_buffer_size = data.size * data.itemsize
img_device, ret = acl.rt.malloc(image_buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY)
if ret != 0:
print("acl.rt.malloc failed: ", ret)
ret = acl.rt.memcpy(img_device, image_buffer_size, img_ptr, image_buffer_size, ACL_MEMCPY_HOST_TO_DEVICE)
if ret != 0:
print("acl.rt.memcpy failed: ", ret)
#Create Data Buffer
in_buff = acl.create_data_buffer(img_device, input_size)
#Add buffers to dataset
input, ret = acl.mdl.add_dataset_buffer(input, in_buff)
#Create output dataset
#out_dev_ptr = []
#out_buff = []
for index in range(out_num):
output_size = acl.mdl.get_output_size_by_index(model_desc, index)
#print ("Output size= ", output_size, "for index: ", index)
out_dev_ptr, ret = acl.rt.malloc(output_size, 2)
out_buff = acl.create_data_buffer(out_dev_ptr, output_size)
output, ret = acl.mdl.add_dataset_buffer(output, out_buff)
#Model inference
start_time = time.time()
steps = 1
for i in range(steps):
ret = acl.mdl.execute(model_id, input, output)
if ret != 0:
print("acl.mdl.execute failed: ", ret)
#print("FPS = " , 1.0/((time.time() - start_time )/steps))
#Get Model Result
out_data=[]
for i in range(out_num):
temp_output_buf = acl.mdl.get_dataset_buffer(output, i)
infer_output_ptr = acl.get_data_buffer_addr(temp_output_buf)
infer_output_size = acl.get_data_buffer_size_v2(temp_output_buf)
output_host, ret = acl.rt.malloc_host(infer_output_size)
if ret != 0:
print("acl.rt.malloc_host failed: ", ret)
ret = acl.rt.memcpy(output_host, infer_output_size, infer_output_ptr, infer_output_size, ACL_MEMCPY_DEVICE_TO_HOST)
if ret != 0:
print("acl.rt.memcpy failed: ", ret)
#output_host_dict = [{"buffer": output_host, "size": infer_output_size}]
#result = self.get_result(output_host_dict)
data = acl.util.ptr_to_numpy(output_host, (infer_output_size,), 1)
#print("data before float32", data.shape)
data.astype("float32")
data_fp32 = np.frombuffer(data.tobytes(), np.float32)
#print("data after float32", data_fp32.shape)
data_fp32.tofile("outData" + str(i))
# for i in range(infer_output_size):
# if data_fp32[i + 4] > 0.7:
# print("----------------------")
# print("tx= ", data_fp32[i + 0])
# print("ty= ", data_fp32[i + 1])
# print("tw= ", data_fp32[i + 2])
# print("th= ", data_fp32[i + 3])
# print("p0= ", data_fp32[i + 4])
# print("p1= ", data_fp32[i + 5])
# print("----------------------")
print(len(data_fp32))
out_data.append(data_fp32)
scores_=out_data[0].reshape(1,10647,80)
print(scores_.shape)
print(scores_)
#Release Resources
ret= acl.rt.free(in_dev_ptr)
ret= acl.rt.free(out_dev_ptr)
ret = acl.mdl.destroy_dataset(input)
ret = acl.mdl.destroy_dataset(output)
ret = acl.mdl.unload(model_id)
ret = acl.rt.destroy_stream(stream)
ret = acl.rt.destroy_context(context)
ret = acl.rt.reset_device(device_id)
acl.finalize()How can fix this problem and get right results from the model? Maybe there is another way on working with YOLOv3 in python?
