/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <math.h>
#include "nnacl/fp32/resize_fp32.h"
#include "nnacl/common_func.h"
#include "nnacl/errorcode.h"
#include "nnacl/intrinsics/ms_simd_instructions.h"

void CalculateCoordinate(float out, int in, int32_t *bottom, int32_t *top, float *bottom_weight) {
  *bottom = (int)(floorf(out));
  *bottom = *bottom >= 0 ? *bottom : 0;  // extrapolate may generate neg value
  *top = *bottom + 1 < in ? (*bottom + 1) : (in - 1);
  float top_weight = (float)out - (float)(*bottom);
  *bottom_weight = 1.0f - top_weight;
}

static void BicubicBaseFunc(float a, const float x, float *weight) {
  float abs_x = fabsf(x);
  if (abs_x >= 0 && abs_x <= 1) {
    *weight = ((a + 2) * abs_x - (a + 3)) * abs_x * abs_x + 1;
  } else if (abs_x > 1 && abs_x <= 2) {
    *weight = a * abs_x * abs_x * abs_x - 5 * a * abs_x * abs_x + 8 * a * abs_x - 4 * a;
  } else {
    *weight = 0;
  }
}

// a is a coefficient
// W(x) = { (a + 2) * |x| * |x| * |x| - (a + 3) * |x| * |x| + 1,           for |x| <= 1
//        { a * |x| * |x| * |x| - 5 * a * |x| * |x| + 8 * a *|x| - 4 * a,  for 1 < |x| < 2
//        { 0,                                                             otherwise
// the value of 'a' depends on if is half_pixel_center(the scheme is the same as tf).
// If is half pixel mode, a equals to -0.5, otherwise -0.75.
void CalculateWeightForBicubic(float out, int in, int32_t *index, float *weights, float a) {
  int floor_index = (int)(floorf(out));
  index[0] = (floor_index - 1) < 0 ? 0 : (floor_index - 1);
  index[1] = floor_index;
  index[2] = (floor_index + 1) < in ? (floor_index + 1) : (in - 1);
  index[3] = (floor_index + 2) < in ? (floor_index + 2) : (in - 1);

  // get positive value
  float distance[4] = {-1, 0, 1, 2};
  float tmp_dis = out - (float)floor_index;
  distance[0] -= tmp_dis;
  distance[1] -= tmp_dis;
  distance[2] -= tmp_dis;
  distance[3] -= tmp_dis;

  for (int i = 0; i < 4; ++i) {
    BicubicBaseFunc(a, distance[i], &weights[i]);
  }
}

// Precomputes the per-row and per-column lookup tables used by bilinear resize.
//   input_shape / output_shape  element [1] is read as the height and element [2] as the width
//   calculate                   maps an output position to a fractional source coordinate
//   y_bottoms, y_tops, y_bottom_weights   receive output-height entries each
//   x_lefts, x_rights, x_left_weights     receive output-width entries each
// Returns NNACL_NULL_PTR when any pointer argument (including the callback) is NULL, otherwise NNACL_OK.
int PrepareResizeBilinear(const int32_t *input_shape, const int32_t *output_shape,
                          CalculateOriginalCoordinate calculate, int32_t *y_bottoms, int32_t *y_tops, int32_t *x_lefts,
                          int32_t *x_rights, float *y_bottom_weights, float *x_left_weights) {
  if (input_shape == NULL || output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL ||
      x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  // The callback is invoked below, so it needs the same validation as the data pointers.
  if (calculate == NULL) {
    return NNACL_NULL_PTR;
  }

  int in_h = input_shape[1];
  int in_w = input_shape[2];

  int new_height = output_shape[1];
  int new_width = output_shape[2];

  // Vertical table: one entry per output row.
  for (int h = 0; h < new_height; h++) {
    float actual_y = calculate(h, in_h, new_height);
    CalculateCoordinate(actual_y, in_h, y_bottoms + h, y_tops + h, y_bottom_weights + h);
  }
  // Horizontal table: one entry per output column.
  for (int w = 0; w < new_width; w++) {
    float actual_x = calculate(w, in_w, new_width);
    CalculateCoordinate(actual_x, in_w, x_lefts + w, x_rights + w, x_left_weights + w);
  }
  return NNACL_OK;
}

// Precomputes the per-row and per-column lookup tables used by bicubic resize.
//   input_shape / output_shape  element [1] is read as the height and element [2] as the width
//   calculate                   maps an output position to a fractional source coordinate
//   y_tops, y_weights           receive 4 entries per output row
//   x_lefts, x_weights          receive 4 entries per output column
//   cubic_coeff                 kernel coefficient forwarded to CalculateWeightForBicubic
// Returns NNACL_NULL_PTR when any pointer argument (including the callback) is NULL, otherwise NNACL_OK.
int PrepareResizeBicubic(const int32_t *input_shape, const int32_t *output_shape, CalculateOriginalCoordinate calculate,
                         int32_t *y_tops, int32_t *x_lefts, float *y_weights, float *x_weights, float cubic_coeff) {
  if (input_shape == NULL || output_shape == NULL || y_tops == NULL || x_lefts == NULL || y_weights == NULL ||
      x_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  // The callback is invoked below, so it needs the same validation as the data pointers.
  if (calculate == NULL) {
    return NNACL_NULL_PTR;
  }

  int in_h = input_shape[1];
  int in_w = input_shape[2];
  int new_height = output_shape[1];
  int new_width = output_shape[2];

  // Vertical taps: four indices and four weights per output row.
  for (int h = 0; h < new_height; h++) {
    float actual_y = calculate(h, in_h, new_height);
    CalculateWeightForBicubic(actual_y, in_h, y_tops + 4 * h, y_weights + 4 * h, cubic_coeff);
  }
  // Horizontal taps: four indices and four weights per output column.
  for (int w = 0; w < new_width; w++) {
    float actual_x = calculate(w, in_w, new_width);
    CalculateWeightForBicubic(actual_x, in_w, x_lefts + 4 * w, x_weights + 4 * w, cubic_coeff);
  }
  return NNACL_OK;
}

// Precomputes the bilinear lookup tables for crop-and-resize, one table set per output batch entry.
//   input_shape   element [1] is read as the source height and element [2] as the source width
//   boxes         4 floats per output batch entry, read as {start_h, start_w, end_h, end_w}; each value is
//                 scaled by (source length - 1) to obtain a source coordinate
//   box_idx       not used by this function
//   output_shape  element [0] is the number of boxes, [1] the output height, [2] the output width
//   y_* tables    receive output_shape[0] * output-height entries
//   x_* tables    receive output_shape[0] * output-width entries
// Returns NNACL_NULL_PTR when a required pointer is NULL, otherwise NNACL_OK.
int PrepareCropAndResizeBilinear(const int32_t *input_shape, const float *boxes, const int32_t *box_idx,
                                 const int32_t *output_shape, int32_t *y_bottoms, int32_t *y_tops, int32_t *x_lefts,
                                 int32_t *x_rights, float *y_bottom_weights, float *x_left_weights) {
  if (input_shape == NULL || output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL ||
      x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  // boxes is read for every batch entry below, so reject NULL like the other inputs.
  if (boxes == NULL) {
    return NNACL_NULL_PTR;
  }
  int in_h = input_shape[1];
  int in_w = input_shape[2];
  int batch = output_shape[0];
  int new_height = output_shape[1];
  int new_width = output_shape[2];
  float actual_x;
  float actual_y;

  for (int b = 0; b < batch; b++) {
    const float *box = boxes + b * 4;
    float start_h = box[0];
    float end_h = box[2];
    float start_w = box[1];
    float end_w = box[3];

    // Slices of the output tables that belong to this box.
    int32_t *y_bottom = y_bottoms + b * new_height;
    int32_t *y_top = y_tops + b * new_height;
    float *y_bottom_weight = y_bottom_weights + b * new_height;
    int32_t *x_left = x_lefts + b * new_width;
    int32_t *x_right = x_rights + b * new_width;
    float *x_left_weight = x_left_weights + b * new_width;
    for (int h = 0; h < new_height; h++) {
      if (new_height > 1) {
        // Spread the output rows evenly between the box's start and end rows.
        actual_y = start_h * (in_h - 1) + h * (end_h - start_h) * (in_h - 1) / (new_height - 1);
      } else {
        // A single output row samples the middle of the box.
        actual_y = 0.5 * (end_h + start_h) * (in_h - 1);
      }
      CalculateCoordinate(actual_y, in_h, y_bottom + h, y_top + h, y_bottom_weight + h);
    }
    for (int w = 0; w < new_width; w++) {
      if (new_width > 1) {
        actual_x = start_w * (in_w - 1) + w * (end_w - start_w) * (in_w - 1) / (new_width - 1);
      } else {
        actual_x = 0.5 * (end_w + start_w) * (in_w - 1);
      }
      CalculateCoordinate(actual_x, in_w, x_left + w, x_right + w, x_left_weight + w);
    }
  }
  return NNACL_OK;
}

// Horizontal pass of bilinear interpolation for one source row.
// For every output column, blends the pixels at x_lefts[col] and x_rights[col] of src_line with weights
// x_left_weights[col] and 1 - x_left_weights[col], and writes in_c channels to linear_output.
// Always returns 0.
int InterpRow(const float *src_line, float *linear_output, int new_width, const float *x_left_weights,
              const int32_t *x_lefts, const int32_t *x_rights, int in_c) {
  for (int col = 0; col < new_width; ++col) {
    const float w_left = x_left_weights[col];
    const float w_right = 1.0f - w_left;
    const float *left_px = src_line + x_lefts[col] * in_c;
    const float *right_px = src_line + x_rights[col] * in_c;
    float *dst_px = linear_output + col * in_c;
    int ch = 0;
#if defined(ENABLE_AVX)
    // 8 channels per step.
    MS_FLOAT32X8 w_left_8 = MS_MOV256_F32(w_left);
    MS_FLOAT32X8 w_right_8 = MS_MOV256_F32(w_right);
    while (ch <= in_c - C8NUM) {
      MS_FLOAT32X8 lhs_8 = MS_LD256_F32(left_px + ch);
      MS_FLOAT32X8 rhs_8 = MS_LD256_F32(right_px + ch);
      MS_FLOAT32X8 mixed_8 = MS_ADD256_F32(MS_MUL256_F32(lhs_8, w_left_8), MS_MUL256_F32(rhs_8, w_right_8));
      MS_ST256_F32(dst_px + ch, mixed_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    // 4 channels per step.
    MS_FLOAT32X4 w_left_4 = MS_MOVQ_F32(w_left);
    MS_FLOAT32X4 w_right_4 = MS_MOVQ_F32(w_right);
    while (ch <= in_c - C4NUM) {
      MS_FLOAT32X4 lhs_4 = MS_LDQ_F32(left_px + ch);
      MS_FLOAT32X4 rhs_4 = MS_LDQ_F32(right_px + ch);
      MS_FLOAT32X4 mixed_4 = MS_ADDQ_F32(MS_MULQ_F32(lhs_4, w_left_4), MS_MULQ_F32(rhs_4, w_right_4));
      MS_STQ_F32(dst_px + ch, mixed_4);
      ch += C4NUM;
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    while (ch < in_c) {
      dst_px[ch] = left_px[ch] * w_left + right_px[ch] * w_right;
      ++ch;
    }
  }
  return 0;
}

// Vertical pass of bilinear interpolation: blends two already horizontally interpolated lines.
// Each of the new_width * in_c output values is bottom * y_bottom_weight + top * (1 - y_bottom_weight).
// Always returns 0.
int InterpCol(const float *bottom_line, const float *top_line, float *output, int new_width, float y_bottom_weight,
              int in_c) {
  const float w_top = 1.0f - y_bottom_weight;
#if defined(ENABLE_AVX)
  MS_FLOAT32X8 w_bottom_8 = MS_MOV256_F32(y_bottom_weight);
  MS_FLOAT32X8 w_top_8 = MS_MOV256_F32(w_top);
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
  MS_FLOAT32X4 w_bottom_4 = MS_MOVQ_F32(y_bottom_weight);
  MS_FLOAT32X4 w_top_4 = MS_MOVQ_F32(w_top);
#endif
  for (int col = 0; col < new_width; ++col) {
    const float *lower = bottom_line + col * in_c;
    const float *upper = top_line + col * in_c;
    float *dst_px = output + col * in_c;
    int ch = 0;
#if defined(ENABLE_AVX)
    // 8 channels per step.
    while (ch <= in_c - C8NUM) {
      MS_FLOAT32X8 lower_8 = MS_LD256_F32(lower + ch);
      MS_FLOAT32X8 upper_8 = MS_LD256_F32(upper + ch);
      MS_FLOAT32X8 mixed_8 = MS_ADD256_F32(MS_MUL256_F32(lower_8, w_bottom_8), MS_MUL256_F32(upper_8, w_top_8));
      MS_ST256_F32(dst_px + ch, mixed_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    // 4 channels per step.
    while (ch <= in_c - C4NUM) {
      MS_FLOAT32X4 lower_4 = MS_LDQ_F32(lower + ch);
      MS_FLOAT32X4 upper_4 = MS_LDQ_F32(upper + ch);
      MS_FLOAT32X4 mixed_4 = MS_ADDQ_F32(MS_MULQ_F32(lower_4, w_bottom_4), MS_MULQ_F32(upper_4, w_top_4));
      MS_STQ_F32(dst_px + ch, mixed_4);
      ch += C4NUM;
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    while (ch < in_c) {
      dst_px[ch] = lower[ch] * y_bottom_weight + upper[ch] * w_top;
      ++ch;
    }
  }
  return 0;
}

// Bilinear resize of output rows [h_begin, h_end) for a single image.
// Each output row needs two horizontally interpolated source rows (y_bottom[h] and y_top[h]). Those rows are
// produced by InterpRow into the scratch buffers line0 / line1, which act as a two-entry cache keyed by source
// row number, so a source row shared by consecutive output rows is interpolated only once.
//   input_shape   element [2] is read as the source width, element [3] as the channel count
//   output_shape  element [2] is read as the output width
//   line0, line1  scratch buffers; InterpRow writes output-width * channel floats into each
void Bilinear(const float *input_data, float *output_data, const int32_t *input_shape, const int32_t *output_shape,
              const int32_t *y_bottom, const int32_t *y_top, const int32_t *x_left, const int32_t *x_right,
              const float *y_bottom_weight, const float *x_left_weight, float *line0, float *line1, const int h_begin,
              const int h_end) {
  int in_w = input_shape[2];
  int in_c = input_shape[3];
  int new_width = output_shape[2];
  int h_stride = new_width * in_c;  // floats per output row

  // Cache state: which slot is claimed for the current output row, and which source row each slot holds
  // (-1 means the slot is empty).
  bool cache_line_used[2] = {false, false};
  int cache_line_num[2] = {-1, -1};
  float *const cache_line_ptr[2] = {line0, line1};
  // Per output row: buffer and source row number for the bottom ([0]) and top ([1]) line.
  float *current_line_ptr[2] = {line0, line1};
  int current_line_num[2] = {-1, -1};

  for (int h = h_begin; h < h_end; h++) {
    current_line_num[0] = y_bottom[h];
    current_line_num[1] = y_top[h];

    // Release both slots; they are re-claimed below as the two needed rows are located or computed.
    for (int i = 0; i < 2; i++) {
      cache_line_used[i] = false;
    }
    // For each needed source row, look for it in the cache first.
    for (int j = 0; j < 2; j++) {
      bool find = false;
      for (int k = 0; k < 2; k++) {
        if (current_line_num[j] == cache_line_num[k]) {
          cache_line_used[k] = true;
          current_line_ptr[j] = cache_line_ptr[k];
          find = true;
          break;
        }
      }

      if (!find) {
        // Cache miss: interpolate the source row into the first slot not claimed for this output row.
        const float *line = input_data + current_line_num[j] * in_w * in_c;
        for (int k = 0; k < 2; k++) {
          if (!cache_line_used[k]) {
            cache_line_num[k] = current_line_num[j];
            cache_line_used[k] = true;
            current_line_ptr[j] = cache_line_ptr[k];
            InterpRow(line, current_line_ptr[j], new_width, x_left_weight, x_left, x_right, in_c);
            break;
          }
        }
      }
    }
    // Vertical pass: blend the bottom and top lines into the output row.
    InterpCol(current_line_ptr[0], current_line_ptr[1], output_data + h * h_stride, new_width, y_bottom_weight[h],
              in_c);
  }
}

// Bilinear resize of output rows [h_begin, h_end) for every image in the batch.
//   input_shape   read as {batch, height, width, channel}
//   output_shape  element [1] is the output height, [2] the output width
//   y_* / x_*     lookup tables shared by all images of the batch
//   line0, line1  scratch buffers handed to Bilinear
// Returns NNACL_NULL_PTR when any pointer argument is NULL, otherwise NNACL_OK.
int ResizeBilinear(const float *input_data, float *output_data, const int32_t *input_shape, const int32_t *output_shape,
                   const int32_t *y_bottoms, const int32_t *y_tops, const int32_t *x_lefts, const int32_t *x_rights,
                   const float *y_bottom_weights, const float *x_left_weights, float *line0, float *line1,
                   const int h_begin, const int h_end) {
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL ||
      y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  // The scratch lines are written by Bilinear, so they must be valid as well.
  if (line0 == NULL || line1 == NULL) {
    return NNACL_NULL_PTR;
  }

  int in_b = input_shape[0];
  int in_h = input_shape[1];
  int in_w = input_shape[2];
  int in_c = input_shape[3];
  int new_height = output_shape[1];
  int new_width = output_shape[2];

  for (int b = 0; b < in_b; b++) {
    // Start of image b in the input and output tensors.
    const float *input = input_data + b * in_h * in_w * in_c;
    float *output = output_data + b * new_height * new_width * in_c;
    Bilinear(input, output, input_shape, output_shape, y_bottoms, y_tops, x_lefts, x_rights, y_bottom_weights,
             x_left_weights, line0, line1, h_begin, h_end);
  }
  return NNACL_OK;
}

// Horizontal pass of bicubic interpolation for one source row.
// Output pixel col is the weighted sum of the four source pixels lefts[4 * col .. 4 * col + 3] using
// weights[4 * col .. 4 * col + 3]; `channel` values are written per pixel.
void BicubicInterpRow(const float *src, float *dst, const float *weights, const int32_t *lefts, int width,
                      int channel) {
  for (int col = 0; col < width; ++col) {
    const float *tap_w = weights + 4 * col;
    const int32_t *tap_x = lefts + 4 * col;
    const float *px0 = src + tap_x[0] * channel;
    const float *px1 = src + tap_x[1] * channel;
    const float *px2 = src + tap_x[2] * channel;
    const float *px3 = src + tap_x[3] * channel;
    float *out_px = dst + col * channel;
    int ch = 0;
#if defined(ENABLE_AVX)
    // 8 channels per step.
    MS_FLOAT32X8 k0_8 = MS_MOV256_F32(tap_w[0]);
    MS_FLOAT32X8 k1_8 = MS_MOV256_F32(tap_w[1]);
    MS_FLOAT32X8 k2_8 = MS_MOV256_F32(tap_w[2]);
    MS_FLOAT32X8 k3_8 = MS_MOV256_F32(tap_w[3]);
    while (ch <= channel - C8NUM) {
      MS_FLOAT32X8 term0_8 = MS_MUL256_F32(MS_LD256_F32(px0 + ch), k0_8);
      MS_FLOAT32X8 term1_8 = MS_MUL256_F32(MS_LD256_F32(px1 + ch), k1_8);
      MS_FLOAT32X8 term2_8 = MS_MUL256_F32(MS_LD256_F32(px2 + ch), k2_8);
      MS_FLOAT32X8 term3_8 = MS_MUL256_F32(MS_LD256_F32(px3 + ch), k3_8);
      MS_FLOAT32X8 sum_8 = MS_ADD256_F32(term3_8, MS_ADD256_F32(term2_8, MS_ADD256_F32(term1_8, term0_8)));
      MS_ST256_F32(out_px + ch, sum_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    // 4 channels per step.
    MS_FLOAT32X4 k0_4 = MS_MOVQ_F32(tap_w[0]);
    MS_FLOAT32X4 k1_4 = MS_MOVQ_F32(tap_w[1]);
    MS_FLOAT32X4 k2_4 = MS_MOVQ_F32(tap_w[2]);
    MS_FLOAT32X4 k3_4 = MS_MOVQ_F32(tap_w[3]);
    while (ch <= channel - C4NUM) {
      MS_FLOAT32X4 term0_4 = MS_MULQ_F32(MS_LDQ_F32(px0 + ch), k0_4);
      MS_FLOAT32X4 term1_4 = MS_MULQ_F32(MS_LDQ_F32(px1 + ch), k1_4);
      MS_FLOAT32X4 term2_4 = MS_MULQ_F32(MS_LDQ_F32(px2 + ch), k2_4);
      MS_FLOAT32X4 term3_4 = MS_MULQ_F32(MS_LDQ_F32(px3 + ch), k3_4);
      MS_FLOAT32X4 sum_4 = MS_ADDQ_F32(term3_4, MS_ADDQ_F32(term2_4, MS_ADDQ_F32(term1_4, term0_4)));
      MS_STQ_F32(out_px + ch, sum_4);
      ch += C4NUM;
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    while (ch < channel) {
      out_px[ch] = px0[ch] * tap_w[0] + px1[ch] * tap_w[1] + px2[ch] * tap_w[2] + px3[ch] * tap_w[3];
      ++ch;
    }
  }
}

// Vertical pass of bicubic interpolation.
// src holds four consecutive lines of width * channel floats each; every output value is the sum of the
// four vertically aligned values weighted by weights[0..3].
void BicubicInterpCol(const float *src, float *dst, const float *weights, int width, int channel) {
  const int line_len = width * channel;
  const float *line0 = src;
  const float *line1 = src + line_len;
  const float *line2 = src + 2 * width * channel;
  const float *line3 = src + 3 * width * channel;
  for (int col = 0; col < width; ++col) {
    const int px_off = col * channel;
    const float *px0 = line0 + px_off;
    const float *px1 = line1 + px_off;
    const float *px2 = line2 + px_off;
    const float *px3 = line3 + px_off;
    float *out_px = dst + px_off;
    int ch = 0;
#if defined(ENABLE_AVX)
    // 8 channels per step.
    MS_FLOAT32X8 k0_8 = MS_MOV256_F32(weights[0]);
    MS_FLOAT32X8 k1_8 = MS_MOV256_F32(weights[1]);
    MS_FLOAT32X8 k2_8 = MS_MOV256_F32(weights[2]);
    MS_FLOAT32X8 k3_8 = MS_MOV256_F32(weights[3]);
    while (ch <= channel - C8NUM) {
      MS_FLOAT32X8 term0_8 = MS_MUL256_F32(MS_LD256_F32(px0 + ch), k0_8);
      MS_FLOAT32X8 term1_8 = MS_MUL256_F32(MS_LD256_F32(px1 + ch), k1_8);
      MS_FLOAT32X8 term2_8 = MS_MUL256_F32(MS_LD256_F32(px2 + ch), k2_8);
      MS_FLOAT32X8 term3_8 = MS_MUL256_F32(MS_LD256_F32(px3 + ch), k3_8);
      MS_FLOAT32X8 sum_8 = MS_ADD256_F32(term3_8, MS_ADD256_F32(term2_8, MS_ADD256_F32(term0_8, term1_8)));
      MS_ST256_F32(out_px + ch, sum_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    // 4 channels per step.
    MS_FLOAT32X4 k0_4 = MS_MOVQ_F32(weights[0]);
    MS_FLOAT32X4 k1_4 = MS_MOVQ_F32(weights[1]);
    MS_FLOAT32X4 k2_4 = MS_MOVQ_F32(weights[2]);
    MS_FLOAT32X4 k3_4 = MS_MOVQ_F32(weights[3]);
    while (ch <= channel - C4NUM) {
      MS_FLOAT32X4 term0_4 = MS_MULQ_F32(MS_LDQ_F32(px0 + ch), k0_4);
      MS_FLOAT32X4 term1_4 = MS_MULQ_F32(MS_LDQ_F32(px1 + ch), k1_4);
      MS_FLOAT32X4 term2_4 = MS_MULQ_F32(MS_LDQ_F32(px2 + ch), k2_4);
      MS_FLOAT32X4 term3_4 = MS_MULQ_F32(MS_LDQ_F32(px3 + ch), k3_4);
      MS_FLOAT32X4 sum_4 = MS_ADDQ_F32(term3_4, MS_ADDQ_F32(term2_4, MS_ADDQ_F32(term0_4, term1_4)));
      MS_STQ_F32(out_px + ch, sum_4);
      ch += C4NUM;
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    while (ch < channel) {
      out_px[ch] = px0[ch] * weights[0] + px1[ch] * weights[1] + px2[ch] * weights[2] + px3[ch] * weights[3];
      ++ch;
    }
  }
}

// Bicubic resize of output rows [h_begin, h_end) for a single image.
// For each output row, the four source rows listed in y_tops[4 * h .. 4 * h + 3] are interpolated
// horizontally into four consecutive lines of line_buffer, which are then combined vertically with
// y_weights[4 * h .. 4 * h + 3].
//   input_shape   element [2] is read as the source width, element [3] as the channel count
//   output_shape  element [2] is read as the output width
//   line_buffer   scratch space for 4 lines of output-width * channel floats
void Bicubic(const float *input_data, float *output_data, const int32_t *input_shape, const int32_t *output_shape,
             const int32_t *y_tops, const int32_t *x_lefts, const float *y_weights, const float *x_weights,
             float *line_buffer, const int h_begin, const int h_end) {
  const int src_width = input_shape[2];
  const int channels = input_shape[3];
  const int dst_width = output_shape[2];
  const int dst_row_len = dst_width * channels;

  for (int row = h_begin; row < h_end; ++row) {
    const int32_t *src_rows = y_tops + 4 * row;
    // Horizontal pass: one scratch line per vertical tap.
    for (int tap = 0; tap < 4; ++tap) {
      const float *src_line = input_data + src_rows[tap] * src_width * channels;
      float *scratch_line = line_buffer + tap * dst_row_len;
      BicubicInterpRow(src_line, scratch_line, x_weights, x_lefts, dst_width, channels);
    }
    // Vertical pass: collapse the four scratch lines into the output row.
    BicubicInterpCol(line_buffer, output_data + row * dst_row_len, y_weights + 4 * row, dst_width, channels);
  }
}

// Bicubic resize of output rows [h_begin, h_end) for every image in the batch.
//   input_shape   read as {batch, height, width, channel}
//   output_shape  element [1] is the output height, [2] the output width
//   y_* / x_*     lookup tables shared by all images of the batch
//   line_buffer   scratch buffer handed to Bicubic
// Returns NNACL_NULL_PTR when any pointer argument is NULL, otherwise NNACL_OK.
int ResizeBicubic(const float *input_data, float *output_data, const int32_t *input_shape, const int32_t *output_shape,
                  const int32_t *y_tops, const int32_t *x_lefts, const float *y_weights, const float *x_weights,
                  float *line_buffer, const int h_begin, const int h_end) {
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_tops == NULL ||
      x_lefts == NULL || y_weights == NULL || x_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  // The scratch buffer is written by Bicubic, so it must be valid as well.
  if (line_buffer == NULL) {
    return NNACL_NULL_PTR;
  }
  // Number of floats occupied by one image in the input and output tensors.
  int input_cube_per_batch = input_shape[1] * input_shape[2] * input_shape[3];
  int output_cube_per_batch = output_shape[1] * output_shape[2] * input_shape[3];
  for (int b = 0; b < input_shape[0]; b++) {
    const float *input = input_data + b * input_cube_per_batch;
    float *output = output_data + b * output_cube_per_batch;
    Bicubic(input, output, input_shape, output_shape, y_tops, x_lefts, y_weights, x_weights, line_buffer, h_begin,
            h_end);
  }
  return NNACL_OK;
}

// Overwrites every output pixel whose source coordinate falls outside the image with
// param->extrapolation_value_, for output rows [h_begin, h_end) of every box.
//   boxes         4 floats per box, read as {start_h, start_w, end_h, end_w}; each value is scaled by
//                 (source length - 1) to obtain a source coordinate
//   input_shape   element [1] is read as the source height, element [2] as the source width
//   output_shape  read as {boxes, height, width, channel}
//   input_data, box_idx  only checked for NULL; y_tops is not used
// Returns NNACL_NULL_PTR when a required pointer is NULL, otherwise NNACL_OK.
int RewriteExtrapolationValue(const float *input_data, float *output_data, const int32_t *box_idx, const float *boxes,
                              const CropAndResizeParameter *param, const int32_t *input_shape,
                              const int32_t *output_shape, const int32_t *y_tops, const int h_begin, const int h_end) {
  if (input_data == NULL || output_data == NULL || box_idx == NULL || param == NULL || input_shape == NULL ||
      output_shape == NULL) {
    return NNACL_NULL_PTR;
  }
  // boxes is read for every batch entry below, so reject NULL like the other inputs.
  if (boxes == NULL) {
    return NNACL_NULL_PTR;
  }
  int batch = output_shape[0];
  int new_height = output_shape[1];
  int new_width = output_shape[2];
  int new_channel = output_shape[3];
  int input_h = input_shape[1];
  int input_w = input_shape[2];
  const float extrapolation_value = param->extrapolation_value_;

  for (int b = 0; b < batch; b++) {
    float *output = output_data + b * new_height * new_width * new_channel;
    const float *box = boxes + 4 * b;
    float start_h = box[0];
    float end_h = box[2];
    float start_w = box[1];
    float end_w = box[3];
    float actual_y, actual_x;
    for (int h = h_begin; h < h_end; ++h) {
      if (new_height > 1) {
        actual_y = start_h * (input_h - 1) + h * (end_h - start_h) * (input_h - 1) / (new_height - 1);
      } else {
        actual_y = 0.5 * (end_h + start_h) * (input_h - 1);
      }
      float *output_row = output + h * new_width * new_channel;
      if (actual_y < 0 || actual_y > input_h - 1) {
        // The whole row lies outside the image vertically: fill it and skip the per-column scan, which
        // could only write the same value again.
        float *output_data_base = output_row;
        for (int x = 0; x < new_width; ++x) {
          for (int d = 0; d < new_channel; ++d) {
            *output_data_base = extrapolation_value;
            output_data_base++;
          }
        }
        continue;
      }
      for (int w = 0; w < new_width; ++w) {
        if (new_width > 1) {
          actual_x = start_w * (input_w - 1) + w * (end_w - start_w) * (input_w - 1) / (new_width - 1);
        } else {
          actual_x = 0.5 * (end_w + start_w) * (input_w - 1);
        }
        if (actual_x < 0 || actual_x > input_w - 1) {
          // Only this pixel lies outside the image horizontally.
          float *output_data_base = output_row + w * new_channel;
          for (int d = 0; d < new_channel; ++d) {
            output_data_base[d] = extrapolation_value;
          }
        }
      }
    }
  }
  return NNACL_OK;
}

int CropAndResizeBilinear(const float *input_data, float *output_data, const int32_t *box_idx, const float *boxes,
                          const CropAndResizeParameter *param, const int32_t *input_shape, const int32_t *output_shape,
                          const int32_t *y_bottoms, const int32_t *y_tops, const int32_t *x_lefts,
                          const int32_t *x_rights, const float *y_bottom_weights, const float *x_left_weights,
                          float *line0, float *line1, const int h_begin, const int h_end) {
  if (input_data == NULL || output_data == NULL || box_idx == NULL || param == NULL || input_shape == NULL ||
      output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL || x_rights == NULL ||
      y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }
  int batch = output_shape[0];
  int new_height = output_shape[1];
  int new_width = output_shape[2];
  int new_channel = output_shape[3];
  int input_h = input_shape[1];
  int input_w = input_shape[2];

  for (int b = 0; b < batch; b++) {
    const float *cur_img = input_data + box_idx[b] * input_h * input_w * new_channel;
    const int32_t *y_bottom = y_bottoms + b * new_height;
    const int32_t *y_top = y_tops + b * new_height;
    const float *y_bottom_weight = y_bottom_weights + b * new_height;
    const int32_t *x_left = x_lefts + b * new_width;
    const int32_t *x_right = x_rights + b * new_width;
    const float *x_left_weight = x_left_weights + b * new_width;
    float *output = output_data + b * new_height * new_width * new_channel;

    Bilinear(cur_img, output, input_shape, output_shape, y_bottom, y_top, x_left, x_right, y_bottom_weight,
             x_left_weight, line0, line1, h_begin, h_end);
  }
  RewriteExtrapolationValue(input_data, output_data, box_idx, boxes, param, input_shape, output_shape, y_tops, h_begin,
                            h_end);
  return NNACL_OK;
}

// Nearest-neighbour resize. Thread `tid` of `thread_num` handles output rows tid, tid + thread_num, ...
//   input_shape / output_shape  element [1] is read as the height, [2] as the width; input_shape[3] is
//                               the number of floats copied per pixel; output_shape[0] is the batch count
//   calculate                   maps an output position to a fractional source coordinate
//   coordinate_transform_mode   1 rounds the source coordinate to the nearest integer, any other value
//                               takes the floor
// Source indices are clamped to the valid range of the source axis.
// Returns NNACL_PARAM_INVALID for a non-positive thread_num or a negative tid, NNACL_NULL_PTR when a
// pointer argument is NULL, otherwise NNACL_OK.
int ResizeNearestNeighbor(const float *input_data, float *output_data, const int32_t *input_shape,
                          const int32_t *output_shape, CalculateOriginalCoordinate calculate,
                          int coordinate_transform_mode, int tid, int thread_num) {
  // A zero stride would never terminate and a negative stride or start row would walk out of range.
  if (thread_num <= 0 || tid < 0) {
    return NNACL_PARAM_INVALID;
  }
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || calculate == NULL) {
    return NNACL_NULL_PTR;
  }
  int c = input_shape[3];
  const int last_y = input_shape[1] > 0 ? input_shape[1] - 1 : 0;
  const int last_x = input_shape[2] > 0 ? input_shape[2] - 1 : 0;
  bool align_corners = coordinate_transform_mode == 1;
  for (int batch = 0; batch < output_shape[0]; batch++) {
    for (int y = tid; y < output_shape[1]; y += thread_num) {
      float actual_y = calculate(y, input_shape[1], output_shape[1]);
      float nearest_y = align_corners ? roundf(actual_y) : floorf(actual_y);
      // Clamp in floating point so NaN or out-of-range values never reach the float-to-int cast.
      int input_y;
      if (!(nearest_y > 0.0f)) {
        input_y = 0;
      } else if (nearest_y >= (float)last_y) {
        input_y = last_y;
      } else {
        input_y = (int)nearest_y;
      }
      for (int x = 0; x < output_shape[2]; x++) {
        float actual_x = calculate(x, input_shape[2], output_shape[2]);
        float nearest_x = align_corners ? roundf(actual_x) : floorf(actual_x);
        int input_x;
        if (!(nearest_x > 0.0f)) {
          input_x = 0;
        } else if (nearest_x >= (float)last_x) {
          input_x = last_x;
        } else {
          input_x = (int)nearest_x;
        }
        int in_offset = Offset(input_shape, batch, input_y, input_x, 0);
        int out_offset = Offset(output_shape, batch, y, x, 0);
        // Copy all channels of the selected source pixel.
        memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(float));
      }
    }
  }
  return NNACL_OK;
}

// Asymmetric coordinate transform: returns x_resized divided by the resize ratio
// length_resized / length_original.
float CalculateAsymmetric(int x_resized, int length_original, int length_resized) {
  const float ratio = (float)(length_resized) / (float)(length_original);
  const float source_pos = (float)(x_resized) / ratio;
  return source_pos;
}

// Align-corners coordinate transform: returns x_resized divided by the ratio
// (length_resized - 1) / (length_original - 1), so the first and last samples of both axes coincide.
// When either axis has at most one sample the ratio is degenerate (0/0 or a division by zero), so the
// only meaningful source position, 0, is returned instead of NaN.
float CalculateAlignCorners(int x_resized, int length_original, int length_resized) {
  if (length_resized <= 1 || length_original <= 1) {
    return 0.0f;
  }
  float scale = (float)(length_resized - 1) / (float)(length_original - 1);
  return (float)(x_resized) / scale;
}

// Half-pixel coordinate transform: (x_resized + 0.5) / (length_resized / length_original) - 0.5.
// Results that are not greater than zero are returned as 0.
float CalculateHalfPixel(int x_resized, int length_original, int length_resized) {
  const float ratio = (float)(length_resized) / (float)(length_original);
  const float centered = (float)(x_resized + 0.5) / ratio - 0.5;
  if (centered > 0) {
    return centered;
  }
  return 0;
}
