Слияние кода завершено, страница обновится автоматически
/*****************************************************************************/
/* Copyright YouDao, Inc. */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
/* See the License for the specific language governing permissions and */
/* limitations under the License. */
/*****************************************************************************/
#include <stdint.h>
#include <arm_neon.h>
#ifndef INCLUDE_ARM_QUANT_INTERFACE
#define INCLUDE_ARM_QUANT_INTERFACE
#ifdef __cplusplus
extern "C" {
#endif
/***********************************************************************
Function: bias_int32_t
Description: Perform bias operation on a 32-bit signed int matrix.
This function can be used in asymmetric quantitized GEMM.
Parameters: dst: the address of the matrix to apply bias on
bias_dim0: the bias value on every element
bias_dim1: the address of the input bias vector which
will be applied to the matrix along its
major dimension, i.e. when the element
can be indexed by x * dim1 + y, each element
is biased by bias_dim1[y]. No bias will be
performed with NULL pointer as input.
bias_dim1_scale: the scale to be applied on elements
of bias_dim1[] prior to the bias
operation
bias_dim2: the address of the input bias vector which
whill be applied to the matrix along its
minor dimension, i.e. when the element
can be indexed by x * dim1 + y, each element
is biased by bias_dim2[x]. No bias will be
performed with NULL pointer as input.
bias_dim2_scale: the scale to be applied on elements
of bias_dim2[] prior to the bias
operation
dim1: the length of the major dimension of input matrix
dim2: the length of the minor dimension of input matrix
***********************************************************************/
void bias_int32_t(int32_t *dst, int32_t bias_dim0,
const int32_t *bias_dim1, int32_t bias_dim1_scale,
const int32_t *bias_dim2, int32_t bias_dim2_scale,
uint32_t dim1, uint32_t dim2);
/***********************************************************************
Function: u8u32_sum
Description: Perform summing operation of cols/rows of the unsigned
8-bit int matrix. The sum of each col/row is an unsigned
32-bit integer.
Parameters: src: the address of input matrix.
dst: the address of output vector.
dim1: the length of major dimension of input matrix.
dim2: the length of minor dimension of input matrix.
(the major dimension is the vertical one for column-
major matrix, or the horizontal one for row-major
matrix)
direction: the direction of summing
0: sum along the minor dimension,
output_vector_size == dim1;
1: sum along the major dimension,
output_vector_size == dim2.
***********************************************************************/
void u8u32_sum(const uint8_t *src, uint32_t *dst,
uint32_t dim1, uint32_t dim2, uint8_t direction);
/***********************************************************************
Function: quantize_asymmetric_f32_u8
Description: Asymmetric quantization from fp32 to unsigned 8-bit int,
producing an 8-bit zero-point integer Z0, a fp32 scale S0
and quantitized unsigned 8-bit data Q1-Qn on the run.
For each quantitized element Qi, S0 * (Qi - Z0) can
approximate the original input (fp32) Fi.
Parameters: const float32_t *input: the address of the input fp32 array
uint8_t *output: the address of the output integer array
uint8_t *zero_point: the address to output Z0
float32_t *scale: the address to output S0
uint32_t size: the number of elements in the input
float32_t input_min, input_max:
the min and max of input float32_t numbers.
when input_min > input_max, the min and max
of input are reevaluated.
***********************************************************************/
void quantize_asymmetric_f32_u8(const float32_t *input, uint8_t *output,
uint8_t *zero_point, float32_t *scale, uint32_t size,
float32_t input_min, float32_t input_max);
/***********************************************************************
Function: quantize_symmetric_f32_s8
Description: symmetric quantization from fp32 to signed 8-bit int,
producing a fp32 scale S0 and quantitized 8-bit data
Q1-Qn on the run.
For each quantitized element Qi, S0 * Qi can
approximate the original input (fp32) Fi.
Parameters: const float32_t *input: the address of the input fp32 array
int8_t *output: the address of the output integer array
float32_t *scale: the address to output S0
uint32_t size: the number of elements in the input
float32_t input_min, input_max:
the min and max of input float32_t numbers.
when input_min > input_max, the min and max
of input are reevaluated.
***********************************************************************/
void quantize_symmetric_f32_s8(const float32_t *input, int8_t *output,
float32_t *scale, uint32_t size, float32_t input_min, float32_t input_max);
/***********************************************************************
Function: quantize_asymmetric_f32_u16
Description: Asymmetric quantization from fp32 to unsigned 16-bit int,
producing an 16-bit zero-point integer Z0, a fp32 scale S0
and quantitized unsigned 16-bit data Q1-Qn on the run.
This function does the same thing as
quantize_asymmetric_f32_u8 except the zero point and
outputs are 16-bit integers.
***********************************************************************/
void quantize_asymmetric_f32_u16(const float32_t *input, uint16_t *output,
uint16_t *zero_point, float32_t *scale, uint32_t size,
float32_t input_min, float32_t input_max);
/***********************************************************************
Function: quantize_symmetric_f32_s16
Description: symmetric quantization from fp32 to signed 16-bit int,
producing a fp32 scale S0 and quantitized 16-bit data
Q1-Qn on the run. This function does the same thing
as quantize_symmetric_f32_s8 except the outputs are
16-bit integers.
***********************************************************************/
void quantize_symmetric_f32_s16(const float32_t *input, int16_t *output,
float32_t *scale, uint32_t size, float32_t input_min, float32_t input_max);
/***********************************************************************
Function: dequantize_symmetric_f32_s32
Description: Convert 32-bit signed int values to fp32 ones with scaling.
Parameters: const int32_t *src: the address of the input integer array
float32_t *dst: the address of the output fp32 array
float32_t scale: the scaling factor on the input
uint32_t size: the number of elements in the input
***********************************************************************/
void dequantize_symmetric_f32_s32(const int32_t *src, float32_t *dst,
float32_t scale, uint32_t size);
/************************************************************************
Function: requantize_asymmetric_32to8
Description: asymmetric requantization from signed 32-bit int to
unsigned 8-bit int, which produces an 8-bit zero-point
integer Z0, updates the fp32 scale S0 and outputs
requantitized unsigned 8-bit data Q1-Qn on the run.
For each requantitized element Qi, S0 * (Qi - Z0) can
approximate the original dequantized value (fp32) Fi
of the corresponding 32-bit input.
Parameters: const int32_t *input: the address of the input int array
uint8_t *output: the address of the output integer array
float *scale: the address to update scaling factor S0
uint8_t *zero_point: the address to output Z0
uint32_t size: the number of elements in the input
int32_t input_min, input_max: the min and max value
of input int32 numbers. if input_min > input_max,
the min and max of the input integers are recalculated.
Note: The following function is near-equivalent to this sequence:
dequant_cvt_float_int32_t(input, temporal_array, *scale, size);
quant_unsym_float_uint8_t(temporal_array, output,
zero_point, scale, size);
************************************************************************/
void requantize_asymmetric_32to8(const int32_t *input, uint8_t *output,
float *scale, uint8_t *zero_point, uint32_t size,
int32_t input_min, int32_t input_max);
/************************************************************************
Function: requantize_symmetric_32to8
Description: symmetric requantization from signed 32-bit int to
signed 8-bit int, which updates the fp32 scale S0
and outputs requantitized signed 8-bit data Q1-Qn
on the run.
For each requantitized element Qi, S0 * Qi can
approximate the original dequantized value (fp32) Fi
of the corresponding 32-bit input.
Parameters: const int32_t *input: the address of the input int array
int8_t *output: the address of the output integer array
float *scale: the address to update scaling factor S0
uint32_t size: the number of elements in the input
int32_t input_min, input_max: the min and max value
of input int32 numbers. if input_min > input_max,
the min and max of the input integers are recalculated.
Note: The following function is near-equivalent to this sequence:
dequant_cvt_float_int32_t(input, temporal_array, *scale, size);
quant_sym_float_int8_t(temporal_array, output, scale, size);
************************************************************************/
void requantize_symmetric_32to8(const int32_t *input, int8_t *output,
float *scale, uint32_t size,
int32_t input_min, int32_t input_max);
/************************************************************************
* Function: requantize_asymmetric_32to16
* Description: asymmetric requantization from signed 32-bit int to
* unsigned 16-bit int, which does the same thing as
* requantize_asymmetric_32to8 except that the outputs
* and zero point are 16-bit integers
***********************************************************************/
void requantize_asymmetric_32to16(const int32_t *input, uint16_t *output,
float *scale, uint16_t *zero_point, uint32_t size,
int32_t input_min, int32_t input_max);
/************************************************************************
* Function: requantize_symmetric_32to16
* Description: symmetric requantization from signed 32-bit int to
* signed 16-bit int, which does the same thing as
* requantize_symmetric_32to8 except that the outputs
* are 16-bit integers
***********************************************************************/
void requantize_symmetric_32to16(const int32_t *input, int16_t *output,
float *scale, uint32_t size,
int32_t input_min, int32_t input_max);
/************************************************************************
* Function: requantize_asymmetric_16to8
* Description: asymmetric requantization from signed 16-bit int to
* unsigned 8-bit int, which does the same thing as
* requantize_asymmetric_32to8 except that the inputs
* are 16-bit integers
***********************************************************************/
void requantize_asymmetric_16to8(const int16_t *input, uint8_t *output,
float *scale, uint8_t *zero_point, uint32_t size,
int16_t input_min, int16_t input_max);
/************************************************************************
* Function: requantize_symmetric_16to8
* Description: symmetric requantization from signed 16-bit int to
* signed 8-bit int, which does the same thing as
* requantize_symmetric_32to8 except that the inputs
* are 16-bit integers
***********************************************************************/
void requantize_symmetric_16to8(const int16_t *input, int8_t *output,
float *scale, uint32_t size,
int16_t input_min, int16_t input_max);
#ifdef __cplusplus
}
#endif
#endif
Вы можете оставить комментарий после Вход в систему
Неприемлемый контент может быть отображен здесь и не будет показан на странице. Вы можете проверить и изменить его с помощью соответствующей функции редактирования.
Если вы подтверждаете, что содержание не содержит непристойной лексики/перенаправления на рекламу/насилия/вульгарной порнографии/нарушений/пиратства/ложного/незначительного или незаконного контента, связанного с национальными законами и предписаниями, вы можете нажать «Отправить» для подачи апелляции, и мы обработаем ее как можно скорее.
Опубликовать ( 0 )