ffmpeg-AVFrame

本文要介绍的结构体是 AVFrame.
AVFrame 是原始多媒体数据的抽象,用来描述解码后的原始音频数据(PCM)或者原始视频数据(YUV420, RGB等等).
即:用于存放编码前或解码后的原始数据。

注意: AVFrame 表示解码后(或编码前)的原始数据,而 AVPacket 表示编码后的压缩数据。

av_frame_alloc: 分配 AVFrame 内存(只分配 AVFrame 结构体本身,数据缓冲区需要另行分配和管理)
av_frame_free: 释放 AVFrame 内存(同时释放该 frame 持有的所有引用)

AVFrame 通常只分配一次,然后多次复用来保存不同的数据(例如用同一个 AVFrame 依次接收 decoder 输出的各个 frame)。在这种场景里,调用 av_frame_unref() 会释放该 frame 持有的所有引用,并把它重置为初始状态,这样就能够继续复用了。

通过 AVFrame 表示的数据,通常用 AVBuffer API 进行引用计数。底层缓冲区引用存储在 AVFrame.buf/AVFrame.extended_buf 中。

当 AVFrame 至少设置了一个引用(即 AVFrame.buf[0] != NULL)时,它就被视为是引用计数的。此时每一个 data plane 都必须包含在 AVFrame.buf 或者 AVFrame.extended_buf 的某一个缓冲区中。所有数据可以共用一个缓冲区,也可以每个 plane 各用一个单独的缓冲区,或者是介于两者之间的任意组合。

AVFrame 的字段(fields)可以通过 AVOptions 访问,使用的名称字符串与 C 结构体中的字段名一致。

AVFrame 对应的 AVClass 可通过调用 avcodec_get_frame_class() 获得。
AVFrame 的定义位于 libavutil/frame.h 文件中(下面的代码有删节,省略处用 ... 表示):

typedef struct AVFrame {
#define AV_NUM_DATA_POINTERS 8
    /**
    * Pointers to the picture/channel planes.
    * A pointer may be different from the first allocated byte of its buffer.
    *
    * Some decoders access areas outside (0,0)-(width,height); see
    * avcodec_align_dimensions2().
    * Some filters and swscale can read up to 16 bytes beyond the planes;
    * if these filters are to be used, 16 extra bytes must be allocated.
    *
    * NOTE: except for hwaccel formats, pointers not needed by the
    * (pixel/sample) format MUST be set to NULL.
    */
    uint8_t *data[AV_NUM_DATA_POINTERS];

    /**
    * For video, size in bytes of each picture line.
    * For audio, size in bytes of each plane.
    *
    * For audio, only linesize[0] may be set. For planar audio, each channel
    * plane must be the same size.
    *
    * For video the linesizes should be multiples of the CPU's alignment
    * preference, this is 16 or 32 for modern desktop CPUs.
    * Some code requires such alignment, other code can be slower without
    * correct alignment, for yet other it makes no difference.
    *
    * In short: the size of one "row" of data. Note that it is not
    * necessarily equal to the image width; it is usually larger.
    */
    int linesize[AV_NUM_DATA_POINTERS];

    /**
    * Pointers to the data planes/channels.
    *
    * For video, this should simply point to data[].
    *
    * For planar audio, each channel has a separate data pointer, and
    * linesize[0] contains the size of each channel buffer.
    * For packed audio, there is just one data pointer, and linesize[0]
    * contains the total size of the buffer for all channels.
    *
    * Note: Both data and extended_data should always be set in a valid frame,
    * but for planar audio with more channels than can fit in data,
    * extended_data must be used in order to access all channels.
    */
    uint8_t **extended_data;

    /* Picture resolution (width and height). */
    int width, height;
    /* Number of audio samples (per channel) described by this frame. */
    int nb_samples;

    /**
    * Format of the frame, -1 if unknown.
    * For video frames the value is one of enum AVPixelFormat,
    * for audio frames it is one of enum AVSampleFormat.
    */
    int format;

    /**
    * 1 -> keyframe, 0 -> not a keyframe
    */
    int key_frame;

    /* Picture type of the frame (I, B, P, ...) */
    enum AVPictureType pict_type;

    /**
    * Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
    * Note: this is the aspect ratio of a single sample (pixel), not the
    * display aspect ratio of the whole picture such as 16:9 or 4:3.
    */
    AVRational sample_aspect_ratio;

    /* Presentation timestamp (when the frame should be shown). */
    int64_t pts;
    /* (fields omitted in this excerpt) */
    ...
    /**
    * DTS copied from the AVPacket that triggered returning this frame. (if frame threading isn't used)
    * This is also the Presentation time of this AVFrame calculated from
    * only AVPacket.dts values without pts values.
    */
    int64_t pkt_dts;

    /** Picture number in coded (bitstream) order. */
    int coded_picture_number;
    /* Picture number in display order. */
    int display_picture_number;

    /** Quality, between 1 (good) and FF_LAMBDA_MAX (bad). */
    int quality;

    /** Private data of the user. */
    void *opaque;
    /* (fields omitted in this excerpt) */
    ...
    /**
    * When decoding, repeat_pict signals how much the picture must be delayed.
    * extra_delay = repeat_pict / (2*fps)
    */
    int repeat_pict;

    /** Whether the content of the picture is interlaced. */
    int interlaced_frame;

    /**
    * If the content is interlaced, is top field displayed first.
    */
    int top_field_first;

    /**
    * Tell user application that palette has changed from previous frame.
    */
    int palette_has_changed;

    /**
    * reordered opaque 64 bits (generally an integer or a double precision float
    * PTS but can be anything).
    * The user sets AVCodecContext.reordered_opaque to represent the input at
    * that time,
    * the decoder reorders values as needed and sets AVFrame.reordered_opaque
    * to exactly one of the values provided by the user through AVCodecContext.reordered_opaque
    */
    int64_t reordered_opaque;

    /**
    * Sample rate of the audio data.
    */
    int sample_rate;

    /**
    * Channel layout of the audio data.
    */
    uint64_t channel_layout;

    /**
    * AVBuffer references backing the data for this frame. If all elements
    * of this array are NULL, then this frame is not reference counted.
    * This array must be filled contiguously. There may be at most one
    * AVBuffer per data plane, so for video this array always contains all
    * the references. For planar audio with more than AV_NUM_DATA_POINTERS
    * channels, more buffers are needed than can fit in this array, so the
    * extra AVBufferRef pointers are stored in extended_buf.
    */
    AVBufferRef *buf[AV_NUM_DATA_POINTERS];

    /**
    * Holds the references that cannot fit into AVFrame.buf.
    *
    * Note: unlike AVFrame.extended_data, which always contains all the
    * pointers, extended_buf only contains the extra pointers that cannot
    * fit into AVFrame.buf.
    *
    * The array is allocated with av_malloc() and freed in av_frame_unref().
    */
    AVBufferRef **extended_buf;
    /**
    * Number of elements in extended_buf.
    */
    int        nb_extended_buf;

    /* NOTE(review): presumably the side-data entries attached to this frame
     * and their count; not described in this excerpt - confirm against
     * libavutil/frame.h. */
    AVFrameSideData **side_data;
    int            nb_side_data;
    /* (fields omitted in this excerpt) */
    ...
    /**
    * Frame flags, a combination of @ref lavu_frame_flags
    */
    int flags;

    /**
    * MPEG vs JPEG YUV range.
    * - encoding: Set by user
    * - decoding: Set by libavcodec
    */
    enum AVColorRange color_range;

    enum AVColorPrimaries color_primaries;

    enum AVColorTransferCharacteristic color_trc;

    /**
    * YUV colorspace type.
    * - encoding: Set by user
    * - decoding: Set by libavcodec
    */
    enum AVColorSpace colorspace;

    enum AVChromaLocation chroma_location;

    /**
    * Best-effort timestamp of the frame.
    * - encoding: unused
    * - decoding: set by libavcodec, read by user.
    */
    int64_t best_effort_timestamp;

    /**
    * Position taken from the last AVPacket.
    * - encoding: unused
    * - decoding: Read by user.
    */
    int64_t pkt_pos;

    /**
    * Duration of the corresponding packet.
    * - encoding: unused
    * - decoding: Read by user.
    */
    int64_t pkt_duration;

    /**
    * metadata.
    * - encoding: Set by user.
    * - decoding: Set by libavcodec.
    */
    AVDictionary *metadata;

    /**
    * decode error flags of the frame, set to a combination of
    * FF_DECODE_ERROR_xxx flags if the decoder produced a frame, but there
    * were errors during the decoding.
    * - encoding: unused
    * - decoding: set by libavcodec, read by user.
    */
    int decode_error_flags;
#define FF_DECODE_ERROR_INVALID_BITSTREAM   1
#define FF_DECODE_ERROR_MISSING_REFERENCE   2
#define FF_DECODE_ERROR_CONCEALMENT_ACTIVE  4
#define FF_DECODE_ERROR_DECODE_SLICES       8

    /**
    * number of audio channels, only used for audio.
    * - encoding: unused
    * - decoding: Read by user.
    */
    int channels;

    /**
    * size of the corresponding packet containing the compressed
    * frame.
    * It is set to a negative value if unknown.
    * - encoding: unused
    * - decoding: set by libavcodec, read by user.
    */
    int pkt_size;

// The QP-related fields below are deprecated
#if FF_API_FRAME_QP
    /**
    * QP table
    */
    attribute_deprecated
    int8_t *qscale_table;
    /**
    * QP store stride
    */
    attribute_deprecated
    int qstride;

    attribute_deprecated
    int qscale_type;

    attribute_deprecated
    AVBufferRef *qp_table_buf;
#endif
    /**
    * For hwaccel-format frames, hw_frames_ctx refers to the
    * AVHWFramesContext describing the frame.
    */
    AVBufferRef *hw_frames_ctx;

    AVBufferRef *opaque_ref;

    /**
    * @anchor cropping
    * @name Cropping
    * Video frames only. The number of pixels to discard from the
    * top/bottom/left/right border of the frame to obtain the sub-rectangle of
    * the frame intended for presentation.
    * @{
    */
    size_t crop_top;
    size_t crop_bottom;
    size_t crop_left;
    size_t crop_right;
    /**
    * @}
    */

    /**
    * AVBufferRef for internal use by a single libav* library.
    * Must not be used to transfer data between libraries.
    * Has to be NULL when ownership of the frame leaves the respective library.
    *
    * Code outside the FFmpeg libs should never check or change the contents of the buffer ref.
    *
    * FFmpeg calls av_buffer_unref() on it when the frame is unreferenced.
    * av_frame_copy_props() calls create a new reference with av_buffer_ref()
    * for the target frame's private_ref field.
    */
    AVBufferRef *private_ref;
} AVFrame;

其中 pict_type 字段使用的 enum AVPictureType 枚举值如下:

Enumerator
AV_PICTURE_TYPE_NONE Undefined
AV_PICTURE_TYPE_I Intra
AV_PICTURE_TYPE_P Predicted
AV_PICTURE_TYPE_B Bi-dir predicted
AV_PICTURE_TYPE_S S(GMC)-VOP MPEG-4
AV_PICTURE_TYPE_SI Switching Intra
AV_PICTURE_TYPE_SP Switching Predicted
AV_PICTURE_TYPE_BI BI type