Cleofwine | 开发者

音视频开发_MP4 分解与合成

最近更新：2024-09-23 | 字数总计：4.8k | 阅读估时：27分钟 | 阅读量：次

需求分析：
1. 需求定义
2. 主要功能分析
功能一(仅功能Demo)
1. 效果
功能二(工程化)

需求分析：

需求定义

功能一：对MP4进行解复用和解码，提取出YUV和PCM裸流
功能二：YUV和PCM裸流编码并合成MP4

主要功能分析

封装和解复用(MP4格式)
音频解码与视频解码(AAC与H264)
音频编码与视频编码(H264与AAC)
音频重采样(S16 to FLTP & FLTP to S16)

功能一(仅功能Demo)

#include <iostream>
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avio.h"
}
#include <direct.h>
using namespace std;

// yuv文件输出位置
#define VIDEO_SAVE_PATH "yuv420_720x576_25fps.yuv"
// pcm文件输出位置
#define AUDIO_SAVE_PATH "44.1khz_2ch_s16.pcm"

// Shared scratch buffer for FFmpeg error text; every av_get_err() call overwrites it.
static char err_buf[128] = {0};

// Writes the description of the FFmpeg error code errnum into err_buf
// and returns a pointer to that buffer.
static char* av_get_err(int errnum)
{
    av_strerror(errnum, err_buf, sizeof(err_buf));
    return err_buf;
}

/**
 * Sends one audio packet to the decoder and appends every frame it yields to
 * outfile as interleaved PCM.
 *
 * FLTP frames are converted to interleaved S16 before being written; other
 * planar formats are interleaved sample by sample; packed formats are written
 * as-is from data[0].
 *
 * @param dec_ctx  opened audio decoder context
 * @param pkt      packet to decode (a packet with data == NULL / size == 0 flushes)
 * @param frame    caller-owned scratch frame that receives decoded data
 * @param outfile  destination for the raw PCM bytes
 */
void audio_decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, FILE *outfile)
{
  int ret = avcodec_send_packet(dec_ctx, pkt);
  if(ret == AVERROR(EAGAIN)){printf("当前状态不接受输⼊，⽤户必须先⽤avcodec_receive_frame() 读取数据帧；\n");}
  else if(ret < 0){printf("提交包发生错误，err:%s,pkt->size=%d\n",av_get_err(ret),pkt->size);return;}
  while(ret >= 0)
  {
    ret = avcodec_receive_frame(dec_ctx, frame);
    if(ret == AVERROR(EAGAIN)||ret == AVERROR_EOF) return;
    else if(ret < 0){printf("解码过程有误！\n");return;}

    // Size the samples from the frame that was actually produced.
    const AVSampleFormat sample_fmt = (AVSampleFormat)frame->format;
    const int data_size = av_get_bytes_per_sample(sample_fmt);
    if(data_size <= 0){printf("计算数据大小失败\n");return;}

    // Print some information about the decoded frame.
    printf("ar-samplerate: %dHz\n", frame->sample_rate);
    printf("ac-channel: %d\n", frame->channels);
    printf("f-format: %d\n", frame->format); // note: the file on disk is always interleaved

    if (sample_fmt == AV_SAMPLE_FMT_FLTP)
    {
        // FLTP has to be converted to interleaved S16.
        AVFrame* s16_frame = AllocS16PcmFrame(frame->channels, frame->nb_samples);
        if (!s16_frame)
            return;
        // A converter is built per frame; it is destroyed when it leaves scope.
        AudioResample audio_resampler;
        int converted = audio_resampler.InitFromFLTPToS16(frame->channels, frame->sample_rate, frame->channels, frame->sample_rate);
        if (converted >= 0)
            converted = audio_resampler.ResampleFromFLTPToS16(frame, s16_frame);
        if (converted < 0)
        {
            // Release the temporary frame on every failure path.
            FreePCMFrame(s16_frame);
            return;
        }
        const int s16_size = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
        // converted is the per-channel sample count; the packed buffer holds
        // converted * channels samples, whatever the channel count is.
        fwrite(s16_frame->data[0], s16_size, (size_t)converted * frame->channels, outfile);
        FreePCMFrame(s16_frame);
    }
    else if (av_sample_fmt_is_planar(sample_fmt)) {
        // Planar input: one plane per channel, interleave while writing.
        for (int i = 0; i < frame->nb_samples; i++) {
            for (int ch = 0; ch < frame->channels; ch++)
                fwrite(frame->extended_data[ch] + data_size * i, 1, data_size, outfile);
        }
    }
    else {
        // Packed input is already interleaved in data[0]; data[1..] are NULL.
        fwrite(frame->data[0], data_size, (size_t)frame->nb_samples * frame->channels, outfile);
    }
  }
}

/**
 * Sends one video packet to the decoder and appends every frame it yields to
 * outfile as tightly packed planar YUV 4:2:0 (Y plane, then U, then V).
 *
 * Rows are copied one at a time because linesize[] may be larger than the
 * visible width. Frames in any other pixel format are rejected, since the
 * plane geometry below would be wrong for them.
 *
 * @param dec_ctx  opened video decoder context
 * @param pkt      packet to decode (a packet with data == NULL / size == 0 flushes)
 * @param frame    caller-owned scratch frame that receives decoded data
 * @param outfile  destination for the raw YUV bytes
 */
void video_decode(AVCodecContext *dec_ctx, AVPacket *pkt,AVFrame *frame, FILE *outfile)
{
  int ret = avcodec_send_packet(dec_ctx, pkt);
  if(ret == AVERROR(EAGAIN)){printf("当前状态不接受输⼊，⽤户必须先⽤avcodec_receive_frame() 读取数据帧；\n");}
  else if(ret < 0){printf("提交包发生错误，err:%s,pkt->size=%d\n",av_get_err(ret),pkt->size);return;}
  while(ret >= 0)
  {
    ret = avcodec_receive_frame(dec_ctx, frame);
    if(ret == AVERROR(EAGAIN)||ret == AVERROR_EOF) return;
    else if(ret < 0){printf("解码过程有误！\n");return;}
    // Print some information about the decoded frame.
    printf("width: %d\n", frame->width);
    printf("height: %d\n", frame->height);
    printf("format: %d\n", frame->format);
    if(frame->format != AV_PIX_FMT_YUV420P && frame->format != AV_PIX_FMT_YUVJ420P)
    {
        printf("unsupported pixel format %d, only yuv420p can be written\n", frame->format);
        return;
    }
    // Chroma planes are subsampled by two in both directions; round up so
    // odd dimensions do not lose the last row/column.
    const int chroma_w = (frame->width + 1) / 2;
    const int chroma_h = (frame->height + 1) / 2;
    // linesize[] is the byte stride of each row, so that is the row offset.
    for(int j=0; j<frame->height; j++)
        fwrite(frame->data[0] + j * frame->linesize[0], 1, frame->width, outfile);
    for(int j=0; j<chroma_h; j++)
        fwrite(frame->data[1] + j * frame->linesize[1], 1, chroma_w, outfile);
    for(int j=0; j<chroma_h; j++)
        fwrite(frame->data[2] + j * frame->linesize[2], 1, chroma_w, outfile);
  }
}

void start_av_decode(AVFormatContext* fmt_context, int video_index, int audio_index)
{
  AVPacket* pkt = av_packet_alloc(); // 使用alloc必须进行free
  AVFrame *decoded_frame = NULL;
  if(!decoded_frame){
    if(!(decoded_frame = av_frame_alloc())){printf("不能分配帧内存\n");return;}
  }
  /* 初始化视频解码器 */
  const AVCodec *v_codec;
  AVCodecContext *v_codec_ctx = NULL;
  enum AVCodecID video_codec_id = AV_CODEC_ID_H264;

  // 查找解码器
  v_codec = avcodec_find_decoder(video_codec_id);
  if(!v_codec){printf("找不到解码器！\n");return;}
  // 分配解码器上下文
  v_codec_ctx = avcodec_alloc_context3(v_codec);
  if(!v_codec_ctx){printf("无法分配解码器上下文！\n");return;}
  v_codec_ctx->pkt_timebase = fmt_context->streams[video_index]->time_base;
  avcodec_parameters_to_context(v_codec_ctx, fmt_context->streams[video_index]->codecpar);
  // 关联解码器和解码器上下文
  if(avcodec_open2(v_codec_ctx, v_codec, NULL) < 0){
    printf("无法打开解码器\n");return;
  }
  FILE* v_out_file = fopen(VIDEO_SAVE_PATH, "wb");

  /* 初始化音频解码器 */
  const AVCodec *a_codec;
  AVCodecContext *a_codec_ctx = NULL;
  enum AVCodecID audio_codec_id = AV_CODEC_ID_AAC;

  // 查找解码器
  a_codec = avcodec_find_decoder(audio_codec_id);
  if(!a_codec){printf("找不到解码器！\n");return;}
  // 分配解码器上下文
  a_codec_ctx = avcodec_alloc_context3(a_codec);
  if(!a_codec_ctx){printf("无法分配解码器上下文！\n");return;}
  // 关联解码器和解码器上下文
  a_codec_ctx->pkt_timebase = fmt_context->streams[audio_index]->time_base;
  avcodec_parameters_to_context(a_codec_ctx, fmt_context->streams[audio_index]->codecpar);
  if(avcodec_open2(a_codec_ctx, a_codec, NULL) < 0){
    printf("无法打开解码器\n");return;
  }
  FILE* a_out_file = fopen(AUDIO_SAVE_PATH, "wb");

  do
  {
    int ret = av_read_frame(fmt_context, pkt); // ！！实际得到的是packet
    if (ret < 0){printf("av_read_frame end\n");break;}
    if (pkt->stream_index == video_index) { // 视频包
        printf("A video packet\n");
        video_decode(v_codec_ctx, pkt, decoded_frame, v_out_file);
    }
    else if (pkt->stream_index == audio_index) // 音频包
    {
        printf("A audio packet\n");
        audio_decode(a_codec_ctx, pkt, decoded_frame, a_out_file);
    }
    printf("pts:%lld\n", pkt->pts);
    printf("dts:%lld\n", pkt->dts);
    printf("size:%d\n", pkt->size);
    printf("pos:%lld\n", pkt->pos);
    printf("duration:%llf\n", pkt->duration * av_q2d(fmt_context->streams[pkt->stream_index]->time_base));
    // 每次循环结束释放掉上一帧的数据
    av_packet_unref(pkt); printf("\n");
  } while (1);

  // 冲刷编码器
  pkt->data = NULL; pkt->size = 0;
  video_decode(v_codec_ctx, pkt, decoded_frame, v_out_file);
  audio_decode(a_codec_ctx, pkt, decoded_frame, a_out_file);

  // 使用alloc必须进行free
  if (pkt){av_packet_free(&pkt);}

  // 清理内存
  fclose(v_out_file);
  fclose(a_out_file);
  avcodec_free_context(&v_codec_ctx);
  avcodec_free_context(&a_codec_ctx);
  av_frame_free(&decoded_frame);
  av_packet_free(&pkt);
}

/**
 * Opens the media file at filepath, prints container and per-stream
 * information, then decodes its audio and video streams via start_av_decode().
 *
 * The format context is closed on every path after a successful open.
 * Decoding is skipped when the file lacks either an audio or a video stream.
 *
 * @param filepath  path or URL of the media file to demultiplex
 */
void demux( const char* filepath )
{
    // AVFormatContext describes the structure and basic info of a media file/stream.
    AVFormatContext* fmt_context = NULL;
    int video_index = -1; // index of the video stream
    int audio_index = -1; // index of the audio stream
    // Open the input (probes the protocol/container). On failure the API
    // leaves fmt_context NULL, so there is nothing to close.
    int ret = avformat_open_input(&fmt_context, filepath, NULL, NULL);
    if (ret < 0) { char buf[1024] = { 0 }; av_strerror(ret, buf, sizeof(buf)); printf("open %s failed: %s\n", filepath, buf); return; }
    ret = avformat_find_stream_info(fmt_context, NULL);
    if (ret < 0)
    {
        char buf[1024] = { 0 };
        av_strerror(ret, buf, sizeof(buf));
        printf("avformat_find_stream_info %s failed: %s\n", filepath, buf);
        // The context was opened successfully, so it must be closed here.
        avformat_close_input(&fmt_context);
        return;
    }
    // Dump the container information.
    av_dump_format(fmt_context, 0, filepath, 0);
    // Print what the context already knows.
    {
        printf("media->url: %s\n", fmt_context->url);
        printf("media->bit_rate: %lld\n", (long long)fmt_context->bit_rate);
        printf("media->nb_streams: %d\n", (int)fmt_context->nb_streams);
        // duration is expressed in AV_TIME_BASE units: seconds * AV_TIME_BASE.
        int total_seconds = 0;
        if (fmt_context->duration != AV_NOPTS_VALUE)
            total_seconds = (int)(fmt_context->duration / AV_TIME_BASE);
        int hours = total_seconds / 3600; int minutes = total_seconds % 3600 / 60; int seconds = total_seconds % 3600 % 60;
        printf("media->duration: %d-%d-%d\n", hours, minutes, seconds); printf("\n");
    }
    // Walk the streams to locate audio and video.
    for (unsigned int i = 0; i < fmt_context->nb_streams; i++)
    {
        AVStream* is = fmt_context->streams[i]; // audio, video or subtitle stream
        if (AVMEDIA_TYPE_AUDIO == is->codecpar->codec_type) // audio stream
        {
            audio_index = (int)i;
            printf("stream index:%d\n", is->index); // unique stream index
            printf("audio sample_rate:%d Hz\n", is->codecpar->sample_rate); // sample rate
            switch (is->codecpar->format){ // sample format
            case AV_SAMPLE_FMT_FLTP: printf("sampleformat:AV_SAMPLE_FMT_FLTP\n"); break;
            case AV_SAMPLE_FMT_S16P: printf("sampleformat:AV_SAMPLE_FMT_S16P\n"); break;
            default:break;}
            printf("audio channels: %d\n", is->codecpar->channels); // channel count
            switch (is->codecpar->codec_id){ // audio codec
            case AV_CODEC_ID_AAC:printf("audio codec:AAC\n"); break;
            case AV_CODEC_ID_MP3:printf("audio codec:MP3\n"); break;
            default:printf("audio codec_id:%d\n", is->codecpar->codec_id);break;}
            if (is->duration != AV_NOPTS_VALUE) // e.g. FLV does not expose a per-stream duration
            {
                int duration_audio_seconds = (int)(is->duration * av_q2d(is->time_base)); // audio length
                printf("audio duration: %02d:%02d:%02d\n", duration_audio_seconds / 3600, (duration_audio_seconds % 3600) / 60, (duration_audio_seconds % 60));
            }
            else { printf("duration unknown\n"); }
        }
        else if (AVMEDIA_TYPE_VIDEO == is->codecpar->codec_type) // video stream
        {
            video_index = (int)i;
            printf("stream index:%d\n", is->index); // unique stream index
            printf("video fps: %f / s\n", av_q2d(is->avg_frame_rate)); // frame rate
            switch (is->codecpar->codec_id) { // video codec
            case AV_CODEC_ID_MPEG4: printf("video codec:MPEG4\n"); break;
            case AV_CODEC_ID_H264: printf("video codec:H264\n"); break;
            default:printf("video codec_id:%d\n", is->codecpar->codec_id);break;}
            printf("video width:%d px height:%d px \n", is->codecpar->width, is->codecpar->height); // frame size
            if (is->duration != AV_NOPTS_VALUE) // e.g. FLV does not expose a per-stream duration
            {
                int duration_video_seconds = (int)(is->duration * av_q2d(is->time_base)); // video length
                printf("video duration: %02d:%02d:%02d\n", duration_video_seconds / 3600, (duration_video_seconds % 3600) / 60, (duration_video_seconds % 60));
            }
            else { printf("duration unknown\n"); }
        }
    }
    printf("\n");
    // Decode only when both streams exist; the decoder indexes streams[] directly.
    if (video_index < 0 || audio_index < 0)
        printf("audio or video stream not found, skip decoding\n");
    else
        start_av_decode(fmt_context, video_index, audio_index);
    // Every successful open must be paired with a close.
    avformat_close_input(&fmt_context);
}

// Demo entry point: prints a greeting, then demuxes and decodes the sample file.
int main()
{
    std::cout << "Hello World!" << std::endl;

    const char* input_path = "sound_in_sync_test.mp4";
    demux(input_path);

    return 0;
}

效果

功能二(工程化)

音频重采样类

// Allocates an audio frame in AV_SAMPLE_FMT_FLTP format with buffers for
// sample_per_channel samples per channel; returns NULL on failure.
AVFrame *AllocFltpPcmFrame(int channels, int sample_per_channel);
// Frees a frame obtained from one of the Alloc*PcmFrame functions; NULL is accepted.
void FreePCMFrame(AVFrame *frame);

// Allocates an audio frame in AV_SAMPLE_FMT_S16 format with buffers for
// sample_per_channel samples per channel; returns NULL on failure.
AVFrame* AllocS16PcmFrame(int channels, int sample_per_channel);

// Converts audio between interleaved S16 and planar float (FLTP) using a
// libswresample context. Call one of the Init* methods before the matching
// Resample* method; the context is released by DeInit() or the destructor.
class AudioResample
{
public:
    AudioResample();
    ~AudioResample();
    // Configure an S16 -> FLTP conversion. Returns 0 on success, -1 on failure.
    int InitFromS16ToFLTP(int in_channels, int in_sample_rate, int out_channels, int out_sample_rate);
    // Configure an FLTP -> S16 conversion. Returns 0 on success, -1 on failure.
    int InitFromFLTPToS16(int in_channels, int in_sample_rate, int out_channels, int out_sample_rate);
    // Convert interleaved S16 data into out_frame. Returns the number of
    // samples per channel written, or -1 on failure.
    int ResampleFromS16ToFLTP(uint8_t* in_data, AVFrame *out_frame);
    // Convert an FLTP frame into out_frame. Returns the number of samples per
    // channel written, or -1 on failure.
    int ResampleFromFLTPToS16(AVFrame* in_frame, AVFrame* out_frame);
    // Release the conversion context, if any.
    void DeInit();
private:
    // Parameters of the most recent Init* call; zero until then.
    int in_channels_ = 0;
    int in_sample_rate_ = 0;
    int out_channels_ = 0;
    int out_sample_rate_ = 0;
    SwrContext* ctx_ = NULL;
};

#include "audioresample.h"

// Nothing to set up here; the conversion context is created by the Init* methods.
AudioResample::AudioResample() {}

// Releases the conversion context if one is still held.
AudioResample::~AudioResample()
{
    if (ctx_ != NULL) {
        DeInit();
    }
}

int AudioResample::InitFromS16ToFLTP(int in_channels, int in_sample_rate, int out_channels, int out_sample_rate)
{
    in_channels_ = in_channels;
    in_sample_rate_ = in_sample_rate;
    out_channels_ = out_channels;
    out_sample_rate_ = out_sample_rate;

    ctx_ = swr_alloc_set_opts(ctx_, 
                                av_get_default_channel_layout(out_channels_),
                                AV_SAMPLE_FMT_FLTP,
                                out_sample_rate_,
                                av_get_default_channel_layout(in_channels_),
                                AV_SAMPLE_FMT_S16,
                                in_sample_rate_,
                                0,
                                NULL);
    if(!ctx_){
        printf("swr_alloc_set_opts failed\n");
        return -1;
    }
    int ret = swr_init(ctx_);
    if(ret < 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("swr_init failed:%s\n", errbuf);
        return NULL;
    }
    return 0;
}

int AudioResample::InitFromFLTPToS16(int in_channels, int in_sample_rate, int out_channels, int out_sample_rate)
{
    in_channels_ = in_channels;
    in_sample_rate_ = in_sample_rate;
    out_channels_ = out_channels;
    out_sample_rate_ = out_sample_rate;

    ctx_ = swr_alloc_set_opts(ctx_,
        av_get_default_channel_layout(out_channels_),
        AV_SAMPLE_FMT_S16,
        out_sample_rate_,
        av_get_default_channel_layout(in_channels_),
        AV_SAMPLE_FMT_FLTP,
        in_sample_rate_,
        0,
        NULL);
    if (!ctx_) {
        printf("swr_alloc_set_opts failed\n");
        return -1;
    }
    int ret = swr_init(ctx_);
    if (ret < 0)
    {
        char errbuf[1024] = { 0 };
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("swr_init failed:%s\n", errbuf);
        return NULL;
    }
    return 0;
}

/**
 * Converts the samples of in_frame into out_frame using the context set up by
 * InitFromFLTPToS16().
 *
 * @param in_frame   source frame (one plane per channel)
 * @param out_frame  destination frame; its nb_samples is used as the output capacity
 * @return number of samples per channel written, or -1 on failure or when
 *         nothing was produced.
 */
int AudioResample::ResampleFromFLTPToS16(AVFrame* in_frame, AVFrame* out_frame)
{
    if (!ctx_ || !in_frame || !out_frame)
    {
        return -1;
    }
    const uint8_t* indata[AV_NUM_DATA_POINTERS] = {0};
    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++)
    {
        indata[i] = in_frame->data[i];
    }
    // The input count comes from the input frame: using the output frame's
    // count would read past the source planes whenever the two differ.
    int len = swr_convert(ctx_,
        out_frame->data,
        out_frame->nb_samples,
        indata,
        in_frame->nb_samples);
    if (len <= 0)
    {
        return -1;
    }
    return len;
}


/**
 * Converts interleaved S16 samples into out_frame using the context set up by
 * InitFromS16ToFLTP().
 *
 * @param in_data    interleaved input; must hold at least out_frame->nb_samples
 *                   samples per channel, because that count is passed as the
 *                   input size (simplification: the sample rate is unchanged)
 * @param out_frame  destination frame
 * @return number of samples per channel written, or -1 on failure or when
 *         nothing was produced.
 */
int AudioResample::ResampleFromS16ToFLTP(uint8_t *in_data, AVFrame *out_frame)
{
    // swr_convert dereferences both the context and the output planes.
    if (!ctx_ || !in_data || !out_frame)
    {
        return -1;
    }
    const uint8_t *indata[AV_NUM_DATA_POINTERS] = {0};
    indata[0] = in_data;
    int len = swr_convert(ctx_,
                          out_frame->data,
                          out_frame->nb_samples,
                          indata,
                          out_frame->nb_samples);
    if(len <= 0)
    {
        return -1;
    }
    return len;
}

void AudioResample::DeInit()
{
    if(ctx_){
        swr_free(&ctx_);
    }
}

// Builds an audio frame in planar float (FLTP) format with buffers for
// sample_per_channel samples on each of `channels` channels.
// Returns NULL when the frame or its buffers cannot be allocated.
AVFrame *AllocFltpPcmFrame(int channels, int sample_per_channel)
{
    AVFrame *frame = av_frame_alloc();
    if (frame == NULL)
        return NULL;

    // av_frame_get_buffer sizes the buffers from these four fields.
    frame->format = AV_SAMPLE_FMT_FLTP;
    frame->channels = channels;
    frame->channel_layout = av_get_default_channel_layout(channels);
    frame->nb_samples = sample_per_channel;

    const int rc = av_frame_get_buffer(frame, 0);
    if (rc == 0)
        return frame;

    char message[1024] = {0};
    av_strerror(rc, message, sizeof(message) - 1);
    printf("av_frame_get_buffer failed:%s\n", message);
    av_frame_free(&frame);
    return NULL;
}

// Builds an audio frame in interleaved signed 16-bit (S16) format with buffers
// for sample_per_channel samples on each of `channels` channels.
// Returns NULL when the frame or its buffers cannot be allocated.
AVFrame* AllocS16PcmFrame(int channels, int sample_per_channel)
{
    AVFrame *frame = av_frame_alloc();
    if (frame == NULL)
        return NULL;

    // av_frame_get_buffer sizes the buffers from these four fields.
    frame->format = AV_SAMPLE_FMT_S16;
    frame->channels = channels;
    frame->channel_layout = av_get_default_channel_layout(channels);
    frame->nb_samples = sample_per_channel;

    const int rc = av_frame_get_buffer(frame, 0);
    if (rc == 0)
        return frame;

    char message[1024] = { 0 };
    av_strerror(rc, message, sizeof(message) - 1);
    printf("av_frame_get_buffer failed:%s\n", message);
    av_frame_free(&frame);
    return NULL;
}

// Releases a frame created by AllocFltpPcmFrame/AllocS16PcmFrame.
// A NULL frame is ignored.
void FreePCMFrame(AVFrame *frame)
{
    if (frame == NULL)
        return;
    av_frame_free(&frame);
}

音频编码类

class AudioEncoder
{
public:
    AudioEncoder();
    ~AudioEncoder();
    int InitAAC(int channels, int sample_rate, int bit_rate);
    // int InitMP3(/*int channels, int sample_rate, int bit_rate*/);
    void DeInit();
    // AVPacket *Encode(AVFrame *frame, int stream_index, int64_t pts, int64_t time_base);
    int Encode(AVFrame *frame, int stream_index, int64_t pts, int64_t time_base, std::vector<AVPacket*> &packets);
    int GetFrameSize(); // 获取一帧数据 每个通道需要多少个采样点
    int GetSampleFormat(); // 编码器需要的采样格式
    AVCodecContext *GetCodecContext();
    int GetChannels();
    int GetSampleRate();
private:
    int channels_ = 2;
    int sample_rate_ = 44100;
    int bit_rate_ = 128*1024;
    int64_t pts_ = 0;
    AVCodecContext * codec_ctx_ = NULL;
};

#include "audioencoder.h"

// Nothing to set up here; the codec context is created by InitAAC().
AudioEncoder::AudioEncoder() {}

// Releases the codec context if one is still held.
AudioEncoder::~AudioEncoder()
{
    if (codec_ctx_ != NULL)
        DeInit();
}

int AudioEncoder::InitAAC(int channels, int sample_rate, int bit_rate)
{
    channels_ = channels;
    sample_rate_ = sample_rate;
    bit_rate_ = bit_rate;

    AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if(!codec)
    {
        printf("avcodec_find_encoder AV_CODEC_ID_AAC failed\n");
        return -1;
    }
    codec_ctx_ = avcodec_alloc_context3(codec);
    if(!codec_ctx_)
    {
        printf("avcodec_alloc_context3 AV_CODEC_ID_AAC failed\n");
        return -1;
    }
    codec_ctx_ -> flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    codec_ctx_ -> bit_rate = bit_rate_;
    codec_ctx_ -> sample_rate = sample_rate_;
    codec_ctx_ -> sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx_ -> channels = channels_;
    codec_ctx_ -> channel_layout = av_get_default_channel_layout(codec_ctx_->channels);

    int ret = avcodec_open2(codec_ctx_, NULL, NULL);
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_open2 failed:%s\n", errbuf);
        return -1;
    }
    printf("InitAAC Success! \n");
    return 0;
}

void AudioEncoder::DeInit()
{
    if(codec_ctx_)
    {
        avcodec_free_context(&codec_ctx_);
        // codec_ctx_ = NULL; // 画蛇添足
    }
}

/**
 * Encodes one frame and returns at most one packet.
 *
 * @param frame         frame to encode, or NULL to start flushing the encoder
 * @param stream_index  stream index stamped onto the returned packet
 * @param pts           presentation time of the frame, in 1/time_base seconds
 * @param time_base     denominator of the caller's time base (must be > 0)
 * @return a newly allocated packet the caller must free, or NULL on failure or
 *         when the encoder has no packet ready yet. Only one receive call is
 *         made, so use the vector overload when several packets may be pending.
 */
AVPacket *AudioEncoder::Encode(AVFrame *frame, int stream_index, int64_t pts, int64_t time_base)
{
    if(!codec_ctx_)
    {
        printf("codec_ctx_ null\n");
        return NULL;
    }
    // A non-positive denominator would make the rescale below divide by zero.
    if(time_base <= 0)
    {
        printf("invalid time_base\n");
        return NULL;
    }
    pts = av_rescale_q(pts, AVRational{1, (int)time_base}, codec_ctx_->time_base);
    if(frame)
    {
        frame -> pts = pts;
    }
    int ret = avcodec_send_frame(codec_ctx_, frame);
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_send_frame failed:%s\n", errbuf);
        return NULL;
    }
    AVPacket *packet = av_packet_alloc();
    if(!packet)
    {
        printf("av_packet_alloc failed\n");
        return NULL;
    }
    // Strictly speaking receive should be called repeatedly; see the vector overload.
    ret = avcodec_receive_packet(codec_ctx_, packet);
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_receive_packet failed:%s\n", errbuf);
        av_packet_free(&packet);
        return NULL;
    }
    packet->stream_index = stream_index;
    return packet;
}

/**
 * Encodes one frame and appends every packet the encoder yields to packets.
 *
 * @param frame         frame to encode, or NULL to start flushing the encoder
 * @param stream_index  stream index stamped onto each produced packet
 * @param pts           presentation time of the frame, in 1/time_base seconds
 * @param time_base     denominator of the caller's time base (must be > 0)
 * @param packets       receives the newly allocated packets; the caller owns them
 * @return 0 on success (including "no packet ready yet"), -1 on failure.
 *         Packets appended before a failure stay in the vector.
 */
int AudioEncoder::Encode(AVFrame *frame, int stream_index, int64_t pts, int64_t time_base, std::vector<AVPacket *> &packets)
{
    if(!codec_ctx_)
    {
        printf("codec_ctx_ null\n");
        return -1;
    }
    // A non-positive denominator would make the rescale below divide by zero.
    if(time_base <= 0)
    {
        printf("invalid time_base\n");
        return -1;
    }
    pts = av_rescale_q(pts, AVRational{1, (int)time_base}, codec_ctx_->time_base);
    if(frame)
    {
        frame -> pts = pts;
    }
    int ret = avcodec_send_frame(codec_ctx_, frame);
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_send_frame failed:%s\n", errbuf);
        return -1;
    }
    while(1){
        AVPacket *packet = av_packet_alloc();
        if(!packet){
            printf("av_packet_alloc failed\n");
            ret = -1;
            break;
        }
        ret = avcodec_receive_packet(codec_ctx_, packet);
        if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF){
            // Encoder drained for now: not an error.
            ret = 0;
            av_packet_free(&packet);
            break;
        }
        else if(ret < 0){
            char errbuf[1024] = {0};
            av_strerror(ret, errbuf, sizeof(errbuf) - 1);
            printf("avcodec_receive_packet failed:%s\n", errbuf);
            av_packet_free(&packet);
            ret = -1;
            // Stop here: without this break a NULL packet would be stored
            // and the loop would keep polling a failing encoder.
            break;
        }
        packet -> stream_index = stream_index;
        packets.push_back(packet);
    }
    return ret;
}

// Samples per channel the encoder expects in one frame; 0 when not initialised.
int AudioEncoder::GetFrameSize()
{
    return codec_ctx_ ? codec_ctx_->frame_size : 0;
}

// Sample format the encoder expects; -1 (AV_SAMPLE_FMT_NONE) when not initialised.
int AudioEncoder::GetSampleFormat()
{
    if (!codec_ctx_)
        return -1;
    return codec_ctx_->sample_fmt;
}

// Exposes the underlying codec context (NULL when not initialised).
// The encoder keeps ownership of it.
AVCodecContext *AudioEncoder::GetCodecContext()
{
    AVCodecContext *ctx = codec_ctx_;
    return ctx;
}

// Channel count of the opened encoder; -1 when not initialised.
int AudioEncoder::GetChannels()
{
    return codec_ctx_ ? codec_ctx_->channels : -1;
}

// Sample rate of the opened encoder; -1 when not initialised.
int AudioEncoder::GetSampleRate()
{
    return codec_ctx_ ? codec_ctx_->sample_rate : -1;
}

视频编码类

class VideoEncoder
{
public:
    VideoEncoder();
    ~VideoEncoder();
    int InitH264(int width, int height, int fps, int bit_rate);
    // AVPacket *Encode(uint8_t* yuv_data, int yuv_size, int stream_index, int64_t pts, int64_t time_base);
    int Encode(uint8_t* yuv_data, int yuv_size, int stream_index, int64_t pts, int64_t time_base, std::vector<AVPacket*> &packets);
    void DeInit();
    AVCodecContext *GetCodecContext();
private:
    int width_ = 0;
    int height_ = 0;
    int fps_ = 0;
    int bit_rate_ = 500*1024;
    int64_t pts_ = 0;
    AVCodecContext * codec_ctx_ = NULL;
    AVFrame* frame_ = NULL;
    AVDictionary* dict_ = NULL;
};

#include "videoencoder.h"

// Nothing to set up here; all resources are created by InitH264().
VideoEncoder::VideoEncoder() {}

// Releases the encoder resources when a codec context is still held.
VideoEncoder::~VideoEncoder()
{
    if (codec_ctx_ != NULL)
        DeInit();
}

int VideoEncoder::InitH264(int width, int height, int fps, int bit_rate)
{
    width_ = width;
    height_ = height;
    fps_ = fps;
    bit_rate_ = bit_rate;

    AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if(!codec)
    {
        printf("avcodec_find_encoder AV_CODEC_ID_H264 failed\n");
        return -1;
    }
    codec_ctx_ = avcodec_alloc_context3(codec);
    if(!codec_ctx_)
    {
        printf("avcodec_alloc_context3 AV_CODEC_ID_H264 failed\n");
        return -1;
    }
    codec_ctx_ -> flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    codec_ctx_ -> bit_rate = bit_rate_;
    codec_ctx_ -> width = width_;
    codec_ctx_ -> height = height_;
    codec_ctx_ -> framerate = {fps_ , 1};
    codec_ctx_ -> time_base = {1, 1000000}; // 单位是微妙
    codec_ctx_ -> gop_size = fps_;
    codec_ctx_ -> max_b_frames = 0;
    codec_ctx_ -> pix_fmt = AV_PIX_FMT_YUV420P;
    av_dict_set(&dict_, "tune", "zerolatency", 0);

    int ret = avcodec_open2(codec_ctx_, NULL, &dict_);
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_open2 failed:%s\n", errbuf);
        return -1;
    }
    frame_ = av_frame_alloc();
    if(!frame_)
    {
        printf("av_frame_alloc failed\n");
        return -1;
    }
    frame_ -> width = width_;
    frame_ -> height = height_;
    frame_ -> format = codec_ctx_ -> pix_fmt;

    printf("InitH264 Success! \n");
    return 0;
}

/**
 * Encodes one raw YUV picture and returns at most one packet.
 *
 * @param yuv_data      tightly packed picture in the encoder's pixel format,
 *                      or NULL to start flushing the encoder
 * @param yuv_size      size of yuv_data in bytes; must match the size implied
 *                      by the configured width, height and pixel format
 * @param stream_index  stream index stamped onto the returned packet
 * @param pts           presentation time of the picture, in 1/time_base seconds
 * @param time_base     denominator of the caller's time base (must be > 0)
 * @return a newly allocated packet the caller must free, or NULL on failure or
 *         when the encoder has no packet ready yet. Only one receive call is
 *         made, so use the vector overload when several packets may be pending.
 */
AVPacket *VideoEncoder::Encode(uint8_t *yuv_data, int yuv_size, int stream_index, int64_t pts, int64_t time_base)
{
    if(!codec_ctx_)
    {
        printf("codec_ctx_ null\n");
        return NULL;
    }
    int ret = 0;
    if(yuv_data)
    {
        if(!frame_)
        {
            printf("frame_ null\n");
            return NULL;
        }
        // A non-positive denominator would make the rescale below divide by zero.
        if(time_base <= 0)
        {
            printf("invalid time_base\n");
            return NULL;
        }
        frame_ -> pts = av_rescale_q(pts, AVRational{1, (int)time_base}, codec_ctx_->time_base);
        // Point the frame's planes into the caller's buffer (no copy).
        int ret_size = av_image_fill_arrays(frame_->data,frame_->linesize,
                                            yuv_data, (AVPixelFormat)frame_->format,
                                            frame_->width, frame_->height, 1);
        if(ret_size != yuv_size){
            printf("ret_size: %d != yuv_size: %d\n", ret_size, yuv_size);
            return NULL;
        }
        ret = avcodec_send_frame(codec_ctx_, frame_);
    }
    else{ // no data: enter flush mode
        ret = avcodec_send_frame(codec_ctx_, NULL);
    }
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_send_frame failed:%s\n", errbuf);
        return NULL;
    }
    AVPacket *packet = av_packet_alloc();
    if(!packet)
    {
        printf("av_packet_alloc failed\n");
        return NULL;
    }
    // Strictly speaking receive should be called repeatedly; see the vector overload.
    ret = avcodec_receive_packet(codec_ctx_, packet);
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_receive_packet failed:%s\n", errbuf);
        av_packet_free(&packet);
        return NULL;
    }
    packet->stream_index = stream_index;
    return packet;
}

int VideoEncoder::Encode(uint8_t *yuv_data, int yuv_size, int stream_index, int64_t pts, int64_t time_base, std::vector<AVPacket *> &packets)
{
    if(!codec_ctx_)
    {
        printf("codec_ctx_ null\n");
        return -1;
    }
    int ret = 0;
    pts = av_rescale_q(pts, AVRational{1, (int)time_base}, codec_ctx_->time_base);
    frame_ -> pts = pts;
    if(yuv_data)
    {
        int ret_size = av_image_fill_arrays(frame_->data,frame_->linesize,
                                            yuv_data, (AVPixelFormat)frame_->format,
                                            frame_->width, frame_->height, 1);
        if(ret_size != yuv_size){
            printf("ret_size: %d != yuv_size: %d\n", ret_size, yuv_size);
            return -1;
        }
        ret = avcodec_send_frame(codec_ctx_, frame_);
    }
    else{ // yuv_data为空时，执行flush机制
        ret = avcodec_send_frame(codec_ctx_, NULL);
    }
    if(ret != 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_send_frame failed:%s\n", errbuf);
        return -1;
    }
    while(1){
        AVPacket *packet = av_packet_alloc();
        ret = avcodec_receive_packet(codec_ctx_, packet);
        packet -> stream_index = stream_index;
        if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF){
            ret = 0;
            av_packet_free(&packet);
            break;
        }
        else if(ret < 0){
            char errbuf[1024] = {0};
            av_strerror(ret, errbuf, sizeof(errbuf) - 1);
            printf("avcodec_receive_packet failed:%s\n", errbuf);
            av_packet_free(&packet);
            ret = -1;
            break;
        }
        packets.push_back(packet);
    }
    return ret;
}

void VideoEncoder::DeInit()
{
    if(codec_ctx_)
    {
        avcodec_free_context(&codec_ctx_);
    }
    if(frame_)
    {
        av_frame_free(&frame_);
    }
    if(dict_)
    {
        av_dict_free(&dict_);
    }
}

// Exposes the underlying codec context (NULL when not initialised).
// The encoder keeps ownership of it.
AVCodecContext *VideoEncoder::GetCodecContext()
{
    AVCodecContext *ctx = codec_ctx_;
    return ctx;
}

封装类

// Wraps an FFmpeg output format context: allocate it with Init(), register
// streams with AddStream(), open the destination with Open(), then write the
// header, the packets and the trailer.
class Muxer
{
public:
    Muxer();
    ~Muxer();

    // Allocate the output context for url. Returns 0 on success, -1 on failure.
    int Init(const char *url);
    // Release the output context and reset all bookkeeping.
    void DeInit();

    // Create a stream whose parameters are copied from codec_ctx.
    int AddStream(AVCodecContext *codec_ctx);

    // Write the container header, one packet, and the container trailer.
    int SendHeader();
    int SendPacket(AVPacket* packet);
    int SendTrailer();

    // Open the destination url for writing (avio_open).
    int Open();

    // Stream indices recorded by AddStream; -1 when no such stream was added.
    int GetAudioStreamIndex();
    int GetVideoStreamIndex();

private:
    AVFormatContext *fmt_ctx_ = NULL;
    std::string url_;

    // Encoder contexts and the streams created from them.
    AVCodecContext *aud_codec_ctx_ = NULL;
    AVStream *aud_stream_ = NULL;
    AVCodecContext *vid_codec_ctx_ = NULL;
    AVStream *vid_stream_ = NULL;

    int audio_index_ = -1;
    int video_index_ = -1;
};

#include "muxer.h"


// Nothing to set up here; the output context is created by Init().
Muxer::Muxer() {}

// Releases the output context if the owner never called DeInit(); an empty
// destructor would leak it. DeInit() is safe to call more than once because it
// only acts on a non-NULL context.
Muxer::~Muxer()
{
    DeInit();
}

// Allocates the output format context, letting FFmpeg pick the container
// from url, and remembers url for Open().
// Returns 0 on success, -1 on failure.
int Muxer::Init(const char *url)
{
    const int rc = avformat_alloc_output_context2(&fmt_ctx_, NULL, NULL, url);
    if (rc >= 0)
    {
        url_ = url;
        return 0;
    }

    char message[1024] = {0};
    av_strerror(rc, message, sizeof(message) - 1);
    printf("avformat_alloc_output_context2 failed:%s\n", message);
    return -1;
}

void Muxer::DeInit()
{
    if(fmt_ctx_)
    {
        avformat_close_input(&fmt_ctx_);
    }
    url_ = "";
    aud_codec_ctx_ = NULL;
    aud_stream_ = NULL;
    vid_codec_ctx_ = NULL;
    vid_stream_ = NULL;
    audio_index_ = -1;
    video_index_ = -1;
}

/**
 * Adds a stream to the output context, copies its parameters from codec_ctx
 * and records it as the audio or video stream according to the codec type.
 *
 * @param codec_ctx  opened encoder context; the muxer keeps the pointer but
 *                   does not take ownership
 * @return 0 on success, -1 on failure.
 */
int Muxer::AddStream(AVCodecContext *codec_ctx)
{
    if(!fmt_ctx_){
        printf("fmt ctx is NULL\n");
        return -1;
    }
    if(!codec_ctx)
    {
        printf("codec ctx is NULL\n");
        return -1;
    }
    AVStream *st = avformat_new_stream(fmt_ctx_, NULL);
    if(!st)
    {
        printf("avformat_new_stream failed!\n");
        return -1;
    }
    // st->codecpar->codec_tag = 0;
    // Copy the stream parameters from the encoder context; a failed copy
    // would leave the stream without usable codec parameters.
    int ret = avcodec_parameters_from_context(st->codecpar, codec_ctx);
    if(ret < 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avcodec_parameters_from_context failed:%s\n", errbuf);
        return -1;
    }
    av_dump_format(fmt_ctx_, 0, url_.c_str(), 1);

    // Remember whether this is the audio or the video stream.
    if(codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO){
        aud_codec_ctx_ = codec_ctx;
        aud_stream_ = st;
        audio_index_ = st->index;
    }  else  if(codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO){
        vid_codec_ctx_ = codec_ctx;
        vid_stream_ = st;
        video_index_ = st->index;
    }
    return 0;
}

// Writes the container header. Returns 0 on success, -1 when there is no
// output context or the write fails.
int Muxer::SendHeader()
{
    if (fmt_ctx_ == NULL) {
        printf("fmt ctx is NULL\n");
        return -1;
    }

    const int rc = avformat_write_header(fmt_ctx_, NULL);
    if (rc == 0)
        return 0;

    char message[1024] = {0};
    av_strerror(rc, message, sizeof(message) - 1);
    printf("avformat_write_header failed:%s\n", message);
    return -1;
}

/**
 * Converts the packet's timestamps from the encoder time base to the stream
 * time base and hands it to the interleaving writer.
 *
 * The packet is consumed: it is freed on every path, so the caller must not
 * touch it after this call.
 *
 * @param packet  encoded packet whose stream_index is one of the indices
 *                recorded by AddStream()
 * @return 0 on success; -1 for a NULL or empty packet, a missing output
 *         context, an unknown stream index, or a write error.
 */
int Muxer::SendPacket(AVPacket *packet)
{
    // Validate before touching any field of the packet.
    if(!packet || packet->size <= 0 || !packet->data)
    {
        printf("packet is NULL \n");
        if(packet)
            av_packet_free(&packet);
        return -1;
    }
    if(!fmt_ctx_)
    {
        printf("fmt ctx is NULL\n");
        av_packet_free(&packet);
        return -1;
    }
    int stream_index = packet->stream_index;
    printf("index:%d, pts:%lld\n", stream_index, (long long)packet -> pts);

    AVRational src_time_base; // time base of the encoded packet
    AVRational dst_time_base; // time base of the matching output stream
    if(vid_stream_ && vid_codec_ctx_ && stream_index == video_index_)
    {
        src_time_base = vid_codec_ctx_->time_base;
        dst_time_base = vid_stream_->time_base;
    }
    else if(aud_stream_ && aud_codec_ctx_ && stream_index == audio_index_)
    {
        src_time_base = aud_codec_ctx_->time_base;
        dst_time_base = aud_stream_->time_base;
    }
    else
    {
        // Neither stream matches: the time bases would be left uninitialised.
        printf("unknown stream index:%d\n", stream_index);
        av_packet_free(&packet);
        return -1;
    }
    // Rescales pts, dts and duration together and leaves unset timestamps alone.
    av_packet_rescale_ts(packet, src_time_base, dst_time_base);

    int ret = 0;
    ret = av_interleaved_write_frame(fmt_ctx_, packet); // buffered, not written immediately
    // ret = av_write_frame(fmt_ctx_, packet); // unbuffered alternative
    av_packet_free(&packet); // the writer has taken the payload; release the shell
    if(ret == 0)
        return 0;
    else{
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("av_interleaved_write_frame failed:%s\n", errbuf);
        return -1;
    }
}

// Writes the container trailer. Returns 0 on success, -1 when there is no
// output context or the write fails.
int Muxer::SendTrailer()
{
    if (fmt_ctx_ == NULL) {
        printf("fmt ctx is NULL\n");
        return -1;
    }

    const int rc = av_write_trailer(fmt_ctx_);
    if (rc == 0)
        return 0;

    char message[1024] = {0};
    av_strerror(rc, message, sizeof(message) - 1);
    printf("av_write_trailer failed:%s\n", message);
    return -1;
}

// Opens the destination url for writing and attaches it to the output context.
// Returns the non-negative avio_open result on success, -1 when there is no
// output context (Init() not called or failed) or the open fails.
int Muxer::Open()
{
    // Same guard as the other methods: fmt_ctx_->pb is dereferenced below.
    if(!fmt_ctx_){
        printf("fmt ctx is NULL\n");
        return -1;
    }
    int ret = avio_open(&fmt_ctx_->pb, url_.c_str(), AVIO_FLAG_WRITE);
    if(ret < 0)
    {
        char errbuf[1024] = {0};
        av_strerror(ret, errbuf, sizeof(errbuf) - 1);
        printf("avio_open %s failed:%s\n", url_.c_str(), errbuf);
        return -1;
    }

    return ret;
}

// Index of the audio stream recorded by AddStream; -1 when none was added.
int Muxer::GetAudioStreamIndex()
{
    const int index = audio_index_;
    return index;
}

// Index of the video stream recorded by AddStream; -1 when none was added.
int Muxer::GetVideoStreamIndex()
{
    const int index = video_index_;
    return index;
}

Main Test

#include <iostream>
#include "audioencoder.h"
#include "videoencoder.h"
#include "audioresample.h"
#include "muxer.h"
#include "demux_main.h"
using namespace std;

// Geometry and frame rate of the raw YUV input.
#define YUV_WIDTH 720
#define YUV_HEIGHT 576
#define YUV_FPS 25

// Target video bit rate. Parenthesised so the macro expands to a single value
// inside larger expressions (e.g. x / VIDEO_BIT_RATE).
#define VIDEO_BIT_RATE (500 * 1024)


// Layout of the raw PCM input.
#define PCM_SAMPLE_FORMAT AV_SAMPLE_FMT_S16
#define PCM_SAMPLE_RATE 44100
#define PCM_CHANNELS 2

// Target audio bit rate, parenthesised for the same reason as above.
#define AUDIO_BIT_RATE (128*1024)

// Denominators of the time bases used for the pts values passed to the encoders.
#define AUDIO_TIME_BASE 1000000
#define VIDEO_TIME_BASE 1000000

// Example commands for producing the raw inputs with ffmpeg:
// ffmpeg -i sound_in_sync_test.mp4 -pix_fmt yuv420p 720x576_yuv420p.yuv
// ffmpeg -i sound_in_sync_test.mp4 -vn -ar 44100 -ac 2 -f s16le 44100_2_s16le.pcm

namespace {
// Owns the input files and raw frame buffers used by main() so that every
// return path (including early error returns) closes and frees them.
struct MainInputs
{
    FILE *yuv_fd;
    FILE *pcm_fd;
    uint8_t *yuv_buf;
    uint8_t *pcm_buf;

    MainInputs() : yuv_fd(NULL), pcm_fd(NULL), yuv_buf(NULL), pcm_buf(NULL) {}
    ~MainInputs()
    {
        free(yuv_buf);
        free(pcm_buf);
        if(pcm_fd)
            fclose(pcm_fd);
        if(yuv_fd)
            fclose(yuv_fd);
    }
};
} // namespace

/**
 * Encodes a raw YUV file and a raw PCM file and muxes the result into an
 * MP4 file.
 *
 * Command line: <exe> <in.yuv> <in.pcm> <out.mp4>
 *
 * @return 0 on success, -1 on a usage error, on any initialization failure,
 *         or when writing the trailer fails.
 */
int main(int argc, char **argv)
{
    // To run the demux demo instead: demux_main("sound_in_sync_test.mp4"); return 0;

    if(argc != 4)
    {
        printf("usage -> exe in.yuv in.pcm out.mp4");
        return -1;
    }

    // Declared first so it is destroyed last, after the encoders and muxer.
    MainInputs inputs;

    // 1. Open the yuv and pcm input files
    char *in_yuv_name  = argv[1];
    char *in_pcm_name  = argv[2];
    char *out_mp4_name  = argv[3];
    inputs.yuv_fd = fopen(in_yuv_name, "rb");
    if( !inputs.yuv_fd )
    {
        printf("Failed to open %s file\n", in_yuv_name);
        return -1;
    }
    inputs.pcm_fd = fopen(in_pcm_name, "rb");
    if( !inputs.pcm_fd )
    {
        printf("Failed to open %s file\n", in_pcm_name);
        return -1;
    }
    FILE *in_yuv_fd = inputs.yuv_fd;
    FILE *in_pcm_fd = inputs.pcm_fd;

    int ret = 0;
    // 2. Initialize the video and audio encoders and allocate the yuv/pcm frame buffers
    // Video
    int yuv_width = YUV_WIDTH;
    int yuv_height = YUV_HEIGHT;
    int yuv_fps = YUV_FPS;
    int video_bit_rate = VIDEO_BIT_RATE;

    VideoEncoder video_encoder;
    ret = video_encoder.InitH264(yuv_width, yuv_height, yuv_fps, video_bit_rate);
    if(ret < 0)
    {
        printf("video_encoder.InitH264 err\n");
        return -1;
    }
    // One frame buffer: a full-size Y plane plus quarter-size U and V planes
    int y_frame_size = yuv_width * yuv_height;
    int u_frame_size = yuv_width * yuv_height / 4;
    int v_frame_size = yuv_width * yuv_height / 4;
    int yuv_frame_size = y_frame_size + u_frame_size + v_frame_size;
    inputs.yuv_buf = (uint8_t*)malloc(yuv_frame_size);
    if( !inputs.yuv_buf )
    {
        printf("malloc yuv_frame_buf err\n");
        return -1;
    }
    uint8_t *yuv_frame_buf = inputs.yuv_buf;

    // Audio
    int pcm_channels = PCM_CHANNELS;
    int pcm_sample_rate = PCM_SAMPLE_RATE;
    int pcm_sample_format = PCM_SAMPLE_FORMAT;
    int audio_bit_rate = AUDIO_BIT_RATE;
    AudioEncoder audio_encoder;
    // Capture the result: without the assignment the check below would test
    // the stale value left over from InitH264.
    ret = audio_encoder.InitAAC(pcm_channels, pcm_sample_rate, audio_bit_rate);
    if(ret < 0)
    {
        printf("audio_encoder.InitAAC err\n");
        return -1;
    }
    // pcm_frame_size = bytes per sample * channel count * samples per channel in one frame
    int pcm_frame_size = av_get_bytes_per_sample((AVSampleFormat)pcm_sample_format)
                         * pcm_channels
                         * audio_encoder.GetFrameSize();
    if( pcm_frame_size <= 0 )
    {
        printf("pcm_frame_size <= 0\n");
        return -1;
    }
    inputs.pcm_buf = (uint8_t*) malloc (pcm_frame_size);
    if(!inputs.pcm_buf)
    {
        printf("malloc pcm_frame_buf err\n");
        return -1;
    }
    uint8_t *pcm_frame_buf = inputs.pcm_buf;

    // Resampler: S16 input -> FLTP for the audio encoder
    AudioResample audio_resample;
    ret = audio_resample.InitFromS16ToFLTP(pcm_channels, pcm_sample_rate,
                                    audio_encoder.GetChannels(),
                                     audio_encoder.GetSampleRate());
    if(ret < 0)
    {
        printf("audio_resample.InitFromS16ToFLTP err\n");
        return -1;
    }
    // 3. Set up the mp4 muxer: add the streams, open the output IO, send the header
    Muxer mp4_muxer;
    ret = mp4_muxer.Init(out_mp4_name);
    if(ret < 0)
    {
        printf("mp4_muxer.Init err\n");
        return -1;
    }
    ret = mp4_muxer.AddStream(video_encoder.GetCodecContext());
    if(ret < 0)
    {
        printf("mp4_muxer.AddStream video err\n");
        return -1;
    }
    ret = mp4_muxer.AddStream(audio_encoder.GetCodecContext());
    if(ret < 0)
    {
        printf("mp4_muxer.AddStream audio err\n");
        return -1;
    }
    ret = mp4_muxer.Open();
    if(ret < 0)
    {
        printf("mp4_muxer.Open err\n");
        return -1;
    }
    ret = mp4_muxer.SendHeader();
    if(ret < 0)
    {
        printf("mp4_muxer.SendHeader err\n");
        return -1;
    }
    // 4. Loop: read yuv/pcm frames, encode them and hand the packets to the mp4 muxer

    /*
        Timestamps are accumulated from the per-frame interval. For example,
        25 fps video has a 40 ms interval; 44100 Hz audio with 1024 samples
        per frame has an interval of about 23.22 ms.
    */

    double audio_pts = 0;
    double video_pts = 0;
    double audio_frame_duration = 1.0 * audio_encoder.GetFrameSize()
                                  / pcm_sample_rate
                                  * AUDIO_TIME_BASE;
    double video_frame_duration = 1.0 / yuv_fps * VIDEO_TIME_BASE;

    int audio_finish = 0; // the loop ends once both flags are 1
    int video_finish = 0;

    size_t read_len = 0;
    vector<AVPacket*> packets;
    int audio_index = mp4_muxer.GetAudioStreamIndex();
    int video_index = mp4_muxer.GetVideoStreamIndex();
    while(1)
    {
        if(audio_finish && video_finish)
            break;
        printf("apts:%0.0lf vpts:%0.0lf\n", audio_pts/1000, video_pts/1000);
        // Video is encoded only when the audio clock is ahead of the video
        // clock or audio has ended; on a tie, audio goes first.
        if((video_finish != 1 && audio_pts > video_pts)
            || (video_finish != 1 && audio_finish == 1))
        {
            read_len = fread(yuv_frame_buf, 1, yuv_frame_size, in_yuv_fd);
            // A short read (EOF or a trailing partial frame) ends the video input.
            if(read_len < (size_t)yuv_frame_size){
                video_finish = 1;
                printf("fread yuv_frame_buf finished\n");
            }

            if(video_finish != 1)
            {
                ret = video_encoder.Encode(yuv_frame_buf,
                                              yuv_frame_size,
                                              video_index,
                                              video_pts,
                                              VIDEO_TIME_BASE, packets);
            }
            else
            {
                // NULL input: presumably tells the encoder to drain its
                // buffered frames -- verify against VideoEncoder::Encode.
                ret = video_encoder.Encode(NULL,
                                           0,
                                           video_index,
                                           video_pts,
                                           VIDEO_TIME_BASE, packets);
            }
            video_pts += video_frame_duration; // advance the video clock
            if(ret >= 0){
                for(size_t i = 0 ; i < packets.size() ; i++){
                    mp4_muxer.SendPacket(packets[i]);
                }
            }
            packets.clear();
        }
        else if(audio_finish != 1)
        {
            read_len = fread(pcm_frame_buf, 1, pcm_frame_size, in_pcm_fd);
            // A short read (EOF or a trailing partial frame) ends the audio input.
            if(read_len < (size_t)pcm_frame_size){
                audio_finish = 1;
                printf("fread pcm_frame_buf finished\n");
            }
            if(audio_finish != 1)
            {
                AVFrame *fltp_frame = AllocFltpPcmFrame(pcm_channels, audio_encoder.GetFrameSize());
                if(!fltp_frame)
                {
                    // No frame to fill: skip this audio frame instead of
                    // passing NULL to the resampler.
                    printf("AllocFltpPcmFrame err\n");
                    ret = -1;
                }
                else
                {
                    ret = audio_resample.ResampleFromS16ToFLTP(pcm_frame_buf, fltp_frame);
                    if(ret < 0)
                    {
                        // Skip encoding: the frame was not filled with valid samples.
                        printf("audio_resample.ResampleFromS16ToFLTP \n");
                    }
                    else
                    {
                        ret = audio_encoder.Encode(fltp_frame,
                                                   audio_index,
                                                   audio_pts,
                                                   AUDIO_TIME_BASE,
                                                   packets);
                    }
                    FreePCMFrame(fltp_frame);
                }
            }
            else
            {
                // NULL frame: presumably tells the encoder to drain its
                // buffered frames -- verify against AudioEncoder::Encode.
                ret = audio_encoder.Encode(NULL,
                                              audio_index,
                                              audio_pts,
                                              AUDIO_TIME_BASE,
                                           packets);
            }
            audio_pts += audio_frame_duration; // advance the audio clock
            if(ret >= 0){
                for(size_t i = 0 ; i < packets.size() ; i++){
                    mp4_muxer.SendPacket(packets[i]);
                }
            }
            packets.clear();
        }
    }
    // 5. Finish up; files and buffers are released by MainInputs' destructor
    int exit_code = 0;
    ret = mp4_muxer.SendTrailer();
    if(ret < 0)
    {
        printf("mp4_muxer.SendTrailer err\n");
        exit_code = -1; // report the failed trailer write to the caller
    }
    printf("Write mp4 finished\n");

    return exit_code;
}

2023-12-09 该篇文章被 Cleofwine 归为分类: 音视频