Reading SPS and PPS from a Video Stream with FFMPEG

Copyright notice: this is an original post by the author. Please credit the source when reposting, thanks!

There are always irregular streams out there that we have to handle.

What are SPS and PPS

The H.264 SPS and PPS carry the parameters needed to initialize an H.264 decoder, including the encoding profile and level, the picture width and height, deblocking filter settings, and so on.


Where SPS and PPS normally live

In FLV, the SPS and PPS are contained in the AVCDecoderConfigurationRecord structure, and after FFMPEG has parsed the stream, that AVCDecoderConfigurationRecord is exactly what ends up in extradata of the AVCodecContext. Let's list the structure first.

aligned(8) class AVCDecoderConfigurationRecord {
    unsigned int(8) configurationVersion = 1;
    unsigned int(8) AVCProfileIndication;
    unsigned int(8) profile_compatibility;
    unsigned int(8) AVCLevelIndication;
    bit(6) reserved = '111111'b;
    unsigned int(2) lengthSizeMinusOne;
    bit(3) reserved = '111'b;
    unsigned int(5) numOfSequenceParameterSets;
    for (i = 0; i < numOfSequenceParameterSets; i++) {
        unsigned int(16) sequenceParameterSetLength;
        bit(8*sequenceParameterSetLength) sequenceParameterSetNALUnit;
    }
    unsigned int(8) numOfPictureParameterSets;
    for (i = 0; i < numOfPictureParameterSets; i++) {
        unsigned int(16) pictureParameterSetLength;
        bit(8*pictureParameterSetLength) pictureParameterSetNALUnit;
    }
}

The FFMPEG code to obtain this data looks roughly like the following; m_formatCtx in the code is an AVFormatContext instance.

for (unsigned int i = 0; i < h264Source->m_formatCtx->nb_streams; i++) {
    if (h264Source->m_formatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
        h264Source->m_videoIndex = i;
        // Dump the extradata (the AVCDecoderConfigurationRecord) byte by byte
        for (int j = 0; j < h264Source->m_formatCtx->streams[i]->codec->extradata_size; j++)
        {
            printf("%02x ", h264Source->m_formatCtx->streams[i]->codec->extradata[j]);
        }
        printf("\n");
        return h264Source->m_videoIndex;
    }
}
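The loop above assumes that m_formatCtx has already been opened and probed. For completeness, here is a minimal setup sketch using FFmpeg's libavformat API (the URL is a placeholder and error handling is reduced to returning NULL):

#include <libavformat/avformat.h>

// Open an input URL and probe its streams; returns NULL on failure.
// On success the caller owns the context and should eventually call
// avformat_close_input(&fmt).
static AVFormatContext *open_source(const char *url)
{
    AVFormatContext *fmt = NULL;

    av_register_all();        // no longer needed on FFmpeg 4.0+
    avformat_network_init();  // needed for network inputs such as rtsp://

    if (avformat_open_input(&fmt, url, NULL, NULL) < 0)
        return NULL;
    if (avformat_find_stream_info(fmt, NULL) < 0) {
        avformat_close_input(&fmt);
        return NULL;
    }
    return fmt;
}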

The output of the printf loop above looks like the following.
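For the stream analysed below, the dump comes out roughly as follows (reconstructed from the field values listed in the next section; the bytes 0xff and 0xe1 follow from lengthSizeMinusOne = 3 and a single SPS with the reserved bits set to 1):

01 64 00 1f ff e1 00 19 67 64 00 1f ac d9 40 46 05 9f b8 40 00 00 03 00 40 00 00 0f 23 c6 0c 65 80 01 00 04 68 ef bc b0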

According to the AVCDecoderConfigurationRecord structure (a small parsing sketch follows this list):

  • configurationVersion is byte 1, i.e. 0x01;
  • AVCProfileIndication is byte 2, i.e. SPS[1];
  • profile_compatibility is byte 3, i.e. SPS[2];
  • AVCLevelIndication is byte 4, i.e. SPS[3];
  • lengthSizeMinusOne is the low 2 bits of byte 5; it is the length of the NALUnitLength field minus 1, normally 3;
  • numOfSequenceParameterSets is the low 5 bits of byte 6, i.e. the number of SPSes, here 1;
  • sequenceParameterSetLength is bytes 7 and 8, the length of the SPS, here 0x0019 (25);
  • sequenceParameterSetNALUnit is the next 25 bytes, the SPS itself:
    0x6764 0x001f 0xacd9 0x4046 0x059f 0xb840 0x0000 0x0300 0x4000 0x000f 0x23c6 0x0c65 0x80
  • numOfPictureParameterSets is the next byte, the number of PPSes, here 0x01;
  • pictureParameterSetLength is the next 2 bytes, the length of the PPS, here 0x0004;
  • pictureParameterSetNALUnit is the final 4 bytes, the PPS itself:
    0x68ef 0xbcb0
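Putting that layout into code, a minimal sketch of pulling the first SPS and PPS out of extradata could look like this (it assumes well-formed AVCC extradata with at least one SPS and one PPS, reads only the first of each, and skips most bounds checking; the function name is illustrative):

#include <stdint.h>

// Walks extradata laid out as an AVCDecoderConfigurationRecord.
// Returns 0 on success; sps/pps point into the extradata buffer (no copy).
static int parse_avcc_extradata(const uint8_t *extra, int size,
                                const uint8_t **sps, int *sps_len,
                                const uint8_t **pps, int *pps_len)
{
    if (size < 7 || extra[0] != 1)            // configurationVersion must be 1
        return -1;

    int pos = 5;                              // skip version/profile/compat/level/length byte
    int num_sps = extra[pos++] & 0x1f;        // numOfSequenceParameterSets (5 bits)
    if (num_sps < 1) return -1;

    *sps_len = (extra[pos] << 8) | extra[pos + 1];   // sequenceParameterSetLength
    pos += 2;
    *sps = extra + pos;                       // sequenceParameterSetNALUnit
    pos += *sps_len;

    int num_pps = extra[pos++];               // numOfPictureParameterSets
    if (num_pps < 1) return -1;

    *pps_len = (extra[pos] << 8) | extra[pos + 1];   // pictureParameterSetLength
    pos += 2;
    *pps = extra + pos;                       // pictureParameterSetNALUnit
    return 0;
}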

Abnormal SPS and PPS

Something I was working on recently: getting the SPS and PPS from an RTSP stream. I am not sure whether it is the stream or FFMPEG, but when reading the RTSP source the extradata is always NULL. After analysing the AVPacket I found that its data does not begin with the length prefix I usually expect; instead it begins with '0000 0001', which is clearly the Annex B NALU format, and the SPS and PPS obviously sit right in front of the I-frame. That is completely different from my usual understanding that an AVPacket only carries picture data. I then followed 雷霄骅 (Lei Xiaohua)'s example 最简单的基于librtmp的示例:发布H.264(H.264通过RTMP发布) (the simplest librtmp example: publishing H.264 over RTMP); the rough flow is as follows.
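A quick way to confirm that the packets really are in Annex B form is to look at the first few bytes of each AVPacket returned by av_read_frame; a minimal check (the helper name is mine, not part of FFmpeg):

#include <libavformat/avformat.h>

// Returns 1 if the packet data begins with an Annex B start code
// (00 00 01 or 00 00 00 01), 0 otherwise.
static int packet_is_annexb(const AVPacket *pkt)
{
    const uint8_t *d = pkt->data;
    if (pkt->size >= 4 && d[0] == 0 && d[1] == 0 && d[2] == 0 && d[3] == 1)
        return 1;
    if (pkt->size >= 3 && d[0] == 0 && d[1] == 0 && d[2] == 1)
        return 1;
    return 0;
}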

First, get the first NALU, which here we use to read the SPS; the code is as follows:

int ReadFirstNaluFromBuf(jerky_h264_source* h264Source, NaluUnit &nalu, int packetSize, unsigned char *packetData)
{
    int naltail_pos = h264Source->nalhead_pos;
    memset(m_tmp_nalu_data, 0, BUFFER_SIZE);
    while (h264Source->nalhead_pos < packetSize)
    {
        // Search for the NAL start code
        if (packetData[h264Source->nalhead_pos++] == 0x00 &&
            packetData[h264Source->nalhead_pos++] == 0x00)
        {
            if (packetData[h264Source->nalhead_pos++] == 0x01)
                goto gotnal_head;
            else
            {
                // Check for the 4-byte form 00 00 00 01
                h264Source->nalhead_pos--;
                if (packetData[h264Source->nalhead_pos++] == 0x00 &&
                    packetData[h264Source->nalhead_pos++] == 0x01)
                    goto gotnal_head;
                else
                    continue;
            }
        }
        else
            continue;

        // Search for the NAL tail, which is also the next NALU's start code
    gotnal_head:
        // In the normal case the whole NALU sits inside this packetData
        naltail_pos = h264Source->nalhead_pos;
        while (naltail_pos < packetSize)
        {
            if (packetData[naltail_pos++] == 0x00 &&
                packetData[naltail_pos++] == 0x00)
            {
                if (packetData[naltail_pos++] == 0x01)
                {
                    nalu.size = (naltail_pos - 3) - h264Source->nalhead_pos;
                    break;
                }
                else
                {
                    naltail_pos--;
                    if (packetData[naltail_pos++] == 0x00 &&
                        packetData[naltail_pos++] == 0x01)
                    {
                        nalu.size = (naltail_pos - 4) - h264Source->nalhead_pos;
                        break;
                    }
                }
            }
        }
        nalu.type = packetData[h264Source->nalhead_pos] & 0x1f;
        memcpy(m_tmp_nalu_data, packetData + h264Source->nalhead_pos, nalu.size);
        nalu.data = m_tmp_nalu_data;
        h264Source->nalhead_pos = naltail_pos;
        return TRUE;
    }
    return FALSE;  // no start code found in this packet
}

Then read the next NALU, which here we use to read the PPS; the code is as follows:

int ReadOneNaluFromBuf(jerky_h264_source* h264Source, NaluUnit &nalu, int packetSize, unsigned char *packetData)
{
    int naltail_pos = h264Source->nalhead_pos;
    int nalustart;  // number of bytes in the start code (3 or 4)
    memset(m_tmp_nalu_data, 0, BUFFER_SIZE);
    nalu.size = 0;
    while (1)
    {
        if (naltail_pos >= packetSize) {
            break;
        }
        if (h264Source->nalhead_pos == NO_MORE_BUFFER_TO_READ)
            return FALSE;
        while (naltail_pos < packetSize)
        {
            // Look for the end of the NALU (the next start code)
            if (packetData[naltail_pos++] == 0x00 &&
                packetData[naltail_pos++] == 0x00)
            {
                if (packetData[naltail_pos++] == 0x01)
                {
                    nalustart = 3;
                    goto gotnal;
                }
                else
                {
                    // Check for the 4-byte form 00 00 00 01
                    naltail_pos--;
                    if (packetData[naltail_pos++] == 0x00 &&
                        packetData[naltail_pos++] == 0x01)
                    {
                        nalustart = 4;
                        goto gotnal;
                    }
                    else
                        continue;
                }
            }
            else
                continue;

        gotnal:
            // The NALU is not entirely contained in this packetData
            if (h264Source->nalhead_pos == GOT_A_NAL_CROSS_BUFFER || h264Source->nalhead_pos == GOT_A_NAL_INCLUDE_A_BUFFER)
            {
                return FALSE;
            }
            // The whole NALU is inside this packetData
            else
            {
                nalu.type = packetData[h264Source->nalhead_pos] & 0x1f;
                nalu.size = naltail_pos - h264Source->nalhead_pos - nalustart;
                if (nalu.type == 0x06)  // skip SEI NALUs
                {
                    h264Source->nalhead_pos = naltail_pos;
                    continue;
                }
                memcpy(m_tmp_nalu_data, packetData + h264Source->nalhead_pos, nalu.size);
                nalu.data = m_tmp_nalu_data;
                h264Source->nalhead_pos = naltail_pos;
                return TRUE;
            }
        }
    }
    return FALSE;
}

This gives us the SPS and PPS data. Of course, at the end we should check whether each NALU we obtained really is an SPS or a PPS: the NALU types for SPS and PPS are 0x07 and 0x08, respectively.
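As a sketch of how the two functions above might be driven, here is a hypothetical loop over the data of one video AVPacket that keeps only the SPS and PPS NALUs (pkt, NaluUnit, and h264Source are assumed to be set up as in the code above):

// Hypothetical driver: walk the NALUs inside one Annex B packet and keep
// the SPS (type 0x07) and PPS (type 0x08).
NaluUnit nalu;
unsigned char sps[256], pps[256];
int spsLen = 0, ppsLen = 0;

if (ReadFirstNaluFromBuf(h264Source, nalu, pkt->size, pkt->data)) {
    do {
        if (nalu.type == 0x07 && nalu.size <= (int)sizeof(sps)) {
            spsLen = nalu.size;
            memcpy(sps, nalu.data, nalu.size);   // found the SPS
        } else if (nalu.type == 0x08 && nalu.size <= (int)sizeof(pps)) {
            ppsLen = nalu.size;
            memcpy(pps, nalu.data, nalu.size);   // found the PPS
        }
    } while (ReadOneNaluFromBuf(h264Source, nalu, pkt->size, pkt->data));
}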

Wrapping up

The SPS and PPS extracted this way can be used in streaming code that re-publishes the stream without re-encoding. I cannot yet write that publishing code with FFMPEG, because I still do not know how to feed these separately extracted SPS and PPS back into FFMPEG, but what is clear is that re-publishing them directly over RTMP is not much of a problem.
