fix audio

This commit is contained in:
Andrey Semochkin 2021-01-09 17:24:48 +03:00
parent 33b07c6a20
commit f16439f7ef
4 changed files with 51 additions and 16 deletions

View File

@ -238,6 +238,7 @@ type Packet struct {
Idx int8 // stream index in container format Idx int8 // stream index in container format
CompositionTime time.Duration // packet presentation time minus decode time for H264 B-Frame CompositionTime time.Duration // packet presentation time minus decode time for H264 B-Frame
Time time.Duration // packet decode time Time time.Duration // packet decode time
Duration time.Duration //packet duration
Data []byte // packet data Data []byte // packet data
} }

View File

@ -26,7 +26,7 @@ func (self OpusCodecData) ChannelLayout() av.ChannelLayout {
} }
func (self OpusCodecData) PacketDuration(data []byte) (time.Duration, error) { func (self OpusCodecData) PacketDuration(data []byte) (time.Duration, error) {
return time.Duration(1000) * time.Second / time.Duration(self.SampleRate_), nil return time.Duration(20) * time.Millisecond, nil
} }
func (self OpusCodecData) SampleFormat() av.SampleFormat { func (self OpusCodecData) SampleFormat() av.SampleFormat {

View File

@ -79,7 +79,7 @@ func (self *Muxer) newStream(codec av.CodecData) (err error) {
stream.sample.SyncSample = &mp4io.SyncSample{} stream.sample.SyncSample = &mp4io.SyncSample{}
stream.timeScale = 90000 stream.timeScale = 90000
case av.AAC: case av.AAC:
stream.timeScale = 8000 stream.timeScale = int64(codec.(av.AudioCodecData).SampleRate())
} }
stream.muxer = self stream.muxer = self
@ -179,10 +179,12 @@ func (self *Stream) fillTrackAtom() (err error) {
self.sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{ self.sample.SampleDesc.MP4ADesc = &mp4io.MP4ADesc{
DataRefIdx: 1, DataRefIdx: 1,
NumberOfChannels: int16(codec.ChannelLayout().Count()), NumberOfChannels: int16(codec.ChannelLayout().Count()),
SampleSize: 16, SampleSize: int16(codec.SampleFormat().BytesPerSample() * 4),
SampleRate: float64(codec.SampleRate()), SampleRate: float64(codec.SampleRate()),
Unknowns: []mp4io.Atom{self.buildEsds(codec.MPEG4AudioConfigBytes())}, Unknowns: []mp4io.Atom{self.buildEsds(codec.MPEG4AudioConfigBytes())},
} }
//log.Fatalln(codec.MPEG4AudioConfigBytes())
//log.Fatalln(codec.SampleFormat().BytesPerSample())
self.trackAtom.Header.Volume = 1 self.trackAtom.Header.Volume = 1
self.trackAtom.Header.AlternateGroup = 1 self.trackAtom.Header.AlternateGroup = 1
self.trackAtom.Header.Duration = 0 self.trackAtom.Header.Duration = 0
@ -270,6 +272,9 @@ func (element *Muxer) WritePacket(pkt av.Packet, GOP bool) (bool, []byte, error)
if stream.lastpkt != nil { if stream.lastpkt != nil {
ts = pkt.Time - stream.lastpkt.Time ts = pkt.Time - stream.lastpkt.Time
} }
if stream.CodecData.Type().IsAudio() {
pkt.Data = pkt.Data[4:]
}
got, buf, err := stream.writePacketV2(pkt, ts, 5) got, buf, err := stream.writePacketV2(pkt, ts, 5)
stream.lastpkt = &pkt stream.lastpkt = &pkt
if err != nil { if err != nil {

View File

@ -77,7 +77,9 @@ type RTSPClient struct {
CodecData []av.CodecData CodecData []av.CodecData
AudioTimeLine time.Duration AudioTimeLine time.Duration
AudioTimeScale int64 AudioTimeScale int64
audioCodec string audioCodec av.CodecType
PreAudioTS int64
PreVideoTS int64
} }
type RTSPClientOptions struct { type RTSPClientOptions struct {
@ -180,7 +182,7 @@ func Dial(options RTSPClientOptions) (*RTSPClient, error) {
if CodecData != nil { if CodecData != nil {
client.CodecData = append(client.CodecData, CodecData) client.CodecData = append(client.CodecData, CodecData)
client.audioIDX = int8(len(client.CodecData) - 1) client.audioIDX = int8(len(client.CodecData) - 1)
client.audioCodec = CodecData.Type().String() client.audioCodec = CodecData.Type()
if i2.TimeScale != 0 { if i2.TimeScale != 0 {
client.AudioTimeScale = int64(i2.TimeScale) client.AudioTimeScale = int64(i2.TimeScale)
} }
@ -227,7 +229,7 @@ func (client *RTSPClient) startStream() {
timer = time.Now() timer = time.Now()
} }
if !fixed { if !fixed {
nb, err := io.ReadFull(client.conn, header) nb, err := io.ReadFull(client.connRW, header)
if err != nil || nb != 4 { if err != nil || nb != 4 {
client.Println("RTSP Client RTP Read Header", err) client.Println("RTSP Client RTP Read Header", err)
return return
@ -246,7 +248,7 @@ func (client *RTSPClient) startStream() {
content[1] = header[1] content[1] = header[1]
content[2] = header[2] content[2] = header[2]
content[3] = header[3] content[3] = header[3]
n, rerr := io.ReadFull(client.conn, content[4:length+4]) n, rerr := io.ReadFull(client.connRW, content[4:length+4])
if rerr != nil || n != int(length) { if rerr != nil || n != int(length) {
client.Println("RTSP Client RTP ReadFull", err) client.Println("RTSP Client RTP ReadFull", err)
return return
@ -274,7 +276,7 @@ func (client *RTSPClient) startStream() {
case 0x52: case 0x52:
var responseTmp []byte var responseTmp []byte
for { for {
n, rerr := io.ReadFull(client.conn, oneb) n, rerr := io.ReadFull(client.connRW, oneb)
if rerr != nil || n != 1 { if rerr != nil || n != 1 {
client.Println("RTSP Client RTP Read Keep-Alive Header", rerr) client.Println("RTSP Client RTP Read Keep-Alive Header", rerr)
return return
@ -288,7 +290,7 @@ func (client *RTSPClient) startStream() {
return return
} }
cont := make([]byte, si) cont := make([]byte, si)
_, err = io.ReadFull(client.conn, cont) _, err = io.ReadFull(client.connRW, cont)
if err != nil { if err != nil {
client.Println("RTSP Client RTP Read Keep-Alive ReadFull", err) client.Println("RTSP Client RTP Read Keep-Alive ReadFull", err)
return return
@ -512,6 +514,9 @@ func (client *RTSPClient) RTPDemuxer(payloadRAW *[]byte) ([]*av.Packet, bool) {
offset += 4 offset += 4
switch int(content[1]) { switch int(content[1]) {
case client.videoID: case client.videoID:
if client.PreVideoTS == 0 {
client.PreVideoTS = timestamp
}
if client.BufferRtpPacket.Len() > 4048576 { if client.BufferRtpPacket.Len() > 4048576 {
client.Println("Big Buffer Flush") client.Println("Big Buffer Flush")
client.BufferRtpPacket.Truncate(0) client.BufferRtpPacket.Truncate(0)
@ -529,6 +534,7 @@ func (client *RTSPClient) RTPDemuxer(payloadRAW *[]byte) ([]*av.Packet, bool) {
CompositionTime: time.Duration(1) * time.Millisecond, CompositionTime: time.Duration(1) * time.Millisecond,
Idx: client.videoIDX, Idx: client.videoIDX,
IsKeyFrame: naluType == 5, IsKeyFrame: naluType == 5,
Duration: time.Duration(float32(timestamp-client.PreVideoTS)/90) * time.Millisecond,
Time: time.Duration(timestamp/90) * time.Millisecond, Time: time.Duration(timestamp/90) * time.Millisecond,
}) })
case naluType == 7: case naluType == 7:
@ -556,40 +562,63 @@ func (client *RTSPClient) RTPDemuxer(payloadRAW *[]byte) ([]*av.Packet, bool) {
retmap = append(retmap, &av.Packet{ retmap = append(retmap, &av.Packet{
Data: append(binSize(client.BufferRtpPacket.Len()), client.BufferRtpPacket.Bytes()...), Data: append(binSize(client.BufferRtpPacket.Len()), client.BufferRtpPacket.Bytes()...),
CompositionTime: time.Duration(1) * time.Millisecond, CompositionTime: time.Duration(1) * time.Millisecond,
Duration: time.Duration(float32(timestamp-client.PreVideoTS)/90) * time.Millisecond,
Idx: client.videoIDX, Idx: client.videoIDX,
IsKeyFrame: naluTypef == 5, IsKeyFrame: naluTypef == 5,
Time: time.Duration(timestamp/90) * time.Millisecond, Time: time.Duration(timestamp/90) * time.Millisecond,
}) })
} }
} }
default: default:
client.Println("Unsupported NAL Type", naluType) client.Println("Unsupported NAL Type", naluType)
} }
} }
if len(retmap) > 0 { if len(retmap) > 0 {
client.PreVideoTS = timestamp
return retmap, true return retmap, true
} }
case client.audioID: case client.audioID:
if client.PreAudioTS == 0 {
client.PreAudioTS = timestamp
}
nalRaw, _ := h264parser.SplitNALUs(content[offset:end]) nalRaw, _ := h264parser.SplitNALUs(content[offset:end])
var retmap []*av.Packet var retmap []*av.Packet
for _, nal := range nalRaw { for _, nal := range nalRaw {
if client.audioCodec == av.PCM_MULAW.String() || client.audioCodec == av.PCM_ALAW.String() || client.audioCodec == av.PCM.String() { var duration time.Duration
client.AudioTimeLine += time.Duration(len(nal)) * time.Second / time.Duration(client.AudioTimeScale) switch client.audioCodec {
} else if client.audioCodec == av.OPUS.String() { case av.PCM_MULAW:
client.AudioTimeLine += time.Duration(20) * time.Millisecond duration = time.Duration(len(nal)) * time.Second / time.Duration(client.AudioTimeScale)
} else { client.AudioTimeLine += duration
client.AudioTimeLine = time.Duration(float32(timestamp)/float32(float32(client.AudioTimeScale)/float32(1000))) * time.Millisecond case av.PCM_ALAW:
duration = time.Duration(len(nal)) * time.Second / time.Duration(client.AudioTimeScale)
client.AudioTimeLine += duration
case av.OPUS:
duration = time.Duration(20) * time.Millisecond
client.AudioTimeLine += duration
case av.AAC:
if nal[1] == 32 {
return nil, false
}
nal = nal[4:]
if _, _, _, _, err := aacparser.ParseADTSHeader(nal); err == nil {
nal = nal[7:]
}
duration = time.Duration((float32(1024)/float32(client.AudioTimeScale))*1000) * time.Millisecond
client.AudioTimeLine += duration
} }
retmap = append(retmap, &av.Packet{ retmap = append(retmap, &av.Packet{
Data: append(binSize(len(nal)), nal...), Data: append(binSize(len(nal)), nal...),
CompositionTime: time.Duration(1) * time.Millisecond, CompositionTime: time.Duration(1) * time.Millisecond,
Duration: duration,
Idx: client.audioIDX, Idx: client.audioIDX,
IsKeyFrame: false, IsKeyFrame: false,
Time: client.AudioTimeLine, Time: client.AudioTimeLine,
}) })
} }
if len(retmap) > 0 { if len(retmap) > 0 {
client.PreAudioTS = timestamp
return retmap, true return retmap, true
} }
default: default: