Go语言学习笔记 - 调用ffmpeg-api实现音频重采样

目录


前言

最近对golang音视频处理很感兴趣,对golang音视频常用库goav进行了一番研究。自己写了一个wav转采样率的功能,给大家分享一下。中间遇到了不少坑,解决的过程还是蛮有意思的。

环境部署

代码运行在Ubuntu环境上,需要使用到goav,goav是对ffmpeg源码的golang封装。

goav地址:https://github.com/giorgisio/goav (注意:本文下面的安装命令和代码导入使用的是 github.com/xueqing/goav)

goav安装如下

  1. sudo apt-get -y install autoconf automake build-essential libass-dev libfreetype6-dev libsdl1.2-dev libtheora-dev libtool libva-dev libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texi2html zlib1g-dev
  2. sudo apt install -y libavdevice-dev libavfilter-dev libswscale-dev libavcodec-dev libavformat-dev libswresample-dev libavutil-dev
  3. sudo apt-get install yasm
  4. export FFMPEG_ROOT=$HOME/ffmpeg
  5. export CGO_LDFLAGS="-L$FFMPEG_ROOT/lib/ -lavcodec -lavformat -lavutil -lswscale -lswresample -lavdevice -lavfilter"
  6. export CGO_CFLAGS="-I$FFMPEG_ROOT/include"
  7. export LD_LIBRARY_PATH=$HOME/ffmpeg/lib
  8. go get github.com/xueqing/goav

代码

先看代码

  1. package main
  2. //#include<stdlib.h>
  3. import "C"
  4. import (
  5. "flag"
  6. "fmt"
  7. "github.com/google/logger"
  8. "github.com/xueqing/ffmpeg-demo/logutil"
  9. "github.com/xueqing/goav/libswresample"
  10. "github.com/youpy/go-wav"
  11. "io"
  12. "os"
  13. "reflect"
  14. "unsafe"
  15. )
  16. func main() {
  17. var (
  18. inputUrl string = "./data/1.wav"
  19. inNumChannels int64 = 1
  20. inSampleRate int = 16000
  21. //inBitsPerSample uint16 = 16
  22. outNumChannels int64 = 1
  23. outSampleRate int = 48000
  24. outBitsPerSample uint16 = 16
  25. swr *libswresample.SwrContext = libswresample.SwrAlloc()
  26. )
  27. flag.Parse()
  28. logutil.Init(true, false, "resample.log")
  29. defer logutil.Close()
  30. swr.SwrAllocSetOpts(outNumChannels,
  31. libswresample.AvSampleFormat(1),
  32. outSampleRate,
  33. inNumChannels,
  34. libswresample.AvSampleFormat(1),
  35. inSampleRate,
  36. 0,
  37. 0)
  38. swr.SwrInit()
  39. defer swr.SwrClose()
  40. _inputFile, err := os.Open(inputUrl)
  41. if err != nil {
  42. logger.Errorf("open input file error(%v)", err)
  43. return
  44. }
  45. defer _inputFile.Close()
  46. _reader := wav.NewReader(_inputFile)
  47. format, err := _reader.Format()
  48. if err != nil {
  49. logger.Errorf("input file format error(%v)", err)
  50. return
  51. }
  52. fmt.Printf("input file format info -> AudioFormat:%v,NumChannels:%v,SampleRate:%v,ByteRate:%v,BlockAlign:%v,BitsPerSample:%v", int(format.AudioFormat), format.NumChannels, format.SampleRate, format.ByteRate, format.BlockAlign, format.BitsPerSample)
  53. _tempFile, err := os.CreateTemp("", "*.wav")
  54. if err != nil {
  55. logger.Errorf("create temp file error(%v)", err)
  56. return
  57. }
  58. logger.Infof("Create tempFile %v", _tempFile.Name())
  59. defer func() {
  60. _tempFile.Close()
  61. }()
  62. _samples := []wav.Sample{}
  63. n := 4096
  64. for {
  65. spls, err := _reader.ReadSamples(uint32(n))
  66. if err == io.EOF {
  67. break
  68. }
  69. _samples = append(_samples, spls...)
  70. }
  71. _result := ResampleByFFmpegApi2(swr, _samples)
  72. _writer := wav.NewWriter(_tempFile, uint32(len(_result)), uint16(outNumChannels), uint32(outSampleRate), outBitsPerSample)
  73. err4 := _writer.WriteSamples(_result)
  74. if err4 != nil {
  75. logger.Errorf("write file error(%v)", err4)
  76. err = err4
  77. return
  78. }
  79. }
  80. func ResampleByFFmpegApi2(swr *libswresample.SwrContext, samples []wav.Sample) []wav.Sample {
  81. var (
  82. _inArr **uint8
  83. _outArr **uint8
  84. _inptr []uint16
  85. _outptr []uint16
  86. )
  87. _inArr = (**uint8)(C.malloc(C.sizeof_int))
  88. defer C.free(unsafe.Pointer(_inArr))
  89. _inptr = make([]uint16, len(samples))
  90. _outArr = (**uint8)(C.malloc(C.sizeof_int))
  91. defer C.free(unsafe.Pointer(_outArr))
  92. _outptr = make([]uint16, len(samples)*3)
  93. //fmt.Println(unsafe.Sizeof(uint16(0)))
  94. for i, v := range samples {
  95. _inptr[i] = uint16(v.Values[0])
  96. }
  97. *_inArr = (*uint8)(unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&_inptr)).Data))
  98. *_outArr = (*uint8)(unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&_outptr)).Data))
  99. ret := swr.SwrConvert(_outArr, len(samples)*3, _inArr, len(samples))
  100. if ret > 0 {
  101. fmt.Println(ret)
  102. }
  103. _result := make([]wav.Sample, ret)
  104. for i := 0; i < ret; i++ {
  105. _result[i] = wav.Sample{[2]int{int(_outptr[i]), 0}}
  106. }
  107. return _result
  108. }

代码说明:

1、代码不是个工具方法,如果看懂逻辑的话,可以自行修改成工具方法。

2、里面会用到ffmpeg里面的swresample库,对音频数据进行重采样。

3、可以细看一下,如果你想作实时处理也是可以改的。

4、其中SwrAllocSetOpts方法中有个参数libswresample.AvSampleFormat(1),为什么取1,这里主要是选择采样表示方式的枚举,参考底层源码枚举,我发在下面。我这边因为音频是s16的,所以选择1。

  1. enum AVSampleFormat { ///< audio sample formats; enumerators after NONE count up from 0
  2. AV_SAMPLE_FMT_NONE = -1,
  3. AV_SAMPLE_FMT_U8, ///< unsigned 8 bits (numeric value 0)
  4. AV_SAMPLE_FMT_S16, ///< signed 16 bits (numeric value 1)
  5. AV_SAMPLE_FMT_S32, ///< signed 32 bits
  6. AV_SAMPLE_FMT_FLT, ///< float
  7. AV_SAMPLE_FMT_DBL, ///< double
  8. AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
  9. AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
  10. AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
  11. AV_SAMPLE_FMT_FLTP, ///< float, planar
  12. AV_SAMPLE_FMT_DBLP, ///< double, planar
  13. AV_SAMPLE_FMT_S64, ///< signed 64 bits
  14. AV_SAMPLE_FMT_S64P, ///< signed 64 bits, planar
  15. AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
  16. };

音频准备,输入音频为16k音频。

(base) xxx@hu:~/GolandProjects/MediaRelay/data$ ffmpeg -i 1.wav
ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
  libavdevice    58.  8.100 / 58.  8.100
  libavfilter     7. 57.100 /  7. 57.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  5.100 /  5.  5.100
  libswresample   3.  5.100 /  3.  5.100
  libpostproc    55.  5.100 / 55.  5.100
Guessed Channel Layout for Input Stream #0.0 : mono
Input #0, wav, from '1.wav':
  Metadata:
    date            : 2020-09-28
    encoder         : Lavf58.45.100
  Duration: 00:04:01.75, bitrate: 256 kb/s
    Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 16000 Hz, mono, s16, 256 kb/s

执行情况

input file format info -> AudioFormat:1,NumChannels:1,SampleRate:16000,ByteRate:32000,BlockAlign:2,BitsPerSample:16INFO : 2022/12/06 17:14:49.937547 csdn_wav_util.go:62: Create tempFile /tmp/2402235346.wav
11603961
 

最终音频

(base) xxx@hu:/tmp$ ffmpeg -i 2402235346.wav 
ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
  libavdevice    58.  8.100 / 58.  8.100
  libavfilter     7. 57.100 /  7. 57.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  5.100 /  5.  5.100
  libswresample   3.  5.100 /  3.  5.100
  libpostproc    55.  5.100 / 55.  5.100
Guessed Channel Layout for Input Stream #0.0 : mono
Input #0, wav, from '2402235346.wav':
  Duration: 00:04:01.75, bitrate: 768 kb/s
    Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s 

总结

其实在写代码过程中,有个让我特别头疼的问题,就是怎么把数组转为**uint8。如果大家有兴趣,可以研究一下ResampleByFFmpegApi2方法的转换逻辑,会学到不少东西。

分享:

        我们的疲劳往往不是由工作引起的,而是由于忧烦、挫折和不满等。——《人性的弱点》

网站建设定制开发 软件系统开发定制 定制软件开发 软件开发定制 定制app开发 app开发定制 app开发定制公司 电商商城定制开发 定制小程序开发 定制开发小程序 客户管理系统开发定制 定制网站 定制开发 crm开发定制 开发公司 小程序开发定制 定制软件 收款定制开发 企业网站定制开发 定制化开发 android系统定制开发 定制小程序开发费用 定制设计 专注app软件定制开发 软件开发定制定制 知名网站建设定制 软件定制开发供应商 应用系统定制开发 软件系统定制开发 企业管理系统定制开发 系统定制开发