python音频处理

前言：python音频处理

用到的库（wave、struct、math为标准库；matplotlib、numpy为第三方库）

wave

python自带的wave模块提供了一个方便的WAV格式接口，可用于读写WAV文件

matplotlib.pyplot

Python的一个第三方二维绘图库（需要单独安装）

numpy

一个多维数组对象和用于处理数组的例程集合组成的库。

struct

该库用于提供python数据类型与字节流之间的转换

math

python 数学函数模块

wave

open()
打开WAV文件：mode为'rb'时返回用于读取的对象，为'wb'时返回用于写入的对象。

close()
关闭该流（如果它是由wave打开的），并使该实例不可用；对象被垃圾回收时会自动调用此方法。

getnchannels()
返回音频的声道数（1是单声道，2是立体声）。

getsampwidth()
返回采样宽度，即每个采样点占用的字节数（例如16位音频返回2）。

getframerate()
返回采样频率，即每秒的帧数（单位：Hz）。

getnframes()
返回音频的帧数。

getcomptype()
返回压缩类型（仅支持’NONE’）。

getcompname()
是getcomptype()的人类可读的版本，通常用’not compressed’表示’NONE’。

getparams()
返回一个元组tuple (nchannels, sampwidth, framerate, nframes, comptype, compname)，包含了以上的方法取得的几个值。

readframes(n)
从流的当前指针位置一次读出音频的n个帧，并且指针后移n个帧，返回一个字节数组。

rewind()
倒带，将该文件指针指向音频流的最开始位置。

查看

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import wave
import matplotlib.pyplot as plt
import numpy as np


# Read the stego WAV. The context manager closes the file even if reading
# the header or the samples fails.
with wave.open('stego100.wav', 'rb') as f:
    # Header fields: channel count, sample width (bytes), frame rate, frame count.
    params = f.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]

    # Raw interleaved PCM bytes for the whole file.
    strData = f.readframes(nframes)

# Everything below interprets the payload as 16-bit samples.
if sampwidth != 2:
    raise ValueError("only 16-bit PCM wav files are supported")

# Decode the bytes as little-endian int16 (the WAV on-disk layout) and move to
# float64 straight away, so that abs(-32768) cannot overflow int16.
waveData = np.frombuffer(strData, dtype='<i2').astype(np.float64)

# Normalise the amplitude to [-1, 1]; a completely silent file is left as is
# instead of being divided by zero.
peak = np.max(np.abs(waveData))
if peak > 0:
    waveData = waveData / peak

# One row per frame, one column per channel.
waveData = np.reshape(waveData, [-1, nchannels])

# Plot both channels against time (this script expects a stereo file).
time_axis = np.arange(0, waveData.shape[0]) * (1.0 / framerate)
plt.figure()
plt.subplot(5, 1, 1)
plt.plot(time_axis, waveData[:, 0])
plt.xlabel("Time(s)")
plt.ylabel("Amplitude")
plt.title("Ch-1 wavedata")
plt.grid(True)
plt.subplot(5, 1, 3)
plt.plot(time_axis, waveData[:, 1])
plt.xlabel("Time(s)")
plt.ylabel("Amplitude")
plt.title("Ch-2 wavedata")
plt.grid(True)
plt.show()

# The hidden Morse signal is read from the first channel.
x = waveData[:, 0]
size = x.size

# Decoder tuning, chosen by inspecting the plotted waveform.
AMPLITUDE_THRESHOLD = 0.05   # |sample| above this means "tone present"
MIN_TONE_SAMPLES = 300       # a burst must last at least this long to count
DASH_MIN_SAMPLES = 10000     # bursts longer than this are dashes, else dots
QUIET_CHECKS_TO_END = 10     # quiet probes tolerated before a burst is closed
LETTER_GAP_SAMPLES = 50000   # silence this long separates two letters
STEP = 50                    # only every 50th sample is inspected

flag = False   # True while we are inside a tone burst
k = 0          # sample index where the current burst (or silence) started
count = 0      # consecutive quiet probes seen inside the current burst
symbols = []
for i in range(0, size, STEP):
    if abs(x[i]) > AMPLITUDE_THRESHOLD:
        # Entering a burst: remember where it started.
        if not flag:
            k = i
        flag = True
        # Any loud probe resets the quiet counter; a sine wave crosses zero,
        # so isolated quiet probes inside a burst are expected.
        count = 0
    elif flag:
        if i - k >= MIN_TONE_SAMPLES:
            if count >= QUIET_CHECKS_TO_END:
                # Enough consecutive quiet probes: the burst has ended.
                symbols.append("-" if i - k > DASH_MIN_SAMPLES else ".")
                flag = False
                count = 0
                k = i
            else:
                count += 1
    elif i - k >= LETTER_GAP_SAMPLES:
        # Long silence between bursts marks a letter boundary.
        symbols.append(" ")
        k = i

# Drop the surplus spaces produced by the leading and trailing silence.
my_morseCode = "".join(symbols).strip()
print(my_morseCode)

correct_morseCode = "..... -... -.-. ----. ..--- ..... -.... ....- ----. -.-. -... ----- .---- ---.. ---.. ..-. ..... ..--- . -.... .---- --... -.. --... ----- ----. ..--- ----. .---- ----. .---- -.-."
# Report whether the decoded Morse code matches the expected one.
if correct_morseCode == my_morseCode:
    print("good job!")

修改

原始数据:

# -*- coding: gb2312 -*-
import wave
import numpy as np
import struct
import math

# Read the cover WAV. The context manager closes the file even on failure.
with wave.open('example.wav', 'rb') as inputfile:
    # Header fields, in order: channel count, sample width (bytes), frame
    # rate, frame count, compression type, compression name.
    params = inputfile.getparams()
    nchannels, sampwidth, framerate, nframes, comptype, compname = params[:6]
    print("声道：", nchannels)
    print("采样宽度：", sampwidth)
    print("帧速率：", framerate)
    print("帧数：", nframes)
    print("唯一标示：", comptype)
    print("无损：", compname)

    # Raw interleaved PCM bytes for the whole file.
    strData = inputfile.readframes(nframes)

# The payload is interpreted as 16-bit samples and written into channel 2.
if sampwidth != 2:
    raise ValueError("only 16-bit PCM wav files are supported")
if nchannels < 2:
    raise ValueError("the wav file needs at least two channels")

# Decode the bytes as little-endian int16 (the WAV on-disk layout) and move to
# float64 straight away, so that abs(-32768) cannot overflow int16.
waveData = np.frombuffer(strData, dtype='<i2').astype(np.float64)

# Normalise the amplitude to [-1, 1]; a completely silent file is left as is
# instead of being divided by zero.
peak = np.max(np.abs(waveData))
if peak > 0:
    waveData = waveData / peak

# One row per frame, one column per channel.
waveData = np.reshape(waveData, [-1, nchannels])

# Number of frames available in each channel.
size = waveData.shape[0]

# Message to embed, already written as Morse code.
s = "..... -... -.-. ----. ..--- ..... -.... ....- ----. -.-. -... ----- .---- ---.. ---.. ..-. ..... ..--- . -.... .---- --... -.. --... ----- ----. ..--- ----. .---- ----. .---- -.-."

# Frame index at which embedding starts.
k = 8000
# Length in frames of a dot.
di_length = 1000
# Length in frames of a dash.
da_length = 3000

# Every burst is the same sine restarted at phase zero, so compute each burst
# shape once instead of once per symbol.
tones = {
    '.': np.array([0.1 * math.sin(n / math.pi) for n in range(di_length)]),
    '-': np.array([0.1 * math.sin(n / math.pi) for n in range(da_length)]),
}

for i in s:
    tone = tones.get(i)
    if tone is None:
        # A space: leave a long silence between two letters.
        k = k + 10000
        continue
    if k + tone.size > size:
        raise ValueError("the wav file is too short for the message")
    # Overwrite channel 2 with the burst, then leave a 1000-frame gap.
    waveData[k:k + tone.size, 1] = tone
    k += tone.size + 1000

# Scale the normalised samples to a peak of 32000 and truncate toward zero to
# little-endian int16. Rows are frames, so the C-order bytes are already
# interleaved the way the WAV format expects.
pcm = (waveData * 64000 / 2).astype('<i2')

# Write the result with the same parameters as the input.
outfile_name = 'outfile.wav'
with wave.open(outfile_name, 'wb') as f:
    f.setparams((nchannels, sampwidth, framerate, nframes, comptype, compname))
    # A single call writes all frames; the header frame count is patched if needed.
    f.writeframes(pcm.tobytes())

修改结果：

最终程序

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import wave
import numpy as np
import struct
import math
import argparse
import os

# International Morse code for the letters A-Z, indexed by alphabet
# position (A is 0, Z is 25).
letter_code = (
    ".- -... -.-. -.. . ..-. --. .... .. .--- -.- .-.. -- "
    "-. --- .--. --.- .-. ... - ..- ...- .-- -..- -.-- --.."
).split()
# International Morse code for the digits, indexed by the digit's value.
number_code = "----- .---- ..--- ...-- ....- ..... -.... --... ---.. ----.".split()


def wav_encrypt(input_file, plaintext, outfile):
    """Hide *plaintext* as Morse tone bursts in the second channel of a WAV file.

    The input is read, normalised to [-1, 1], and a 0.1-amplitude sine burst is
    written into channel 2 for every dot and dash of the Morse encoding of
    *plaintext*. The result is rescaled to a peak of 32000 and written to
    *outfile* as 16-bit PCM.

    Args:
        input_file: Path of the cover file; must exist and end in ".wav".
        plaintext: Message to hide; converted with morse_code_encrypt().
        outfile: Path of the file to create; must end in ".wav".

    Returns:
        None. Progress and every rejection reason are reported with print();
        on rejection nothing is written.
    """
    if not os.path.exists(input_file):
        print("Wav is not exist")
        print("Encrypt fail")
        return
    if input_file[-4:] != ".wav":
        print("input file is not wav")
        return
    if outfile[-4:] != ".wav":
        print("output file is not wav")
        return

    # The context manager closes the input on every path, including the
    # early returns below.
    with wave.open(input_file, 'rb') as f:
        print("Read wav successful")

        # Header fields, in order: channel count, sample width (bytes), frame
        # rate, frame count, compression type, compression name.
        params = f.getparams()
        nchannels, sampwidth, framerate, nframes, comptype, compname = params[:6]
        if nchannels < 2:
            print("the wav file need two sound channel at least")
            return
        if sampwidth != 2:
            # The samples are decoded and re-encoded as int16 below.
            print("only 16-bit wav is supported")
            return

        # Raw interleaved PCM bytes for the whole file.
        strData = f.readframes(nframes)

    # Decode as little-endian int16 (the WAV on-disk layout) and move to
    # float64 straight away, so that abs(-32768) cannot overflow int16.
    waveData = np.frombuffer(strData, dtype='<i2').astype(np.float64)

    # Normalise to [-1, 1]; a silent (or empty) file is left untouched rather
    # than divided by zero.
    peak = np.max(np.abs(waveData)) if waveData.size else 0.0
    if peak > 0:
        waveData = waveData / peak

    # One row per frame, one column per channel.
    waveData = np.reshape(waveData, [-1, nchannels])

    # Number of frames available in each channel.
    size = waveData.shape[0]
    # Embedding starts 10% into the file.
    k = int(size * 0.1)
    # Length in frames of a dot.
    di_length = 1000
    # Length in frames of a dash.
    da_length = 3000
    # Silence in frames for a space, i.e. between two Morse letters.
    space_length = 10000
    # Silence in frames after every dot or dash.
    gap = 1000

    morse_code = morse_code_encrypt(plaintext)
    print("Morse_code:", morse_code)

    # Work out where embedding would end and refuse to run past 99% of the file.
    symbol_length = {'.': di_length, '-': da_length}
    max_cipher_text_length = k
    for i in morse_code:
        if i in symbol_length:
            max_cipher_text_length += gap + symbol_length[i]
        else:
            max_cipher_text_length += space_length
    if max_cipher_text_length > int(size * 0.99):
        print("the wav file is small or the plaintext is big，Cannot hide data")
        return

    # Every burst is the same sine restarted at phase zero, so compute each
    # burst shape once instead of once per symbol.
    tones = {
        symbol: np.array([0.1 * math.sin(n / math.pi) for n in range(length)])
        for symbol, length in symbol_length.items()
    }
    for i in morse_code:
        tone = tones.get(i)
        if tone is None:
            k = k + space_length
            continue
        # Overwrite channel 2 with the burst, then skip the inter-symbol gap.
        waveData[k:k + tone.size, 1] = tone
        k += tone.size + gap

    # Scale to a peak of 32000 and truncate toward zero to little-endian
    # int16. Rows are frames, so the C-order bytes are already interleaved.
    pcm = (waveData * 64000 / 2).astype('<i2')

    with wave.open(outfile, 'wb') as out:
        # Same parameters as the input file.
        out.setparams((nchannels, sampwidth, framerate, nframes, comptype, compname))

        print("Writing file, please wait.")
        # A single call writes all frames.
        out.writeframes(pcm.tobytes())

    print("Wav encrypt Successful")


def morse_code_encrypt(plaintext):
    """Translate *plaintext* into Morse code.

    ASCII letters (case-insensitive) and digits are looked up in the
    module-level ``letter_code`` / ``number_code`` tables; every other
    character is skipped. Each emitted code is followed by one space, so a
    non-empty result ends with a trailing space.
    """
    pieces = []
    for ch in plaintext:
        # Pick the lookup table and the code point of its first entry.
        if 'a' <= ch <= 'z':
            table, base = letter_code, ord('a')
        elif 'A' <= ch <= 'Z':
            table, base = letter_code, ord('A')
        elif '0' <= ch <= '9':
            table, base = number_code, ord('0')
        else:
            continue
        pieces.append(table[ord(ch) - base] + ' ')
    return "".join(pieces)


def main():
    """Parse the command line and run wav_encrypt() with the given options.

    All three options are mandatory; argparse reports a missing one and exits
    with status 2, so None is never passed on to wav_encrypt().
    """
    parser = argparse.ArgumentParser(description="usage:python wav_encrypt.py -i inputfile.wav -p plaintext -o outfile.wav")
    parser.add_argument("-i", help="Wav file name", type=str, required=True)
    parser.add_argument("-p", help="plaintext", type=str, required=True)
    parser.add_argument("-o", help="output file name", type=str, required=True)
    args = parser.parse_args()
    wav_encrypt(args.i, args.p, args.o)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()