java实现录音并保存为wav格式的音频文件

前言：本意是想做一个录屏软件，这篇先从录音功能开始。
整体思路：采用java官方API——TargetDataLine，从声卡中采集音频数据达到录音效果，采集的数据为PCM裸流，再将PCM转为wav格式。

如果你对音频文件一点也不了解，建议看一下这篇文章中的内容，主要解析了wav文件的格式，并涉及到一些音频有关的概念：
WAV文件格式详解

然后我将分成两部分代码来讲解。

第一部分代码：从声卡中采集数据并保存为pcm文件。

package com;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Scanner;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.TargetDataLine;

/**
 * Records audio from the default capture device and stores it as a raw PCM file.
 *
 * <p>Audio is captured through the standard {@link TargetDataLine} API as signed
 * 16-bit, little-endian, 2-channel PCM at 44.1 kHz. The output file has no header,
 * so it has to be wrapped in a WAV container before ordinary players can open it.
 *
 * @author Administrator
 */
public class Sound {
    /**
     * Stop request flag. It is written by the key-listener thread and read by the
     * recorder thread, hence {@code volatile}.
     */
    volatile boolean isStop = false;
    /** Sample rate in Hz (also used as the frame rate, since PCM has one frame per sample). */
    private static final float RATE = 44100f;
    /** Encoding of the captured stream: signed linear PCM. */
    private static final AudioFormat.Encoding ENCODING = AudioFormat.Encoding.PCM_SIGNED;
    /** Bits per sample. */
    private static final int SAMPLE_SIZE = 16;
    /** Byte order of each sample; WAV files require little-endian. */
    private static final boolean BIG_ENDIAN = false;
    /** Number of channels. */
    private static final int CHANNELS = 2;
    /** Bytes per frame (one sample for every channel). */
    private static final int FRAME_SIZE = (SAMPLE_SIZE / 8) * CHANNELS;
    /** Read buffer size; TargetDataLine.read requires a whole number of frames. */
    private static final int BUFFER_SIZE = FRAME_SIZE * 1024;

    /**
     * Starts recording into {@code path} and returns immediately.
     *
     * <p>Two threads are started: one copies captured audio into the file, the other
     * reads tokens from standard input and requests a stop when the token {@code s}
     * is entered. When recording ends, the audio line and the file are closed.
     *
     * @param path location of the PCM file to create; missing parent directories are created
     * @throws Exception if the path is a directory, the file cannot be created, or no
     *                   capture line supporting the format is available
     */
    public void save(String path) throws Exception {
        File file = new File(path);
        if (file.isDirectory()) {
            throw new IOException("Output path is a directory: " + path);
        }
        File parent = file.getAbsoluteFile().getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create directory: " + parent);
        }

        AudioFormat audioFormat = new AudioFormat(ENCODING, RATE, SAMPLE_SIZE, CHANNELS, FRAME_SIZE,
                RATE, BIG_ENDIAN);

        // Clear a stop request left over from an earlier recording.
        isStop = false;

        // Open the file on the caller's thread so that failures are reported to the caller.
        final OutputStream os = new FileOutputStream(file);
        final TargetDataLine targetDataLine;
        try {
            targetDataLine = AudioSystem.getTargetDataLine(audioFormat);
            targetDataLine.open();
            targetDataLine.start();
        } catch (Exception e) {
            os.close();
            throw e;
        }

        // TargetDataLine.read blocks until the requested amount of data is available
        // and returns early once the line is stopped or closed.
        Thread recorder = new Thread("pcm-recorder") {
            @Override
            public void run() {
                byte[] buffer = new byte[BUFFER_SIZE];
                try {
                    int read;
                    while ((read = targetDataLine.read(buffer, 0, buffer.length)) > 0) {
                        // Write only the bytes actually captured, not the whole buffer.
                        os.write(buffer, 0, read);
                        if (isStop) {
                            break;
                        }
                    }
                } catch (IOException e) {
                    // A failed write ends the recording instead of failing on every chunk.
                    e.printStackTrace();
                } finally {
                    targetDataLine.stop();
                    targetDataLine.close();
                    try {
                        os.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    isStop = false;
                }
            }
        };

        // Watches standard input until the stop token "s" is entered.
        Thread keyListener = new Thread("stop-key-listener") {
            @Override
            public void run() {
                // Deliberately not closed: closing the Scanner would close System.in.
                Scanner in = new Scanner(System.in);
                while (in.hasNext()) {
                    if ("s".equals(in.next())) {
                        isStop = true;
                        return;
                    }
                }
            }
        };
        // The listener may block on stdin forever; it must not keep the JVM alive by itself.
        keyListener.setDaemon(true);

        recorder.start();
        keyListener.start();
    }
}

第二部分代码：将pcm文件转换为wav格式的文件。

package com;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

public class PcmToWave {

    /** Size in bytes of the canonical WAV header produced by WaveHeader. */
    private static final int WAV_HEADER_SIZE = 44;
    /** Defaults match the format recorded by the Sound class: 44.1 kHz, 2 channels, 16 bit. */
    private static final int DEFAULT_SAMPLE_RATE = 44100;
    private static final int DEFAULT_CHANNELS = 2;
    private static final int DEFAULT_BITS_PER_SAMPLE = 16;
    private static final int COPY_BUFFER_SIZE = 8192;

    /**
     * Wraps a raw PCM file in a WAV container, assuming 44.1 kHz, 2-channel,
     * 16-bit little-endian samples.
     *
     * @param src src[0] is the path of the PCM file, src[1] the path of the WAV file to write
     * @throws Exception if the arguments are invalid or a file cannot be read or written
     */
    public static void convertAudioFiles(String[] src) throws Exception {
        convertAudioFiles(src, DEFAULT_SAMPLE_RATE, DEFAULT_CHANNELS, DEFAULT_BITS_PER_SAMPLE);
    }

    /**
     * Wraps a raw PCM file in a WAV container using the given sample format.
     * The values must describe how the PCM data was actually recorded; the data
     * itself is copied unchanged.
     *
     * @param src           src[0] is the path of the PCM file, src[1] the path of the WAV file to write
     * @param sampleRate    samples per second per channel
     * @param channels      number of interleaved channels
     * @param bitsPerSample bits per sample, a positive multiple of 8
     * @throws Exception if the arguments are invalid, the PCM file is too large for the
     *                   header's 32-bit size fields, or a file cannot be read or written
     */
    public static void convertAudioFiles(String[] src, int sampleRate, int channels, int bitsPerSample)
            throws Exception {
        if (src == null || src.length < 2 || src[0] == null || src[1] == null) {
            throw new IllegalArgumentException("src must contain the PCM path and the WAV path");
        }
        if (sampleRate <= 0 || channels <= 0 || channels > Short.MAX_VALUE
                || bitsPerSample <= 0 || bitsPerSample % 8 != 0 || bitsPerSample > Short.MAX_VALUE
                || (long) channels * bitsPerSample / 8 > Short.MAX_VALUE) {
            throw new IllegalArgumentException("Invalid audio format: " + sampleRate + " Hz, "
                    + channels + " channel(s), " + bitsPerSample + " bit");
        }

        File pcmFile = new File(src[0]);
        // Opening the input first reports a missing PCM file before any output is created.
        try (FileInputStream in = new FileInputStream(pcmFile)) {
            long pcmLength = pcmFile.length();
            // The header stores sizes in int fields; refuse files that would overflow them.
            if (pcmLength > Integer.MAX_VALUE - (WAV_HEADER_SIZE - 8)) {
                throw new IOException("PCM file too large for a WAV header: " + pcmLength + " bytes");
            }
            int pcmSize = (int) pcmLength;

            WaveHeader header = new WaveHeader();
            // RIFF size = total file size minus the 4-byte "RIFF" id and the 4-byte size field.
            header.fileLength = pcmSize + (WAV_HEADER_SIZE - 8);
            header.FmtHdrLeth = 16;
            header.BitsPerSample = (short) bitsPerSample;
            header.Channels = (short) channels;
            header.FormatTag = 0x0001; // PCM
            header.SamplesPerSec = sampleRate;
            header.BlockAlign = (short) (header.Channels * header.BitsPerSample / 8);
            header.AvgBytesPerSec = header.BlockAlign * header.SamplesPerSec;
            header.DataHdrLeth = pcmSize;

            byte[] h = header.getHeader();
            assert h.length == WAV_HEADER_SIZE; // a canonical WAV header is 44 bytes

            // Total size of the resulting WAV file, printed as in the original implementation.
            System.out.println((pcmSize + WAV_HEADER_SIZE));

            try (FileOutputStream out = new FileOutputStream(src[1])) {
                out.write(h);
                // Append the PCM payload right after the header.
                byte[] buffer = new byte[COPY_BUFFER_SIZE];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
            }
        }
    }
}

/**
 * Helper that holds the fields of a canonical 44-byte WAV header and serializes them.
 *
 * @author Administrator
 */
class WaveHeader {

    /*
     * RIFF chunk
     *   Name  Offset  Size    Endian  Content
     *   ID    0x00    4 byte  big     'RIFF' (0x52494646)
     *   Size  0x04    4 byte  little  fileSize - 8
     *   Type  0x08    4 byte  big     'WAVE' (0x57415645)
     * Size is the length of the whole file minus the ID and Size fields.
     * Type 'WAVE' means two sub-chunks follow: the format chunk and the data chunk.
     *
     * Format chunk
     *   Name           Offset  Size    Endian  Content
     *   ID             0x00    4 byte  big     'fmt ' (0x666D7420)
     *   Size           0x04    4 byte  little  16
     *   AudioFormat    0x08    2 byte  little  audio format, 1 for PCM
     *   NumChannels    0x0A    2 byte  little  1 = mono, 2 = stereo
     *   SampleRate     0x0C    4 byte  little  sample rate
     *   ByteRate       0x10    4 byte  little  SampleRate * NumChannels * BitsPerSample / 8
     *   BlockAlign     0x14    2 byte  little  NumChannels * BitsPerSample / 8
     *   BitsPerSample  0x16    2 byte  little  8, 16 or 32
     * Size is the length of this chunk without its ID and Size fields.
     *
     * Data chunk
     *   Name  Offset  Size    Endian  Content
     *   ID    0x00    4 byte  big     'data' (0x64617461)
     *   Size  0x04    4 byte  little  N = ByteRate * seconds
     *   Data  0x08    N byte  little  audio data
     */

    public final char[] fileID = {'R', 'I', 'F', 'F'};
    public int fileLength;
    public short FormatTag;
    public short Channels;
    public int SamplesPerSec;
    public int AvgBytesPerSec;
    public short BlockAlign;
    public short BitsPerSample;
    public char[] DataHdrID = {'d', 'a', 't', 'a'};
    public int DataHdrLeth;
    public char[] wavTag = {'W', 'A', 'V', 'E'};
    public char[] FmtHdrID = {'f', 'm', 't', ' '};
    public int FmtHdrLeth;

    /** Creates a header whose numeric fields are all zero. */
    public WaveHeader() {
    }

    /**
     * Creates a header whose numeric fields are all zero.
     *
     * @param a not used
     */
    public WaveHeader(int a) {
    }

    /**
     * Serializes the header fields in file order: RIFF id, RIFF size, WAVE tag,
     * the format chunk and finally the data chunk id and size.
     *
     * @return the header bytes
     * @throws IOException declared for callers; the in-memory stream used here does not fail
     */
    public byte[] getHeader() throws IOException {
        // In-memory sink that grows as needed.
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        putTag(out, fileID);
        putIntLE(out, fileLength);
        putTag(out, wavTag);

        putTag(out, FmtHdrID);
        putIntLE(out, FmtHdrLeth);
        putShortLE(out, FormatTag);
        putShortLE(out, Channels);
        putIntLE(out, SamplesPerSec);
        putIntLE(out, AvgBytesPerSec);
        putShortLE(out, BlockAlign);
        putShortLE(out, BitsPerSample);

        putTag(out, DataHdrID);
        putIntLE(out, DataHdrLeth);

        return out.toByteArray();
    }

    /** Appends the low 16 bits of {@code value}, least significant byte first. */
    private void putShortLE(ByteArrayOutputStream out, int value) {
        out.write(value);       // write(int) keeps only the low 8 bits
        out.write(value >>> 8);
    }

    /** Appends the 32 bits of {@code value}, least significant byte first. */
    private void putIntLE(ByteArrayOutputStream out, int value) {
        for (int shift = 0; shift < 32; shift += 8) {
            out.write(value >>> shift);
        }
    }

    /** Appends one byte per character, using the low 8 bits of each. */
    private void putTag(ByteArrayOutputStream out, char[] tag) {
        for (char c : tag) {
            out.write(c);
        }
    }
}

这里需要注意的是，wav格式的文件是以小端形式存储数据的，即低位字节存放在低地址，高位字节存放在高地址。所以在生成pcm文件的时候，一定要以小端形式存储数据，否则播放音频文件的时候听到的完全是混乱的杂音。另外，还需要注意的是，wav文件头中的采样率、声道数和采样位数都要与pcm数据保持一致：播放器会根据这些参数来解析数据并计算播放时长，任何一项不一致都会导致播放速度或声音异常。