-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebui.py
301 lines (257 loc) · 11.2 KB
/
webui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
import streamlit as st
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
from events_guess_only_ambient import predict_audio_events
import tempfile
import matplotlib
matplotlib.use('Agg')
# Configure the matplotlib plotting style.
plt.style.use('default')
# SimHei is a CJK-capable font so the Chinese axis labels/titles render;
# unicode_minus must be off or minus signs show as boxes with CJK fonts.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# One-time init of the flag that remembers whether results were already rendered
# in this Streamlit session (survives script reruns).
if 'has_displayed_results' not in st.session_state:
    st.session_state.has_displayed_results = False
def plot_waveform(y, sr):
    """Plot the audio waveform (amplitude over time).

    Args:
        y: 1-D array of audio samples.
        sr: Sample rate in Hz.

    Returns:
        The matplotlib Figure containing the plot.
    """
    fig = plt.figure(figsize=(12, 3))
    ax = fig.add_subplot(1, 1, 1)
    # Sample i occurs at time i / sr. The previous
    # np.linspace(0, len(y)/sr, len(y)) (endpoint=True) mapped the last
    # sample to len(y)/sr instead of (len(y) - 1)/sr, stretching the
    # time axis by one sample period.
    times = np.arange(len(y)) / sr
    ax.plot(times, y)
    ax.set_title('音频波形图')
    ax.set_xlabel('时间 (秒)')
    ax.set_ylabel('振幅')
    ax.grid(True)
    plt.tight_layout()
    return fig
def plot_melspectrogram(y, sr):
    """Render the mel-scaled spectrogram of the signal, in decibels.

    Args:
        y: 1-D array of audio samples.
        sr: Sample rate in Hz.

    Returns:
        The matplotlib Figure containing the spectrogram.
    """
    figure = plt.figure(figsize=(12, 3))
    axis = figure.add_subplot(1, 1, 1)
    # Power spectrogram on the mel scale, converted to dB relative to peak.
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    image = librosa.display.specshow(
        mel_db,
        y_axis='mel',
        x_axis='time',
        ax=axis,
    )
    figure.colorbar(image, ax=axis, format='%+2.0f dB')
    axis.set_title('梅尔频谱图')
    plt.tight_layout()
    return figure
def plot_mfcc(y, sr):
    """Render the first 13 MFCC coefficients of the signal over time.

    Args:
        y: 1-D array of audio samples.
        sr: Sample rate in Hz.

    Returns:
        The matplotlib Figure containing the MFCC plot.
    """
    figure = plt.figure(figsize=(12, 3))
    axis = figure.add_subplot(1, 1, 1)
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    image = librosa.display.specshow(coefficients, x_axis='time', ax=axis)
    figure.colorbar(image, ax=axis)
    axis.set_title('MFCC特征图')
    plt.tight_layout()
    return figure
def main():
    """Streamlit entry point: build the UI, run audio event detection, render results.

    Fixes relative to the original:
      - the uploaded temp file is now removed in a ``finally`` block, so it no
        longer leaks when ``librosa.load`` (or anything before cleanup) raises;
      - the bare ``except:`` around ``os.unlink`` is narrowed to ``OSError``;
      - the unused ``as results`` binding on the results container is removed.
    """
    # Page configuration
    st.set_page_config(
        page_title="音频事件分析系统",
        layout="wide"
    )

    # Sidebar controls
    st.sidebar.title("参数设置")

    # Model selection
    model_type = st.sidebar.radio(
        "选择模型类型",
        ["随机森林 (RF)", "XGBoost (XGB)"],
        index=1,  # default to XGBoost
        help="选择用于检测的模型类型"
    )
    # Short code used by the detection backend
    model_code_type = "xgb" if "XGBoost" in model_type else "rf"
    # Default confidence threshold depends on the chosen model
    default_confidence = 0.8 if model_code_type == "xgb" else 0.6

    # Option: run detection on the non-vocal (ambient) part only
    use_ambient_only = st.sidebar.checkbox(
        "仅使用无人声部分进行检测",
        value=True,
        help="启用后,将先分离音频,仅使用无人声部分进行事件检测"
    )

    # Advanced options panel
    with st.sidebar.expander("高级选项"):
        custom_model_path = st.text_input(
            "自定义模型路径",
            value="",
            help="如果要使用自定义模型,请输入完整路径 (.pkl)"
        )

    window_size = st.sidebar.slider(
        "窗口大小 (秒)",
        min_value=0.5,
        max_value=5.0,
        value=2.0,
        step=0.1,
        help="音频分析的时间窗口大小"
    )
    hop_length = st.sidebar.slider(
        "滑动步长 (秒)",
        min_value=0.1,
        max_value=2.0,
        value=1.5,
        step=0.1,
        help="连续窗口之间的时间间隔"
    )
    confidence_threshold = st.sidebar.slider(
        "置信度阈值",
        min_value=0.0,
        max_value=1.0,
        value=default_confidence,
        step=0.05,
        help="事件检测的置信度阈值"
    )

    st.title("音频事件分析系统")
    st.write("上传音频文件进行分析")
    # Show which model is currently active
    st.markdown(f"**当前模型**: {model_type}")

    # File upload
    uploaded_file = st.file_uploader("选择音频文件", type=['wav', 'mp3'])
    if uploaded_file is not None:
        progress_placeholder = st.empty()
        status_placeholder = st.empty()
        result_placeholder = st.empty()

        # If results were produced earlier in this session, re-render the
        # cached figures and info (Streamlit reruns the script on interaction).
        if st.session_state.has_displayed_results:
            with result_placeholder.container():
                col1, col2 = st.columns([2, 1])
                with col1:
                    if 'waveform' in st.session_state:
                        st.subheader("波形图")
                        st.pyplot(st.session_state.waveform)
                    if 'melspectrogram' in st.session_state:
                        st.subheader("梅尔频谱图")
                        st.pyplot(st.session_state.melspectrogram)
                    if 'mfcc' in st.session_state:
                        st.subheader("MFCC特征图")
                        st.pyplot(st.session_state.mfcc)
                with col2:
                    if 'audio_info' in st.session_state:
                        st.subheader("音频信息")
                        st.write(st.session_state.audio_info)
                    if 'events' in st.session_state:
                        st.subheader("事件检测结果")
                        if st.session_state.events:
                            for event, start, end, confidence in st.session_state.events:
                                with st.expander(f"事件: {event} ({confidence:.2%})"):
                                    st.write(f"开始时间: {start:.2f}秒")
                                    st.write(f"结束时间: {end:.2f}秒")
                        else:
                            st.write("未检测到显著事件")

        if st.button("开始检测"):
            st.session_state.has_displayed_results = True
            try:
                # Clear any previous results
                result_placeholder.empty()
                # Progress bar and status text
                progress_bar = progress_placeholder.progress(0)
                status_text = status_placeholder.text("正在加载音频...")

                # Persist the upload to a temp file so librosa can read it by path.
                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio:
                    temp_audio.write(uploaded_file.read())
                    temp_audio_path = temp_audio.name

                try:
                    progress_bar.progress(20)
                    # Load audio from the temp file (native sample rate)
                    y, sr = librosa.load(temp_audio_path, sr=None)
                    duration = librosa.get_duration(y=y, sr=sr)

                    ambient_text = "(仅使用无人声部分)" if use_ambient_only else ""
                    status_text.text(f"正在使用{model_type}模型{ambient_text}进行事件检测...")
                    progress_bar.progress(60)

                    # Assemble detection parameters
                    model_params = {
                        "window_size": window_size,
                        "hop_length": hop_length,
                        "confidence_threshold": confidence_threshold,
                        "model_type": model_code_type,
                        "use_ambient_only": use_ambient_only
                    }
                    # Pick the model path: custom if given, else per-type default
                    if not custom_model_path:
                        if model_code_type == 'rf':
                            model_params["model_path"] = "models/audio_event_model_segments.pkl"
                        else:  # xgb
                            model_params["model_path"] = "models/audio_event_model_xgboost.pkl"
                    else:
                        model_params["model_path"] = custom_model_path

                    # Run event detection on the temp file
                    try:
                        events = predict_audio_events(
                            temp_audio_path,
                            **model_params
                        )
                    except Exception as e:
                        st.error(f"事件检测失败: {str(e)}")
                        import traceback
                        st.error(f"详细错误: {traceback.format_exc()}")
                        events = []
                finally:
                    # Always remove the temp file, even if loading/detection
                    # raised above (the original leaked it in that case).
                    try:
                        os.unlink(temp_audio_path)
                    except OSError:
                        pass

                # Render results in a two-column layout
                with result_placeholder.container():
                    col1, col2 = st.columns([2, 1])
                    with col1:
                        # Waveform
                        st.subheader("波形图")
                        try:
                            plt.close('all')
                            fig1 = plot_waveform(y, sr)
                            st.pyplot(fig1)
                            st.session_state.waveform = fig1
                        except Exception as e:
                            st.error(f"波形图生成失败: {str(e)}")

                        # Mel spectrogram
                        st.subheader("梅尔频谱图")
                        try:
                            plt.close('all')
                            fig2 = plot_melspectrogram(y, sr)
                            st.pyplot(fig2)
                            st.session_state.melspectrogram = fig2
                        except Exception as e:
                            st.error(f"梅尔频谱图生成失败: {str(e)}")

                        # MFCC features
                        st.subheader("MFCC特征图")
                        try:
                            plt.close('all')
                            fig3 = plot_mfcc(y, sr)
                            st.pyplot(fig3)
                            st.session_state.mfcc = fig3
                        except Exception as e:
                            st.error(f"MFCC特征图生成失败: {str(e)}")
                    with col2:
                        # Audio metadata
                        st.subheader("音频信息")
                        audio_info = {
                            "采样率": f"{sr} Hz",
                            "时长": f"{duration:.2f} 秒",
                            "使用模型": model_type,
                            "仅使用无人声部分": "是" if use_ambient_only else "否"
                        }
                        st.write(audio_info)
                        st.session_state.audio_info = audio_info

                        # Detection results
                        st.subheader("事件检测结果")
                        st.session_state.events = events
                        if events:
                            for event, start, end, confidence in events:
                                with st.expander(f"事件: {event} ({confidence:.2%})"):
                                    st.write(f"开始时间: {start:.2f}秒")
                                    st.write(f"结束时间: {end:.2f}秒")
                        else:
                            st.write("未检测到显著事件")

                status_text.text("分析完成!")
                progress_bar.progress(100)
            except Exception as e:
                status_placeholder.error(f"处理过程中出错: {str(e)}")
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()