From 4e32dd2c0bb811327bad2133b99339b4e7f717ae Mon Sep 17 00:00:00 2001 From: Leedong414 <165615367+Leedong414@users.noreply.github.com> Date: Wed, 29 May 2024 00:49:13 +0900 Subject: [PATCH 1/6] =?UTF-8?q?class=20text=20=EC=B6=94=EC=B6=9C=ED=95=9C?= =?UTF-8?q?=20=ED=8C=8C=EC=9D=BC=EC=9D=84=20=EC=9D=8C=EC=84=B1=EC=9C=BC?= =?UTF-8?q?=EB=A1=9C=20=EC=B6=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Untitled1.ipynb | 355 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 Untitled1.ipynb diff --git a/Untitled1.ipynb b/Untitled1.ipynb new file mode 100644 index 000000000000..9ea3d6f067f6 --- /dev/null +++ b/Untitled1.ipynb @@ -0,0 +1,355 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNnTQLE9Hln//EdaNSeva+y", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8pmMHhGEOE2B" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NBJOrKoMOGT5", + "outputId": "f1b29278-57c4-4886-93a5-ecf01612288e" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "pip install gtts" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "BeEnl52JOKNY", + "outputId": "7b73f30b-f650-45ef-af5f-4b5f7c85eff3" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gtts\n", + " Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)\n", + "Requirement already satisfied: requests<3,>=2.27 in /usr/local/lib/python3.10/dist-packages (from gtts) (2.31.0)\n", + "Requirement already satisfied: click<8.2,>=7.1 in /usr/local/lib/python3.10/dist-packages (from gtts) (8.1.7)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2024.2.2)\n", + "Installing collected packages: gtts\n", + "Successfully installed gtts-2.5.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from gtts import gTTS\n", + "\n", + "def speak(text):\n", + "\ttts = gTTS(text=text, lang='ko')\n", + "\ttts.save('voice.mp3')\n", + "\n", + "speak(\"안녕하세요, 저는 IML이에요.\")" + ], + "metadata": { + "id": "VnDDOpZjOYGe" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tts = gTTS(text=text, lang='ko')\n", + "mp3_fp = BytesIO()\n", + "tts.write_to_fp(mp3_fp)" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 184 + }, + "id": "jLuBkQBZO5cv", + "outputId": "3de9b61b-8168-4272-c409-5d5b6eca5c7d" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'text' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgTTS\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ko'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mmp3_fp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBytesIO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtts\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_to_fp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmp3_fp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'text' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%cd /content/drive/MyDrive/yolov5" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BJw9zD_OUeYI", + "outputId": "f8e4baf3-9809-4504-d5db-2f0a487910d6" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/drive/MyDrive/yolov5\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!python detect.py --weights /content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt --conf 0.5 --source /content/drive/MyDrive/13.jpg\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "44W31K33V2nK", + "outputId": "c38dd175-49ca-4622-b830-7603bd7a9b38" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master\n", + "YOLOv5 🚀 2024-5-28 Python-3.10.12 torch-2.3.0+cu121 CPU\n", + "\n", + "Fusing layers... \n", + "YOLOv5s summary: 157 layers, 7037095 parameters, 0 gradients, 15.8 GFLOPs\n", + "Adding AutoShape... \n", + "Detected Classes:\n", + "Hardhat\n", + "NO-Mask\n", + "NO-Mask\n", + "Hardhat\n", + "Safety Vest\n", + "sh: 1: mpg321: not found\n", + "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['/content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt'], source=/content/drive/MyDrive/13.jpg, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.5, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1\n", + "YOLOv5 🚀 2024-5-28 Python-3.10.12 torch-2.3.0+cu121 CPU\n", + "\n", + "Fusing layers... 
\n", + "YOLOv5s summary: 157 layers, 7037095 parameters, 0 gradients, 15.8 GFLOPs\n", + "image 1/1 /content/drive/MyDrive/13.jpg: 608x640 2 Hardhats, 2 NO-Masks, 1 Safety Vest, 674.3ms\n", + "Speed: 9.7ms pre-process, 674.3ms inference, 1.1ms NMS per image at shape (1, 3, 640, 640)\n", + "Results saved to \u001b[1mruns/detect/exp3\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pyaudio\n", + "import wave\n", + "import speech_recognition as sr\n", + "\n", + "def audio_save():\n", + "\n", + " FORMAT = pyaudio.paInt16 # 16비트 형식으로 설정\n", + " CHANNELS = 1\n", + " RATE = 44100\n", + " CHUNK = 1024\n", + " RECORD_SECONDS = 10\n", + " OUTPUT_FILENAME = \"recorded_audio.wav\" # 저장할 파일 이름\n", + "\n", + "\n", + " audio = pyaudio.PyAudio()\n", + "\n", + " stream = audio.open(format=FORMAT, channels=CHANNELS,\n", + " rate=RATE, input=True,\n", + " frames_per_buffer=CHUNK)\n", + "\n", + " print(\"녹음 시작...\")\n", + "\n", + " frames = []\n", + " for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):\n", + " data = stream.read(CHUNK)\n", + " frames.append(data)\n", + "\n", + " print(\"녹음 완료.\")\n", + "\n", + "\n", + " stream.stop_stream()\n", + " stream.close()\n", + " audio.terminate()\n", + "\n", + " # 음성 파일로 저장\n", + " with wave.open(OUTPUT_FILENAME, 'wb') as wf:\n", + " wf.setnchannels(CHANNELS)\n", + " wf.setsampwidth(audio.get_sample_size(FORMAT))\n", + " wf.setframerate(RATE)\n", + " wf.writeframes(b''.join(frames))\n", + "\n", + " return print(\"파일 저장 완료:\", OUTPUT_FILENAME)\n", + "\n", + "def tts_module():\n", + "\n", + " r = sr.Recognizer()\n", + " kr_audio = sr.AudioFile('recorded_audio.wav')\n", + "\n", + " with kr_audio as source:\n", + " audio = r.record(source)\n", + "\n", + " a=r.recognize_google(audio, language='ko-KR')\n", + " print(r.recognize_google(audio, language='ko-KR'))\n", + "\n", + "\n", + "\n", + " return a\n", + "\n", + "if __name__ == '__main__':\n", + " audio_save()\n", + " sentiment_data=tts_module()\n", + " print(\"a\",sentiment_data)" + ], + "metadata": { + "id": "-uVK5NOKaiF6" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "sentiment_data=='내 눈 앞에 뭐가 있어'" + ], + "metadata": { + "id": "P9WufmsAbG02" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import speech_recognition as sr\n", + "import subprocess\n", + "\n", + "def tts_module(audio_file):\n", + " r = sr.Recognizer()\n", + "\n", + " with sr.AudioFile(audio_file) as source:\n", + " audio = r.record(source)\n", + "\n", + " try:\n", + " recognized_text = r.recognize_google(audio, language='ko-KR')\n", + " print(\"인식된 텍스트:\", recognized_text)\n", + " return recognized_text\n", + " except sr.UnknownValueError:\n", + " print(\"Google Speech Recognition이 음성을 인식할 수 없습니다.\")\n", + " return \"\"\n", + " except sr.RequestError as e:\n", + " print(f\"Google Speech Recognition 서비스에 요청할 수 없습니다; {e}\")\n", + " return \"\"\n", + "\n", + "if __name__ == '__main__':\n", + " audio_file = '/content/drive/MyDrive/recorded_audio (1).wav' # 이미 녹음된 음성 파일 경로\n", + " recognized_text = tts_module(audio_file)\n", + "\n", + " if recognized_text == \"내 앞에 뭐 있어\":\n", + " print(\"명령어를 인식했습니다. 
detect.py를 실행합니다.\")\n", + " subprocess.run([\"python\", \"/content/drive/MyDrive/yolov5/detect.py\", \"--weights\", \"/content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt\", \"--conf\", \"0.5\", \"--source\",\"/content/drive/MyDrive/33.jpg\"\n", + "])\n", + "\n", + " else:\n", + " print(\"인식된 명령어가 없습니다.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KXSMitA_xiJE", + "outputId": "e667394c-2649-446c-c41e-22b915a7bec1" + }, + "execution_count": 101, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "인식된 텍스트: 내 앞에 뭐 있어\n", + "명령어를 인식했습니다. detect.py를 실행합니다.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "K1iWPfmVx1Wp" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From ede96a6b9a30570663b4e8b1f0b9f3824c00e795 Mon Sep 17 00:00:00 2001 From: Leedong414 <165615367+Leedong414@users.noreply.github.com> Date: Wed, 29 May 2024 01:19:41 +0900 Subject: [PATCH 2/6] Update detect.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit class text 추출 text 추출한걸 음성으로 출력 Signed-off-by: Leedong414 <165615367+Leedong414@users.noreply.github.com> --- detect.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/detect.py b/detect.py index c58aa80a68fc..48041c1ced0b 100644 --- a/detect.py +++ b/detect.py @@ -37,6 +37,57 @@ import torch +from gtts import gTTS + + + +def detect(opt): + source, weights, conf, save_txt_path = opt.source, opt.weights, opt.conf, opt.save_txt_path + + # 모델 로드 + model = torch.hub.load('ultralytics/yolov5', 'custom', path=weights) + + # 이미지 로드 및 추론 수행 + results = model(source) + + # 결과를 pandas 데이터프레임으로 변환 + results_df = results.pandas().xyxy[0] + + # 데이터프레임의 클래스 컬럼 추출 + classes = results_df['name'].tolist() + + # 클래스 정보를 텍스트 파일로 저장 + with open(save_txt_path, 'w') as f: + for cls in classes: + f.write(f"{cls}\n") + + # 콘솔에 출력 + print("Detected Classes:") + for cls in classes: + print(cls) + + # TTS를 사용하여 클래스 이름들을 음성으로 변환 + if classes: + text_to_speak = 'Detected classes are: ' + ', '.join(classes) + tts = gTTS(text=text_to_speak, lang='en') + tts.save("detected_classes.mp3") + os.system("mpg321 detected_classes.mp3") # mpg321 설치 필요 (리눅스의 경우) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default='./runs/train/HongRyeon_yolov5s_results/weights/best.pt', help='model.pt path') + parser.add_argument('--source', type=str, default='/HongRyeon_test01.jpg', help='source') # file/folder, 0 for webcam + parser.add_argument('--conf', type=float, default=0.5, help='object confidence threshold') + parser.add_argument('--save_txt_path', type=str, default='detected_classes.txt', help='path to save detected classes txt file') + opt = parser.parse_args() + + with torch.no_grad(): + detect(opt) + + + + + FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: From 5329de2af7e63c24308d7469e57961c81cb313b4 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Tue, 28 May 2024 16:20:23 +0000 Subject: [PATCH 3/6] Auto-format by https://ultralytics.com/actions --- detect.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/detect.py b/detect.py index 48041c1ced0b..2c6b489277a5 100644 --- a/detect.py +++ b/detect.py @@ -36,16 +36,14 @@ from pathlib import Path import torch - from gtts 
import gTTS - def detect(opt): source, weights, conf, save_txt_path = opt.source, opt.weights, opt.conf, opt.save_txt_path # 모델 로드 - model = torch.hub.load('ultralytics/yolov5', 'custom', path=weights) + model = torch.hub.load("ultralytics/yolov5", "custom", path=weights) # 이미지 로드 및 추론 수행 results = model(source) @@ -54,10 +52,10 @@ def detect(opt): results_df = results.pandas().xyxy[0] # 데이터프레임의 클래스 컬럼 추출 - classes = results_df['name'].tolist() + classes = results_df["name"].tolist() # 클래스 정보를 텍스트 파일로 저장 - with open(save_txt_path, 'w') as f: + with open(save_txt_path, "w") as f: for cls in classes: f.write(f"{cls}\n") @@ -68,26 +66,30 @@ def detect(opt): # TTS를 사용하여 클래스 이름들을 음성으로 변환 if classes: - text_to_speak = 'Detected classes are: ' + ', '.join(classes) - tts = gTTS(text=text_to_speak, lang='en') + text_to_speak = "Detected classes are: " + ", ".join(classes) + tts = gTTS(text=text_to_speak, lang="en") tts.save("detected_classes.mp3") os.system("mpg321 detected_classes.mp3") # mpg321 설치 필요 (리눅스의 경우) + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default='./runs/train/HongRyeon_yolov5s_results/weights/best.pt', help='model.pt path') - parser.add_argument('--source', type=str, default='/HongRyeon_test01.jpg', help='source') # file/folder, 0 for webcam - parser.add_argument('--conf', type=float, default=0.5, help='object confidence threshold') - parser.add_argument('--save_txt_path', type=str, default='detected_classes.txt', help='path to save detected classes txt file') + parser.add_argument( + "--weights", type=str, default="./runs/train/HongRyeon_yolov5s_results/weights/best.pt", help="model.pt path" + ) + parser.add_argument( + "--source", type=str, default="/HongRyeon_test01.jpg", help="source" + ) # file/folder, 0 for webcam + parser.add_argument("--conf", type=float, default=0.5, help="object confidence threshold") + parser.add_argument( + "--save_txt_path", type=str, default="detected_classes.txt", help="path to save detected classes txt file" + ) opt = parser.parse_args() with torch.no_grad(): detect(opt) - - - FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: From ba176b29a3f76ca215417e2b8b68f197f07bedf5 Mon Sep 17 00:00:00 2001 From: Leedong414 <165615367+Leedong414@users.noreply.github.com> Date: Wed, 29 May 2024 01:23:31 +0900 Subject: [PATCH 4/6] Update detect.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit class text 추출 text 추출한걸 음성으로 출력 Signed-off-by: Leedong414 <165615367+Leedong414@users.noreply.github.com> --- detect.py | 175 +++++++++++++++++++++++++----------------------------- 1 file changed, 80 insertions(+), 95 deletions(-) diff --git a/detect.py b/detect.py index 2c6b489277a5..c7d6eab26936 100644 --- a/detect.py +++ b/detect.py @@ -42,34 +42,27 @@ def detect(opt): source, weights, conf, save_txt_path = opt.source, opt.weights, opt.conf, opt.save_txt_path - # 모델 로드 model = torch.hub.load("ultralytics/yolov5", "custom", path=weights) - # 이미지 로드 및 추론 수행 results = model(source) - # 결과를 pandas 데이터프레임으로 변환 results_df = results.pandas().xyxy[0] - # 데이터프레임의 클래스 컬럼 추출 classes = results_df["name"].tolist() - # 클래스 정보를 텍스트 파일로 저장 with open(save_txt_path, "w") as f: for cls in classes: f.write(f"{cls}\n") - # 콘솔에 출력 print("Detected Classes:") for cls in classes: print(cls) - # TTS를 사용하여 클래스 이름들을 음성으로 변환 if classes: text_to_speak = "Detected classes are: " + ", ".join(classes) tts = gTTS(text=text_to_speak, 
lang="en") tts.save("detected_classes.mp3") - os.system("mpg321 detected_classes.mp3") # mpg321 설치 필요 (리눅스의 경우) + os.system("mpg321 detected_classes.mp3") if __name__ == "__main__": @@ -79,7 +72,7 @@ def detect(opt): ) parser.add_argument( "--source", type=str, default="/HongRyeon_test01.jpg", help="source" - ) # file/folder, 0 for webcam + ) parser.add_argument("--conf", type=float, default=0.5, help="object confidence threshold") parser.add_argument( "--save_txt_path", type=str, default="detected_classes.txt", help="path to save detected classes txt file" @@ -91,10 +84,10 @@ def detect(opt): FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] # YOLOv5 root directory +ROOT = FILE.parents[0] if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + sys.path.append(str(ROOT)) +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) from ultralytics.utils.plotting import Annotator, colors, save_one_box @@ -121,56 +114,56 @@ def detect(opt): @smart_inference_mode() def run( - weights=ROOT / "yolov5s.pt", # model path or triton URL - source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam) - data=ROOT / "data/coco128.yaml", # dataset.yaml path - imgsz=(640, 640), # inference size (height, width) - conf_thres=0.25, # confidence threshold - iou_thres=0.45, # NMS IOU threshold - max_det=1000, # maximum detections per image - device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu - view_img=False, # show results - save_txt=False, # save results to *.txt - save_csv=False, # save results in CSV format - save_conf=False, # save confidences in --save-txt labels - save_crop=False, # save cropped prediction boxes - nosave=False, # do not save images/videos - classes=None, # filter by class: --class 0, or --class 0 2 3 - agnostic_nms=False, # class-agnostic NMS - augment=False, # augmented inference - visualize=False, # visualize features - update=False, # update all models - project=ROOT / "runs/detect", # save results to project/name - name="exp", # save results to project/name - exist_ok=False, # existing project/name ok, do not increment - line_thickness=3, # bounding box thickness (pixels) - hide_labels=False, # hide labels - hide_conf=False, # hide confidences - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - vid_stride=1, # video frame-rate stride + weights=ROOT / "yolov5s.pt", + source=ROOT / "data/images", + data=ROOT / "data/coco128.yaml", + imgsz=(640, 640), + conf_thres=0.25, + iou_thres=0.45, + max_det=1000, + device="", + view_img=False, + save_txt=False, + save_csv=False, + save_conf=False, + save_crop=False, + nosave=False, + classes=None, + agnostic_nms=False, + augment=False, + visualize=False, + update=False, + project=ROOT / "runs/detect", + name="exp", + exist_ok=False, + line_thickness=3, + hide_labels=False, + hide_conf=False, + half=False, + dnn=False, + vid_stride=1, ): source = str(source) - save_img = not nosave and not source.endswith(".txt") # save inference images + save_img = not nosave and not source.endswith(".txt") is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://")) webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file) screenshot = source.lower().startswith("screen") if is_url and is_file: - source = check_file(source) # download + source = check_file(source) + + + save_dir = increment_path(Path(project) / name, 
exist_ok=exist_ok) + (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) - # Directories - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run - (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir - # Load model device = select_device(device) model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, names, pt = model.stride, model.names, model.pt - imgsz = check_img_size(imgsz, s=stride) # check image size + imgsz = check_img_size(imgsz, s=stride) - # Dataloader - bs = 1 # batch_size + + bs = 1 if webcam: view_img = check_imshow(warn=True) dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) @@ -181,20 +174,20 @@ def run( dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) vid_path, vid_writer = [None] * bs, [None] * bs - # Run inference - model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup + + model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device)) for path, im, im0s, vid_cap, s in dataset: with dt[0]: im = torch.from_numpy(im).to(model.device) - im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 - im /= 255 # 0 - 255 to 0.0 - 1.0 + im = im.half() if model.fp16 else im.float() + im /= 255 if len(im.shape) == 3: - im = im[None] # expand for batch dim + im = im[None] if model.xml and im.shape[0] > 1: ims = torch.chunk(im, im.shape[0], 0) - # Inference + with dt[1]: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False if model.xml and im.shape[0] > 1: @@ -207,17 +200,13 @@ def run( pred = [pred, None] else: pred = model(im, augment=augment, visualize=visualize) - # NMS + with dt[2]: pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) - # Second-stage classifier (optional) - # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) - # Define the path for the CSV file csv_path = save_dir / "predictions.csv" - # Create or append to the CSV file def write_to_csv(image_name, prediction, confidence): """Writes prediction data for an image to a CSV file, appending if the file exists.""" data = {"Image Name": image_name, "Prediction": prediction, "Confidence": confidence} @@ -227,34 +216,33 @@ def write_to_csv(image_name, prediction, confidence): writer.writeheader() writer.writerow(data) - # Process predictions - for i, det in enumerate(pred): # per image + for i, det in enumerate(pred): seen += 1 - if webcam: # batch_size >= 1 + if webcam: p, im0, frame = path[i], im0s[i].copy(), dataset.count s += f"{i}: " else: p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0) - p = Path(p) # to Path - save_path = str(save_dir / p.name) # im.jpg - txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt - s += "%gx%g " % im.shape[2:] # print string - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh - imc = im0.copy() if save_crop else im0 # for save_crop + p = Path(p) + save_path = str(save_dir / p.name) + txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") + s += "%gx%g " % im.shape[2:] + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] + imc = im0.copy() if save_crop else im0 annotator = Annotator(im0, line_width=line_thickness, 
example=str(names)) if len(det): - # Rescale boxes from img_size to im0 size + det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() - # Print results + for c in det[:, 5].unique(): - n = (det[:, 5] == c).sum() # detections per class - s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string + n = (det[:, 5] == c).sum() + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " - # Write results + for *xyxy, conf, cls in reversed(det): - c = int(cls) # integer class + c = int(cls) label = names[c] if hide_conf else f"{names[c]}" confidence = float(conf) confidence_str = f"{confidence:.2f}" @@ -262,59 +250,56 @@ def write_to_csv(image_name, prediction, confidence): if save_csv: write_to_csv(p.name, label, confidence_str) - if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format + if save_txt: + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) with open(f"{txt_path}.txt", "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") - if save_img or save_crop or view_img: # Add bbox to image - c = int(cls) # integer class + if save_img or save_crop or view_img: + c = int(cls) label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}") annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True) - # Stream results + im0 = annotator.result() if view_img: if platform.system() == "Linux" and p not in windows: windows.append(p) - cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) - cv2.waitKey(1) # 1 millisecond + cv2.waitKey(1) - # Save results (image with detections) if save_img: if dataset.mode == "image": cv2.imwrite(save_path, im0) - else: # 'video' or 'stream' - if vid_path[i] != save_path: # new video + else: + if vid_path[i] != save_path: vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): - vid_writer[i].release() # release previous video writer - if vid_cap: # video + vid_writer[i].release() + if vid_cap: fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - else: # stream + else: fps, w, h = 30, im0.shape[1], im0.shape[0] - save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos + save_path = str(Path(save_path).with_suffix(".mp4")) vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) vid_writer[i].write(im0) - # Print time (inference-only) LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms") - # Print results - t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image + t = tuple(x.t / seen * 1e3 for x in dt) LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "" LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: - strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning) + strip_optimizer(weights[0]) def parse_opt(): @@ 
-349,7 +334,7 @@ def parse_opt(): parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference") parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride") opt = parser.parse_args() - opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 print_args(vars(opt)) return opt From 17d5fd4e7626ec761d3d38514ffac7306458cfc2 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Tue, 28 May 2024 16:23:52 +0000 Subject: [PATCH 5/6] Auto-format by https://ultralytics.com/actions --- detect.py | 152 +++++++++++++++++++++++++----------------------------- 1 file changed, 70 insertions(+), 82 deletions(-) diff --git a/detect.py b/detect.py index c7d6eab26936..a9cad32050a0 100644 --- a/detect.py +++ b/detect.py @@ -62,7 +62,7 @@ def detect(opt): text_to_speak = "Detected classes are: " + ", ".join(classes) tts = gTTS(text=text_to_speak, lang="en") tts.save("detected_classes.mp3") - os.system("mpg321 detected_classes.mp3") + os.system("mpg321 detected_classes.mp3") if __name__ == "__main__": @@ -70,9 +70,7 @@ def detect(opt): parser.add_argument( "--weights", type=str, default="./runs/train/HongRyeon_yolov5s_results/weights/best.pt", help="model.pt path" ) - parser.add_argument( - "--source", type=str, default="/HongRyeon_test01.jpg", help="source" - ) + parser.add_argument("--source", type=str, default="/HongRyeon_test01.jpg", help="source") parser.add_argument("--conf", type=float, default=0.5, help="object confidence threshold") parser.add_argument( "--save_txt_path", type=str, default="detected_classes.txt", help="path to save detected classes txt file" @@ -84,10 +82,10 @@ def detect(opt): FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] +ROOT = FILE.parents[0] if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) -ROOT = Path(os.path.relpath(ROOT, Path.cwd())) + sys.path.append(str(ROOT)) +ROOT = Path(os.path.relpath(ROOT, Path.cwd())) from ultralytics.utils.plotting import Annotator, colors, save_one_box @@ -114,56 +112,53 @@ def detect(opt): @smart_inference_mode() def run( - weights=ROOT / "yolov5s.pt", - source=ROOT / "data/images", - data=ROOT / "data/coco128.yaml", - imgsz=(640, 640), - conf_thres=0.25, - iou_thres=0.45, - max_det=1000, - device="", - view_img=False, - save_txt=False, - save_csv=False, - save_conf=False, - save_crop=False, - nosave=False, - classes=None, - agnostic_nms=False, - augment=False, - visualize=False, - update=False, - project=ROOT / "runs/detect", - name="exp", - exist_ok=False, - line_thickness=3, - hide_labels=False, - hide_conf=False, - half=False, - dnn=False, - vid_stride=1, + weights=ROOT / "yolov5s.pt", + source=ROOT / "data/images", + data=ROOT / "data/coco128.yaml", + imgsz=(640, 640), + conf_thres=0.25, + iou_thres=0.45, + max_det=1000, + device="", + view_img=False, + save_txt=False, + save_csv=False, + save_conf=False, + save_crop=False, + nosave=False, + classes=None, + agnostic_nms=False, + augment=False, + visualize=False, + update=False, + project=ROOT / "runs/detect", + name="exp", + exist_ok=False, + line_thickness=3, + hide_labels=False, + hide_conf=False, + half=False, + dnn=False, + vid_stride=1, ): source = str(source) - save_img = not nosave and not source.endswith(".txt") + save_img = not nosave and not source.endswith(".txt") is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://")) webcam = source.isnumeric() or 
source.endswith(".streams") or (is_url and not is_file) screenshot = source.lower().startswith("screen") if is_url and is_file: - source = check_file(source) - - - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) - (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) + source = check_file(source) + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) + (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) device = select_device(device) model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, names, pt = model.stride, model.names, model.pt - imgsz = check_img_size(imgsz, s=stride) + imgsz = check_img_size(imgsz, s=stride) - - bs = 1 + bs = 1 if webcam: view_img = check_imshow(warn=True) dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) @@ -174,20 +169,18 @@ def run( dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) vid_path, vid_writer = [None] * bs, [None] * bs - - model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) + model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device)) for path, im, im0s, vid_cap, s in dataset: with dt[0]: im = torch.from_numpy(im).to(model.device) - im = im.half() if model.fp16 else im.float() - im /= 255 + im = im.half() if model.fp16 else im.float() + im /= 255 if len(im.shape) == 3: - im = im[None] + im = im[None] if model.xml and im.shape[0] > 1: ims = torch.chunk(im, im.shape[0], 0) - with dt[1]: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False if model.xml and im.shape[0] > 1: @@ -200,11 +193,10 @@ def run( pred = [pred, None] else: pred = model(im, augment=augment, visualize=visualize) - + with dt[2]: pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) - csv_path = save_dir / "predictions.csv" def write_to_csv(image_name, prediction, confidence): @@ -216,33 +208,30 @@ def write_to_csv(image_name, prediction, confidence): writer.writeheader() writer.writerow(data) - for i, det in enumerate(pred): + for i, det in enumerate(pred): seen += 1 - if webcam: + if webcam: p, im0, frame = path[i], im0s[i].copy(), dataset.count s += f"{i}: " else: p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0) - p = Path(p) - save_path = str(save_dir / p.name) - txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") - s += "%gx%g " % im.shape[2:] - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] - imc = im0.copy() if save_crop else im0 + p = Path(p) + save_path = str(save_dir / p.name) + txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") + s += "%gx%g " % im.shape[2:] + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] + imc = im0.copy() if save_crop else im0 annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): - det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() - for c in det[:, 5].unique(): - n = (det[:, 5] == c).sum() - s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " + n = (det[:, 5] == c).sum() + s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " - for *xyxy, conf, cls in reversed(det): - c = int(cls) + c = int(cls) label = names[c] if hide_conf else f"{names[c]}" confidence = float(conf) confidence_str = f"{confidence:.2f}" @@ -250,44 
+239,43 @@ def write_to_csv(image_name, prediction, confidence): if save_csv: write_to_csv(p.name, label, confidence_str) - if save_txt: - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) + if save_txt: + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() + line = (cls, *xywh, conf) if save_conf else (cls, *xywh) with open(f"{txt_path}.txt", "a") as f: f.write(("%g " * len(line)).rstrip() % line + "\n") - if save_img or save_crop or view_img: - c = int(cls) + if save_img or save_crop or view_img: + c = int(cls) label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}") annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True) - im0 = annotator.result() if view_img: if platform.system() == "Linux" and p not in windows: windows.append(p) - cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) + cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) - cv2.waitKey(1) + cv2.waitKey(1) if save_img: if dataset.mode == "image": cv2.imwrite(save_path, im0) - else: - if vid_path[i] != save_path: + else: + if vid_path[i] != save_path: vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): - vid_writer[i].release() - if vid_cap: + vid_writer[i].release() + if vid_cap: fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - else: + else: fps, w, h = 30, im0.shape[1], im0.shape[0] - save_path = str(Path(save_path).with_suffix(".mp4")) + save_path = str(Path(save_path).with_suffix(".mp4")) vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)) vid_writer[i].write(im0) @@ -299,7 +287,7 @@ def write_to_csv(image_name, prediction, confidence): s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else "" LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: - strip_optimizer(weights[0]) + strip_optimizer(weights[0]) def parse_opt(): @@ -334,7 +322,7 @@ def parse_opt(): parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference") parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride") opt = parser.parse_args() - opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 print_args(vars(opt)) return opt From 3be19499450bd64dfd03a6a176ba776f1d4744a9 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Mon, 19 Aug 2024 21:03:43 +0000 Subject: [PATCH 6/6] Auto-format by https://ultralytics.com/actions --- Untitled1.ipynb | 686 ++++++++++++++++++++++++------------------------ 1 file changed, 348 insertions(+), 338 deletions(-) diff --git a/Untitled1.ipynb b/Untitled1.ipynb index 9ea3d6f067f6..eb1d67797ce7 100644 --- a/Untitled1.ipynb +++ b/Untitled1.ipynb @@ -1,355 +1,365 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8pmMHhGEOE2B" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { "colab": { - 
"provenance": [], - "authorship_tag": "ABX9TyNnTQLE9Hln//EdaNSeva+y", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "id": "NBJOrKoMOGT5", + "outputId": "f1b29278-57c4-4886-93a5-ecf01612288e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ] } + ], + "source": [ + "from google.colab import drive\n", + "\n", + "drive.mount(\"/content/drive\")" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8pmMHhGEOE2B" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NBJOrKoMOGT5", - "outputId": "f1b29278-57c4-4886-93a5-ecf01612288e" - }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Mounted at /content/drive\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "pip install gtts" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "collapsed": true, - "id": "BeEnl52JOKNY", - "outputId": "7b73f30b-f650-45ef-af5f-4b5f7c85eff3" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting gtts\n", - " Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)\n", - "Requirement already satisfied: requests<3,>=2.27 in /usr/local/lib/python3.10/dist-packages (from gtts) (2.31.0)\n", - "Requirement already satisfied: click<8.2,>=7.1 in /usr/local/lib/python3.10/dist-packages (from gtts) (8.1.7)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2024.2.2)\n", - "Installing collected packages: gtts\n", - "Successfully installed gtts-2.5.1\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from gtts import gTTS\n", - "\n", - "def speak(text):\n", - "\ttts = gTTS(text=text, lang='ko')\n", - "\ttts.save('voice.mp3')\n", - "\n", - "speak(\"안녕하세요, 저는 IML이에요.\")" - ], - "metadata": { - "id": "VnDDOpZjOYGe" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "tts = gTTS(text=text, lang='ko')\n", - "mp3_fp = BytesIO()\n", - "tts.write_to_fp(mp3_fp)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 184 - }, - "id": "jLuBkQBZO5cv", - "outputId": "3de9b61b-8168-4272-c409-5d5b6eca5c7d" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "error", - "ename": "NameError", - "evalue": "name 'text' is not defined", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - 
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgTTS\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ko'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mmp3_fp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBytesIO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtts\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_to_fp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmp3_fp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'text' is not defined" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "%cd /content/drive/MyDrive/yolov5" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BJw9zD_OUeYI", - "outputId": "f8e4baf3-9809-4504-d5db-2f0a487910d6" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content/drive/MyDrive/yolov5\n" - ] - } - ] + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "collapsed": true, + "id": "BeEnl52JOKNY", + "outputId": "7b73f30b-f650-45ef-af5f-4b5f7c85eff3" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "!python detect.py --weights /content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt --conf 0.5 --source /content/drive/MyDrive/13.jpg\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "44W31K33V2nK", - "outputId": "c38dd175-49ca-4622-b830-7603bd7a9b38" - }, - "execution_count": 20, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master\n", - "YOLOv5 🚀 2024-5-28 Python-3.10.12 torch-2.3.0+cu121 CPU\n", - "\n", - "Fusing layers... \n", - "YOLOv5s summary: 157 layers, 7037095 parameters, 0 gradients, 15.8 GFLOPs\n", - "Adding AutoShape... \n", - "Detected Classes:\n", - "Hardhat\n", - "NO-Mask\n", - "NO-Mask\n", - "Hardhat\n", - "Safety Vest\n", - "sh: 1: mpg321: not found\n", - "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['/content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt'], source=/content/drive/MyDrive/13.jpg, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.5, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1\n", - "YOLOv5 🚀 2024-5-28 Python-3.10.12 torch-2.3.0+cu121 CPU\n", - "\n", - "Fusing layers... 
\n", - "YOLOv5s summary: 157 layers, 7037095 parameters, 0 gradients, 15.8 GFLOPs\n", - "image 1/1 /content/drive/MyDrive/13.jpg: 608x640 2 Hardhats, 2 NO-Masks, 1 Safety Vest, 674.3ms\n", - "Speed: 9.7ms pre-process, 674.3ms inference, 1.1ms NMS per image at shape (1, 3, 640, 640)\n", - "Results saved to \u001b[1mruns/detect/exp3\u001b[0m\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting gtts\n", + " Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)\n", + "Requirement already satisfied: requests<3,>=2.27 in /usr/local/lib/python3.10/dist-packages (from gtts) (2.31.0)\n", + "Requirement already satisfied: click<8.2,>=7.1 in /usr/local/lib/python3.10/dist-packages (from gtts) (8.1.7)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2024.2.2)\n", + "Installing collected packages: gtts\n", + "Successfully installed gtts-2.5.1\n" + ] + } + ], + "source": [ + "pip install gtts" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "VnDDOpZjOYGe" + }, + "outputs": [], + "source": [ + "from gtts import gTTS\n", + "\n", + "\n", + "def speak(text):\n", + " tts = gTTS(text=text, lang=\"ko\")\n", + " tts.save(\"voice.mp3\")\n", + "\n", + "\n", + "speak(\"안녕하세요, 저는 IML이에요.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 184 }, + "id": "jLuBkQBZO5cv", + "outputId": "3de9b61b-8168-4272-c409-5d5b6eca5c7d" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "import pyaudio\n", - "import wave\n", - "import speech_recognition as sr\n", - "\n", - "def audio_save():\n", - "\n", - " FORMAT = pyaudio.paInt16 # 16비트 형식으로 설정\n", - " CHANNELS = 1\n", - " RATE = 44100\n", - " CHUNK = 1024\n", - " RECORD_SECONDS = 10\n", - " OUTPUT_FILENAME = \"recorded_audio.wav\" # 저장할 파일 이름\n", - "\n", - "\n", - " audio = pyaudio.PyAudio()\n", - "\n", - " stream = audio.open(format=FORMAT, channels=CHANNELS,\n", - " rate=RATE, input=True,\n", - " frames_per_buffer=CHUNK)\n", - "\n", - " print(\"녹음 시작...\")\n", - "\n", - " frames = []\n", - " for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):\n", - " data = stream.read(CHUNK)\n", - " frames.append(data)\n", - "\n", - " print(\"녹음 완료.\")\n", - "\n", - "\n", - " stream.stop_stream()\n", - " stream.close()\n", - " audio.terminate()\n", - "\n", - " # 음성 파일로 저장\n", - " with wave.open(OUTPUT_FILENAME, 'wb') as wf:\n", - " wf.setnchannels(CHANNELS)\n", - " wf.setsampwidth(audio.get_sample_size(FORMAT))\n", - " wf.setframerate(RATE)\n", - " wf.writeframes(b''.join(frames))\n", - "\n", - " return print(\"파일 저장 완료:\", OUTPUT_FILENAME)\n", - "\n", - "def tts_module():\n", - "\n", - " r = sr.Recognizer()\n", - " kr_audio = sr.AudioFile('recorded_audio.wav')\n", - "\n", - " with kr_audio as source:\n", - " audio = r.record(source)\n", - "\n", - " a=r.recognize_google(audio, language='ko-KR')\n", - " print(r.recognize_google(audio, language='ko-KR'))\n", - "\n", - "\n", - "\n", - " return a\n", - "\n", - "if __name__ == 
'__main__':\n", - " audio_save()\n", - " sentiment_data=tts_module()\n", - " print(\"a\",sentiment_data)" - ], - "metadata": { - "id": "-uVK5NOKaiF6" - }, - "execution_count": null, - "outputs": [] + "ename": "NameError", + "evalue": "name 'text' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgTTS\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlang\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ko'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mmp3_fp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBytesIO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mtts\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_to_fp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmp3_fp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'text' is not defined" + ] + } + ], + "source": [ + "tts = gTTS(text=text, lang=\"ko\")\n", + "mp3_fp = BytesIO()\n", + "tts.write_to_fp(mp3_fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "BJw9zD_OUeYI", + "outputId": "f8e4baf3-9809-4504-d5db-2f0a487910d6" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "sentiment_data=='내 눈 앞에 뭐가 있어'" - ], - "metadata": { - "id": "P9WufmsAbG02" - }, - "execution_count": null, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "/content/drive/MyDrive/yolov5\n" + ] + } + ], + "source": [ + "%cd /content/drive/MyDrive/yolov5" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "44W31K33V2nK", + "outputId": "c38dd175-49ca-4622-b830-7603bd7a9b38" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "import speech_recognition as sr\n", - "import subprocess\n", - "\n", - "def tts_module(audio_file):\n", - " r = sr.Recognizer()\n", - "\n", - " with sr.AudioFile(audio_file) as source:\n", - " audio = r.record(source)\n", - "\n", - " try:\n", - " recognized_text = r.recognize_google(audio, language='ko-KR')\n", - " print(\"인식된 텍스트:\", recognized_text)\n", - " return recognized_text\n", - " except sr.UnknownValueError:\n", - " print(\"Google Speech Recognition이 음성을 인식할 수 없습니다.\")\n", - " return \"\"\n", - " except sr.RequestError as e:\n", - " print(f\"Google Speech Recognition 서비스에 요청할 수 없습니다; {e}\")\n", - " return \"\"\n", - "\n", - "if __name__ == '__main__':\n", - " audio_file = '/content/drive/MyDrive/recorded_audio (1).wav' # 이미 녹음된 음성 파일 경로\n", - " recognized_text = tts_module(audio_file)\n", - "\n", - " if recognized_text == \"내 앞에 뭐 있어\":\n", - " print(\"명령어를 인식했습니다. 
detect.py를 실행합니다.\")\n", - " subprocess.run([\"python\", \"/content/drive/MyDrive/yolov5/detect.py\", \"--weights\", \"/content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt\", \"--conf\", \"0.5\", \"--source\",\"/content/drive/MyDrive/33.jpg\"\n", - "])\n", - "\n", - " else:\n", - " print(\"인식된 명령어가 없습니다.\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "KXSMitA_xiJE", - "outputId": "e667394c-2649-446c-c41e-22b915a7bec1" - }, - "execution_count": 101, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "인식된 텍스트: 내 앞에 뭐 있어\n", - "명령어를 인식했습니다. detect.py를 실행합니다.\n" - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master\n", + "YOLOv5 🚀 2024-5-28 Python-3.10.12 torch-2.3.0+cu121 CPU\n", + "\n", + "Fusing layers... \n", + "YOLOv5s summary: 157 layers, 7037095 parameters, 0 gradients, 15.8 GFLOPs\n", + "Adding AutoShape... \n", + "Detected Classes:\n", + "Hardhat\n", + "NO-Mask\n", + "NO-Mask\n", + "Hardhat\n", + "Safety Vest\n", + "sh: 1: mpg321: not found\n", + "\u001b[34m\u001b[1mdetect: \u001b[0mweights=['/content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt'], source=/content/drive/MyDrive/13.jpg, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.5, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1\n", + "YOLOv5 🚀 2024-5-28 Python-3.10.12 torch-2.3.0+cu121 CPU\n", + "\n", + "Fusing layers... 
\n", + "YOLOv5s summary: 157 layers, 7037095 parameters, 0 gradients, 15.8 GFLOPs\n", + "image 1/1 /content/drive/MyDrive/13.jpg: 608x640 2 Hardhats, 2 NO-Masks, 1 Safety Vest, 674.3ms\n", + "Speed: 9.7ms pre-process, 674.3ms inference, 1.1ms NMS per image at shape (1, 3, 640, 640)\n", + "Results saved to \u001b[1mruns/detect/exp3\u001b[0m\n" + ] + } + ], + "source": [ + "!python detect.py --weights /content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt --conf 0.5 --source /content/drive/MyDrive/13.jpg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-uVK5NOKaiF6" + }, + "outputs": [], + "source": [ + "import wave\n", + "\n", + "import pyaudio\n", + "import speech_recognition as sr\n", + "\n", + "\n", + "def audio_save():\n", + " FORMAT = pyaudio.paInt16 # 16비트 형식으로 설정\n", + " CHANNELS = 1\n", + " RATE = 44100\n", + " CHUNK = 1024\n", + " RECORD_SECONDS = 10\n", + " OUTPUT_FILENAME = \"recorded_audio.wav\" # 저장할 파일 이름\n", + "\n", + " audio = pyaudio.PyAudio()\n", + "\n", + " stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)\n", + "\n", + " print(\"녹음 시작...\")\n", + "\n", + " frames = []\n", + " for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):\n", + " data = stream.read(CHUNK)\n", + " frames.append(data)\n", + "\n", + " print(\"녹음 완료.\")\n", + "\n", + " stream.stop_stream()\n", + " stream.close()\n", + " audio.terminate()\n", + "\n", + " # 음성 파일로 저장\n", + " with wave.open(OUTPUT_FILENAME, \"wb\") as wf:\n", + " wf.setnchannels(CHANNELS)\n", + " wf.setsampwidth(audio.get_sample_size(FORMAT))\n", + " wf.setframerate(RATE)\n", + " wf.writeframes(b\"\".join(frames))\n", + "\n", + " return print(\"파일 저장 완료:\", OUTPUT_FILENAME)\n", + "\n", + "\n", + "def tts_module():\n", + " r = sr.Recognizer()\n", + " kr_audio = sr.AudioFile(\"recorded_audio.wav\")\n", + "\n", + " with kr_audio as source:\n", + " audio = r.record(source)\n", + "\n", + " a = r.recognize_google(audio, language=\"ko-KR\")\n", + " print(r.recognize_google(audio, language=\"ko-KR\"))\n", + "\n", + " return a\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " audio_save()\n", + " sentiment_data = tts_module()\n", + " print(\"a\", sentiment_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P9WufmsAbG02" + }, + "outputs": [], + "source": [ + "sentiment_data == \"내 눈 앞에 뭐가 있어\"" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "KXSMitA_xiJE", + "outputId": "e667394c-2649-446c-c41e-22b915a7bec1" + }, + "outputs": [ { - "cell_type": "code", - "source": [], - "metadata": { - "id": "K1iWPfmVx1Wp" - }, - "execution_count": null, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "인식된 텍스트: 내 앞에 뭐 있어\n", + "명령어를 인식했습니다. 
detect.py를 실행합니다.\n" + ] } - ] + ], + "source": [ + "import subprocess\n", + "\n", + "\n", + "def tts_module(audio_file):\n", + " r = sr.Recognizer()\n", + "\n", + " with sr.AudioFile(audio_file) as source:\n", + " audio = r.record(source)\n", + "\n", + " try:\n", + " recognized_text = r.recognize_google(audio, language=\"ko-KR\")\n", + " print(\"인식된 텍스트:\", recognized_text)\n", + " return recognized_text\n", + " except sr.UnknownValueError:\n", + " print(\"Google Speech Recognition이 음성을 인식할 수 없습니다.\")\n", + " return \"\"\n", + " except sr.RequestError as e:\n", + " print(f\"Google Speech Recognition 서비스에 요청할 수 없습니다; {e}\")\n", + " return \"\"\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " audio_file = \"/content/drive/MyDrive/recorded_audio (1).wav\" # 이미 녹음된 음성 파일 경로\n", + " recognized_text = tts_module(audio_file)\n", + "\n", + " if recognized_text == \"내 앞에 뭐 있어\":\n", + " print(\"명령어를 인식했습니다. detect.py를 실행합니다.\")\n", + " subprocess.run(\n", + " [\n", + " \"python\",\n", + " \"/content/drive/MyDrive/yolov5/detect.py\",\n", + " \"--weights\",\n", + " \"/content/drive/MyDrive/yolov5/runs/train/Project_yolo5/weights/best.pt\",\n", + " \"--conf\",\n", + " \"0.5\",\n", + " \"--source\",\n", + " \"/content/drive/MyDrive/33.jpg\",\n", + " ]\n", + " )\n", + "\n", + " else:\n", + " print(\"인식된 명령어가 없습니다.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1iWPfmVx1Wp" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyNnTQLE9Hln//EdaNSeva+y", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file
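
Note appended after the series, not part of any patch above: the Colab run log captured in Untitled1.ipynb reports "sh: 1: mpg321: not found", so the MP3 written by the new detect() helper is saved but never actually played in that environment. Below is a minimal, hedged sketch of an alternative playback path, assuming the code runs inside a Jupyter/Colab notebook where IPython is available and that the output file name matches the "detected_classes.mp3" written by the patched detect.py; the helper name speak_detected_classes is hypothetical and only for illustration.

    # Sketch only: plays the TTS audio inline in a notebook instead of shelling
    # out to mpg321 (which is not installed in the Colab session shown above).
    from gtts import gTTS
    from IPython.display import Audio, display

    def speak_detected_classes(classes):
        """Convert a list of detected class names to speech and play it inline."""
        if not classes:
            return
        text_to_speak = "Detected classes are: " + ", ".join(classes)
        # Same file name as the patched detect.py writes (assumption).
        gTTS(text=text_to_speak, lang="en").save("detected_classes.mp3")
        # Renders an audio player in the notebook output cell.
        display(Audio("detected_classes.mp3", autoplay=True))

    # Example usage with the classes seen in the run log:
    # speak_detected_classes(["Hardhat", "NO-Mask", "Safety Vest"])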