

{"id":25463,"date":"2018-08-22T04:24:40","date_gmt":"2018-08-22T04:24:40","guid":{"rendered":"https:\/\/data-flair.training\/blogs\/?p=25463"},"modified":"2025-07-25T20:56:55","modified_gmt":"2025-07-25T15:26:55","slug":"python-speech-recognition-ai","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/","title":{"rendered":"Python Speech Recognition &#8211; Artificial Intelligence"},"content":{"rendered":"<div class='__iawmlf-post-loop-links' style='display:none;' data-iawmlf-post-links='[{&quot;id&quot;:1802,&quot;href&quot;:&quot;http:\\\/\\\/www.voiptroubleshooter.com\\\/open_speech\\\/american.html&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20250704091421\\\/http:\\\/\\\/www.voiptroubleshooter.com\\\/open_speech\\\/american.html&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-10 02:22:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-15 00:03:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-18 00:35:40&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-21 16:48:31&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-28 05:01:22&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-31 06:11:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-03 17:31:50&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-06 23:04:43&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-15 06:44:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-21 12:42:20&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-26 14:32:40&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-31 13:02:26&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-08 10:26:10&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-11 
10:29:01&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-14 15:07:33&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-19 09:06:32&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-23 13:05:31&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-26 15:06:24&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-02 17:50:43&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-11 11:14:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-16 06:44:38&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-19 08:40:04&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-22 18:01:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-04 13:25:03&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-10 01:03:13&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-14 08:30:47&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-17 12:37:56&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-20 15:39:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-23 21:02:39&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-28 03:31:24&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-05 22:22:03&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-10 02:44:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-13 15:12:07&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-19 05:35:07&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-22 16:35:40&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-28 04:53:13&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-04 02:45:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-09 17:04:48&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-09 
17:04:48&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:1803,&quot;href&quot;:&quot;https:\\\/\\\/www.lfd.uci.edu\\\/~gohlke\\\/pythonlibs\\\/#pyaudio&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20240404002728\\\/https:\\\/\\\/www.lfd.uci.edu\\\/~gohlke\\\/pythonlibs\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-10 02:22:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-15 00:03:14&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-18 00:37:45&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-22 17:01:02&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-28 05:01:24&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-01 16:47:10&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-05 15:45:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-08 16:13:27&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-15 06:45:27&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-21 12:42:36&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-26 14:32:54&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-02 00:05:33&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-08 10:26:14&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-11 10:30:01&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-14 15:07:45&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-21 02:49:43&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-25 14:02:06&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-02 17:50:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-11 11:15:27&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-16 06:44:47&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-19 
08:40:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-24 08:46:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-04 13:25:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-15 12:44:00&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-18 17:37:25&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-22 17:03:16&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-28 03:31:34&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-05 22:22:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-10 02:44:24&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-13 15:12:10&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-19 05:35:19&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-22 16:36:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-28 04:53:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-04 02:47:06&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-09 17:04:57&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-09 17:04:57&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;}]'><\/div>\n<p><span style=\"font-weight: 400\">Welcome to our <strong>Python<\/strong> Speech Recognition Tutorial. In this <strong>tutorial of AI with Python<\/strong> Speech Recognition, we will learn to read an audio file with Python. We will make use of the speech recognition API to perform this task. 
Moreover, we will discuss reading a segment and dealing with noise.<\/span><\/p>\n<p>So, let&#8217;s start the Python Speech recognition Tutorial.<\/p>\n<h3>What is Python Speech Recognition?<\/h3>\n<p><span style=\"font-weight: 400\">From systems facilitating single speakers and limited vocabularies of around a dozen words, to systems that recognize from multiple speakers and possess huge vocabularies in various languages, we have come a long way. <\/span><\/p>\n<p><span style=\"font-weight: 400\">What we do here is- we convert speech from physical sound to electrical signals using a microphone. Then, we use an analogue-to-digital converter to convert this to digital data. <\/span><\/p>\n<p><span style=\"font-weight: 400\">Finally, we use multiple models to transcribe audio to text. In the <strong>Hidden Markov Model (HMM)<\/strong>, we divide the speech signal into 10-millisecond fragments.<\/span><\/p>\n<h4>a. Available APIs in Python Speech Recognition<\/h4>\n<p><span style=\"font-weight: 400\">With Python, we have several APIs available:<\/span><\/p>\n<ul>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">apiai<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">assemblyai<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">google-cloud-speech<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">pocketsphinx<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">SpeechRecognition<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">watson-developer-cloud<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">wit<\/span><\/li>\n<\/ul>\n<p><span style=\"font-weight: 400\">Some <strong>Python packages<\/strong> like wit and apiai offer more than just basic speech recognition. Here, though, we will demonstrate SpeechRecognition, which is easier to use. 
This hard-codes a default API key for the Google Web Speech API.<\/span><\/p>\n<h4>b. Supported File Types in Python Speech Recognition<\/h4>\n<ul>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">WAV- PCM\/LPCM format<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">AIFF<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">AIFF-C<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\">FLAC<\/span><\/li>\n<\/ul>\n<h4>c. Prerequisites for Python Speech Recognition<\/h4>\n<p><span style=\"font-weight: 400\">You can use pip to install this-<\/span><\/p>\n<p><strong>pip install SpeechRecognition<\/strong><\/p>\n<p><span style=\"font-weight: 400\">To test the installation, you can import this in the interpreter and check the version-<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; import speech_recognition as sr\r\n&gt;&gt;&gt; sr.__version__<\/pre>\n<p><strong>&#8216;3.8.1&#8217;<\/strong><\/p>\n<p><span style=\"font-weight: 400\">We also download a sample audio from here-<\/span><\/p>\n<p><strong><a href=\"http:\/\/www.voiptroubleshooter.com\/open_speech\/american.html\">http:\/\/www.voiptroubleshooter.com\/open_speech\/american.html<\/a><\/strong><\/p>\n<h3>Reading an Audio File in Python<\/h3>\n<h4>a. 
The Recognizer class<\/h4>\n<p><span style=\"font-weight: 400\">First, we make an instance of the Recognizer class.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; r=sr.Recognizer()<\/pre>\n<p>With Recognizer, we have a method for each API-<\/p>\n<ul>\n<li><strong>recognize_bing()-<\/strong> Microsoft Bing Speech<\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\"><strong>recognize_google()-<\/strong> Google Web Speech API<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\"><strong>recognize_google_cloud()-<\/strong> Google Cloud Speech<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\"><strong>recognize_houndify()-<\/strong> Houndify<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\"><strong>recognize_ibm()-<\/strong> IBM Speech to Text<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\"><strong>recognize_sphinx()-<\/strong> CMU Sphinx<\/span><\/li>\n<li style=\"font-weight: 400\"><span style=\"font-weight: 400\"><strong>recognize_wit()-<\/strong> Wit.ai<\/span><\/li>\n<\/ul>\n<p><span style=\"font-weight: 400\">Except for recognize_sphinx(), you need an Internet connection for anything else you\u2019re working with.<\/span><\/p>\n<h4>b. Capturing data with record()<\/h4>\n<p><span style=\"font-weight: 400\">We can have the context manager open the file and read its contents, then record it into an AudioData instance.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; demo=sr.AudioFile('demo.wav')\r\n&gt;&gt;&gt; with demo as source:\r\n       audio=r.record(source)<\/pre>\n<p>To confirm this, try:<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; type(audio)<\/pre>\n<p><strong>&lt;class &#8216;speech_recognition.AudioData&#8217;&gt;<\/strong><\/p>\n<h4>c. 
Recognizing Speech in the Audio<\/h4>\n<p><span style=\"font-weight: 400\">Finally, you can call recognize_google() to perform the transcription.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8220;The Purge can use within The Smurfs the sheet without playback Mount delivery date habitat of a Vow these days it&#8217;s okay microwave devices are installed in Windows to use of lemons next find the password on the site that the houses such hard core in a garbage for the study core exercises talking is hard disk&#8221;<\/strong><\/p>\n<p><span style=\"font-weight: 400\">Well, you can read audio of a different language using the <\/span><i><span style=\"font-weight: 400\">language<\/span><\/i><span style=\"font-weight: 400\"> parameter-<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">r.recognize_google(audio,language='ro-RO') #for Romanian<\/pre>\n<h3>Reading a Segment of Audio<\/h3>\n<p><span style=\"font-weight: 400\">When you only want to read a part of your audio file, you can use the arguments <\/span><i><span style=\"font-weight: 400\">offset<\/span><\/i><span style=\"font-weight: 400\">&#8211; telling it where to begin (in seconds), and <\/span><i><span style=\"font-weight: 400\">duration<\/span><\/i><span style=\"font-weight: 400\">&#8211; telling it how long to listen.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with demo as source:\r\n       audio=r.record(source,offset=4,duration=3)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8216;clear the sheet without me back&#8217;<\/strong><\/p>\n<p><span style=\"font-weight: 400\">Note that this caused issues at the extremes. It heard \u2018murfs\u2019, which it translated to \u2018clear\u2019. 
It also heard \u2018me back\u2019 instead of \u2018playback\u2019 because of the noise in the audio.<\/span><\/p>\n<p><span style=\"font-weight: 400\">If we set the offset to 3.3,<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with demo as source:\r\n       audio=r.record(source,offset=3.3,duration=3)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8216;clear the sheet with Ok&#8217;<\/strong><\/p>\n<p><span style=\"font-weight: 400\">But check what happens when we set the offset to 2.5-<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with demo as source:\r\n       audio=r.record(source,offset=2.5,duration=3)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8216;National thanks&#8217;<\/strong><\/p>\n<h3>Python Speech Recognition &#8211; Dealing with Noise<\/h3>\n<p><span style=\"font-weight: 400\">Okay, let\u2019s face it. There will always be noise, no matter how professional the equipment you use to record your audio. So we had better learn to deal with it. <\/span><\/p>\n<p><span style=\"font-weight: 400\">The method adjust_for_ambient_noise() reads the first second of a file stream to calibrate the recognizer to the audio\u2019s noise level. This often consumes that part of the audio, and it doesn\u2019t make it to the transcription.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with demo as source:\r\n       r.adjust_for_ambient_noise(source)\r\n       audio=r.record(source,offset=2.5,duration=3)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8216;clear the sheet&#8217;<\/strong><br \/>\n<span style=\"font-weight: 400\">We can pass this method an argument for how long it should listen for noise so it can calibrate the recognizer. 
Let\u2019s see how it produces two entirely different outputs for a difference as low as 0.005-<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with demo as source:\r\n       r.adjust_for_ambient_noise(source,duration=0.51)\r\n       audio=r.record(source,offset=2.5,duration=3)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8216;National thanks&#8217;<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with demo as source:\r\n       r.adjust_for_ambient_noise(source,duration=0.515)\r\n       audio=r.record(source,offset=2.5,duration=3)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>&#8216;clear the sheet&#8217;<\/strong><\/p>\n<p><span style=\"font-weight: 400\">As you can see, adjust_for_ambient_noise() is definitely not a miracle worker. To get around this, you can use audio-editing software such as Audacity to preprocess the audio.<\/span><\/p>\n<h3>Working With Microphones<\/h3>\n<p><span style=\"font-weight: 400\">To be able to work with your own voice with speech recognition, you need the PyAudio package. You can install it with pip-<\/span><\/p>\n<p><strong>pip install PyAudio<\/strong><\/p>\n<p><span style=\"font-weight: 400\">Or you can download a prebuilt binary and install it with pip. Download link-<\/span><\/p>\n<p><strong><a href=\"https:\/\/www.lfd.uci.edu\/~gohlke\/pythonlibs\/#pyaudio\">https:\/\/www.lfd.uci.edu\/~gohlke\/pythonlibs\/#pyaudio<\/a><\/strong><\/p>\n<p><span style=\"font-weight: 400\">Then:<\/span><\/p>\n<p><strong>pip install [file_name_for_binary]<\/strong><\/p>\n<p><span style=\"font-weight: 400\">For example:<\/span><\/p>\n<p><strong>pip install PyAudio-0.2.11-cp37-cp37m-win32.whl<\/strong><\/p>\n<h4>a. The Microphone class<\/h4>\n<p><span style=\"font-weight: 400\">Like Recognizer for audio files, we will need Microphone for real-time speech data. 
Since we installed new packages, let\u2019s exit our interpreter and open another session.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; import speech_recognition as sr\r\n&gt;&gt;&gt; r=sr.Recognizer()<\/pre>\n<p>Now, let\u2019s create an instance of Microphone.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; mic=sr.Microphone()<\/pre>\n<p>Microphone has a static method to list out all microphones available-<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; sr.Microphone.list_microphone_names()<\/pre>\n<p><strong>[&#8216;Microsoft Sound Mapper &#8211; Input&#8217;, &#8216;Microphone (Realtek High Defini&#8217;, &#8216;Microsoft Sound Mapper &#8211; Output&#8217;, &#8216;Speakers (Realtek High Definiti&#8217;, &#8216;Primary Sound Capture Driver&#8217;, &#8216;Microphone (Realtek High Definition Audio)&#8217;, &#8216;Primary Sound Driver&#8217;, &#8216;Speakers (Realtek High Definition Audio)&#8217;, &#8216;Speakers (Realtek High Definition Audio)&#8217;, &#8216;Microphone (Realtek High Definition Audio)&#8217;, &#8216;Speakers (Realtek HD Audio output)&#8217;, &#8216;Line In (Realtek HD Audio <\/strong><strong>Line input)&#8217;, &#8216;Microphone (Realtek HD Audio Mic input)&#8217;, &#8216;Stereo Mix (Realtek HD Audio Stereo input)&#8217;]<\/strong><\/p>\n<p><span style=\"font-weight: 400\">Now you can select a specific microphone by its device index, with code like the following-<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; mic=sr.Microphone(device_index=3)<\/pre>\n<p><span style=\"font-weight: 400\">But let\u2019s stick with the default for now.<\/span><\/p>\n<h4>b. Capturing Microphone Input<\/h4>\n<p><span style=\"font-weight: 400\">With the context manager, we capture input using the listen() method.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with mic as source:\r\n          audio=r.listen(source)<\/pre>\n<p><span style=\"font-weight: 400\">You should now speak into your microphone. 
When it detects silence, it stops listening. It then displays the interpreter prompt (&gt;&gt;&gt;).<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>decease a test<\/strong><br \/>\n<span style=\"font-weight: 400\">You can call the adjust_for_ambient_noise() method with Microphone too.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; with mic as source:\r\n         r.adjust_for_ambient_noise(source)\r\n         audio=r.listen(source)\r\n&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>this is a test<\/strong><\/p>\n<h4>c. Unintelligible Speech<\/h4>\n<p><span style=\"font-weight: 400\">When Python cannot match some audio to text, it raises an UnknownValueError exception.<\/span><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; r.recognize_google(audio)<\/pre>\n<p><strong>Traceback (most recent call last):<\/strong><br \/>\n<strong> \u00a0File &#8220;&lt;pyshell#7&gt;&#8221;, line 1, in &lt;module&gt;<\/strong><br \/>\n<strong> \u00a0\u00a0\u00a0r.recognize_google(audio)<\/strong><br \/>\n<strong> \u00a0File &#8220;C:\\Users\\Ram\\AppData\\Local\\Programs\\Python\\Python37-32\\lib\\site-packages\\speech_recognition\\__init__.py&#8221;, line 858, in recognize_google<\/strong><br \/>\n<strong> \u00a0\u00a0\u00a0if not isinstance(actual_result, dict) or len(actual_result.get(&#8220;alternative&#8221;, [])) == 0: raise UnknownValueError()<\/strong><br \/>\n<strong>speech_recognition.UnknownValueError<\/strong><\/p>\n<p><span style=\"font-weight: 400\">Some pieces of audio that would lead to this will be- coughing sounds, gagging sounds, hand claps, and tongue clicks.<\/span><\/p>\n<p>So, this was all in Python Speech Recognition. Hope you like our explanation.<\/p>\n<h3>Conclusion<\/h3>\n<p>Python can also help computers understand speech. With the help of libraries like SpeechRecognition, you can make Python listen to your voice and turn it into text. This is called speech-to-text. 
It\u2019s useful in making voice assistants, phone bots, and tools for people who cannot type.<\/p>\n<p>The SpeechRecognition library in Python is easy to use. It connects with microphones and audio files. It works well with Google Speech API and other services to turn spoken words into written text. Python can also be used to analyze the tone or speed of speech, which helps in understanding emotions or stress in voice.<\/p>\n<p>Speech recognition using Python is used in smart homes, mobile apps, and customer support. It helps machines become more interactive and human-like. With just a few lines of Python code, you can build apps that respond when someone speaks. This is a great step in building intelligent systems that can talk and listen.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Welcome to our Python Speech Recognition Tutorial. In this tutorial of AI with Python Speech Recognition, we will learn to read an audio file with Python. We will make use of the speech recognition&#46;&#46;&#46;<\/p>\n","protected":false},"author":5,"featured_media":25487,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[11,46],"tags":[5117,6006,10849,10921,10927,13189],"class_list":["post-25463","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-artificial-intelligence","category-python","tag-google-speech-recognition-python","tag-how-to-convert-speech-to-text-in-python","tag-python-speech-recognition","tag-python-voice-recognition","tag-python-with-ai","tag-speech-recognition"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Python Speech Recognition - Artificial Intelligence - DataFlair<\/title>\n<meta name=\"description\" content=\"Python speech Recognition,how to convert speech to text in Python,google speech recognition Python, Working With 
Microphones,Reading a audio Segment\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Python Speech Recognition - Artificial Intelligence - DataFlair\" \/>\n<meta property=\"og:description\" content=\"Python speech Recognition,how to convert speech to text in Python,google speech recognition Python, Working With Microphones,Reading a audio Segment\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/\" \/>\n<meta property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2018-08-22T04:24:40+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2025-07-25T15:26:55+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/08\/Speech-Recognition-with-Python-AI-01.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"628\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"6 minutes\" \/>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Python Speech Recognition - Artificial Intelligence - DataFlair","description":"Python speech Recognition,how to convert speech to text in Python,google speech recognition Python, Working With Microphones,Reading a audio Segment","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/","og_locale":"en_US","og_type":"article","og_title":"Python Speech Recognition - Artificial Intelligence - DataFlair","og_description":"Python speech Recognition,how to convert speech to text in Python,google speech recognition Python, Working With Microphones,Reading a audio Segment","og_url":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2018-08-22T04:24:40+00:00","article_modified_time":"2025-07-25T15:26:55+00:00","og_image":[{"width":1200,"height":628,"url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/08\/Speech-Recognition-with-Python-AI-01.jpg","type":"image\/jpeg"}],"author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"6 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/7f83c342f5d1632d6f7b4b0b0f447823"},"headline":"Python Speech Recognition &#8211; Artificial Intelligence","datePublished":"2018-08-22T04:24:40+00:00","dateModified":"2025-07-25T15:26:55+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/"},"wordCount":1267,"commentCount":6,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/08\/Speech-Recognition-with-Python-AI-01.jpg","keywords":["google speech recognition Python","how to convert speech to text in Python","Python speech Recognition","Python voice recognition","Python with AI","speech recognition"],"articleSection":["Artificial Intelligence Tutorials","Python Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/","url":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/","name":"Python Speech Recognition - Artificial Intelligence - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"primaryImageOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#primaryimage"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/08\/Speech-Recognition-with-Python-AI-01.jpg","datePublished":"2018-08-22T04:24:40+00:00","dateModified":"2025-07-25T15:26:55+00:00","description":"Python speech Recognition,how to convert speech to text in Python,google speech recognition Python, Working With Microphones,Reading a audio Segment","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#primaryimage","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/08\/Speech-Recognition-with-Python-AI-01.jpg","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/08\/Speech-Recognition-with-Python-AI-01.jpg","width":1200,"height":628,"caption":"Python Speech Recognition - Artificial Intelligence"},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/python-speech-recognition-ai\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Artificial Intelligence Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/artificial-intelligence\/"},{"@type":"ListItem","position":3,"name":"Python Speech Recognition &#8211; Artificial 
Intelligence"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/7f83c342f5d1632d6f7b4b0b0f447823","name":"DataFlair Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","caption":"DataFlair 
Team"},"description":"DataFlair Team creates expert-level guides on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. Our goal is to empower learners with easy-to-understand content. Explore our resources for career growth and practical learning.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam1\/"}]}},"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/25463","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/5"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=25463"}],"version-history":[{"count":2,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/25463\/revisions"}],"predecessor-version":[{"id":146163,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/25463\/revisions\/146163"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media\/25487"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=25463"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=25463"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=25463"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}