

{"id":99494,"date":"2021-08-13T09:00:02","date_gmt":"2021-08-13T03:30:02","guid":{"rendered":"https:\/\/data-flair.training\/blogs\/?p=99494"},"modified":"2025-07-28T16:04:15","modified_gmt":"2025-07-28T10:34:15","slug":"machine-learning-text-summarization","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/","title":{"rendered":"Text Summarization using Machine Learning"},"content":{"rendered":"<div class='__iawmlf-post-loop-links' style='display:none;' data-iawmlf-post-links='[{&quot;id&quot;:329,&quot;href&quot;:&quot;https:\\\/\\\/www.kaggle.com\\\/snap\\\/amazon-fine-food-reviews&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20230925090633\\\/https:\\\/\\\/www.kaggle.com\\\/snap\\\/amazon-fine-food-reviews&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-08 06:55:15&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-11 08:15:19&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-14 10:44:59&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-17 15:55:57&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-20 16:39:24&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-24 03:26:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-27 12:29:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-30 14:43:45&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-02 18:16:57&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-06 01:40:26&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-09 05:14:38&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-12 06:38:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-15 08:46:03&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-18 12:55:24&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-21 
15:48:52&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-25 06:27:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-28 06:59:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-31 14:53:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-03 23:39:51&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-07 06:49:25&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-10 12:32:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-13 16:33:11&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-17 04:59:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-20 06:15:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-23 12:28:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-26 14:56:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-01 15:49:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-04 17:34:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-07 17:52:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-11 00:13:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-14 00:57:00&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-17 06:51:05&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-20 07:10:07&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-23 10:54:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-26 15:52:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-29 18:45:15&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-02 02:20:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-05 09:41:51&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-08 13:08:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-11 
14:07:26&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-14 14:57:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-17 16:03:36&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-21 04:14:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-24 16:28:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-27 19:00:56&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-01 13:57:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-04 16:24:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-07 17:39:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-11 04:01:17&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-14 04:03:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-17 05:47:18&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-20 12:59:04&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-24 16:09:11&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-27 20:46:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-31 08:11:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-03 08:48:32&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-06 10:46:28&quot;,&quot;http_code&quot;:200}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-06 10:46:28&quot;,&quot;http_code&quot;:200},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:330,&quot;href&quot;:&quot;https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/1HMYB2qnudW9EeaHaZdaqozVE3EI_ajOn\\\/view?usp=sharing&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251208065655\\\/https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/1HMYB2qnudW9EeaHaZdaqozVE3EI_ajOn\\\/view?usp=sharing&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-08 
07:44:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-11 08:15:19&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-14 10:44:59&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-17 15:56:00&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-20 16:39:24&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-24 03:26:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-27 12:29:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-30 14:43:45&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-02 18:16:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-06 01:40:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-09 05:14:38&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-12 06:38:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-15 08:52:45&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-18 12:55:24&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-21 15:48:52&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-25 06:27:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-28 06:59:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-31 14:53:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-03 23:39:51&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-07 06:49:25&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-10 12:32:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-13 16:33:11&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-17 04:59:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-20 06:15:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-23 12:28:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-26 
14:56:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-01 15:49:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-04 17:34:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-07 17:52:14&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-11 00:13:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-14 00:57:00&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-17 06:51:06&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-20 07:10:07&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-23 10:54:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-26 15:52:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-29 18:45:15&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-02 02:20:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-05 09:41:51&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-08 13:08:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-11 14:07:23&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-04-14 14:57:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-17 16:03:32&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-21 04:14:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-24 16:28:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-27 19:00:56&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-01 13:57:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-04 16:24:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-07 17:39:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-11 04:01:16&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-14 04:03:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-17 
05:47:18&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-20 12:59:04&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-24 16:09:11&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-27 20:46:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-31 08:11:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-03 08:48:32&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-06 10:46:28&quot;,&quot;http_code&quot;:200}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-06 10:46:28&quot;,&quot;http_code&quot;:200},&quot;process&quot;:&quot;done&quot;}]'><\/div>\n<p>Today you will learn how to create a Text Summarizer Project using Deep Learning. Let&#8217;s start!!!<\/p>\n<h3>What is Text Summarization?<\/h3>\n<p>The process of producing summaries from the huge sets of information while maintaining the actual context of information is called Text Summarization. The summary should be fluent and concise throughout.<\/p>\n<p>Google uses featured snippets to show the summary of the article or the answer for the user\u2019s query. 
These snippets are basically extracted from webpages.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-100304\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet.png\" alt=\"google dataflair snippet\" width=\"1274\" height=\"498\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet.png 1274w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet-768x300.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet-720x281.png 720w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet-520x203.png 520w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/google-dataflair-snippet-320x125.png 320w\" sizes=\"auto, (max-width: 1274px) 100vw, 1274px\" \/><\/a><\/p>\n<h3>Types of Text Summarizer<\/h3>\n<p><strong>Extractive Summarization:<\/strong> In this process, we focus on the vital information from the input sentence and extract that specific sentence to generate a summary. There is no generation of new sentences for summary, they are exactly the same that is present in the original group of input sentences<\/p>\n<p><strong>Example :<\/strong><\/p>\n<p><strong>Source text:<\/strong> DataFlair is an online, immersive, instructor-led, self-paced technology school for students around the world. DataFlair offers lifetime support, quizzes to sharpen student\u2019s knowledge, and various live project participation. DataFlair machine learning projects are best for students to gain practical knowledge for real-world problems.<\/p>\n<p><strong>Summary:<\/strong> DataFlair is an online school for students around the world. 
DataFlair offers lifetime support, quizzes, and live projects. DataFlair machine learning projects are best to gain knowledge for real-world problems.<\/p>\n<p><strong>Abstractive Summarization:<\/strong> This is the opposite of Extractive Summarization, which copies exact sentences from the input to build the summary. Abstractive Summarization focuses on the vital information of the original group of sentences and generates a new set of sentences for the summary. These new sentences might not be present in the original text.<\/p>\n<p><strong>Example :<\/strong><\/p>\n<p><strong>Source text:<\/strong> DataFlair is an online, immersive, instructor-led, self-paced technology school for students around the world. DataFlair offers lifetime support, quizzes to sharpen student\u2019s knowledge, and various live project participation. DataFlair machine learning projects are best for students to gain practical knowledge for real-world problems.<\/p>\n<p><strong>Summary:<\/strong> DataFlair is an online school where students are offered various quizzes and projects including machine learning to solve real-world problems.<\/p>\n<h3>What is Sequence to Sequence (Seq2Seq) modeling?<\/h3>\n<p>A Seq2Seq model is a model that takes a stream of sentences as an input and outputs another stream of sentences. This can be seen in Neural Machine Translation, where the input sentences are in one language and the output sentences are their translations into another language. Encoder and Decoder are the two main components used in seq2seq modeling. Let\u2019s look at each of them.<\/p>\n<p><strong>Encoder Model:<\/strong> Encoder Model is used to encode or transform the input sentences and generate feedback after every step. This feedback can be an internal state, i.e. the hidden state or cell state if we are using the LSTM layer. 
Encoder models capture the vital information from the input sentences while maintaining the context throughout.<\/p>\n<p>In Neural Machine translation, our input language will be passed into the encoder model where it will capture the contextual information without modifying the meaning of the input sequence. Outputs from the encoder model are then passed into the decoder model to get the output sequences.<\/p>\n<p><strong>Decoder Model:<\/strong> The decoder model is used to decode or predict the target sentences word by word. Decoder input data takes the input of target sentences and predicts the next word which is then fed into the next layer for the prediction. \u2018&lt;start&gt;\u2019 (start of target sentence) and \u2018&lt;end&gt;\u2019 (end of target sentence) are the two words that help the model to know what will be the initial variable to predict the next word and the ending variable to know the ending of the sentence. While training the model, we first provide the word \u2018&lt;start&gt;\u2019, the model then predicts the next word that is the decoder target data. 
This word is then fed as input data for the next timestep to get the next word prediction.<\/p>\n<p>For example, if our sentence is<strong> \u2018 I Love Python\u2019<\/strong> so we will add \u2018&lt;start&gt;\u2019 at starting and \u2018&lt;end&gt;\u2019 at the ending of the sentence therefore our sentence will be<strong> \u2018 &lt;start&gt; I Love Python &lt;end&gt; \u2019<\/strong> now let\u2019s see how it works.<\/p>\n<table>\n<tbody>\n<tr>\n<td><b>Timestep<\/b><\/td>\n<td><b>Input data<\/b><\/td>\n<td><b>Target data<\/b><\/td>\n<\/tr>\n<tr>\n<td><span style=\"font-weight: 400\">1<\/span><\/td>\n<td><span style=\"font-weight: 400\">&lt;start&gt;<\/span><\/td>\n<td><span style=\"font-weight: 400\">I<\/span><\/td>\n<\/tr>\n<tr>\n<td><span style=\"font-weight: 400\">2<\/span><\/td>\n<td><span style=\"font-weight: 400\">&lt;start&gt; I<\/span><\/td>\n<td><span style=\"font-weight: 400\">Love<\/span><\/td>\n<\/tr>\n<tr>\n<td><span style=\"font-weight: 400\">3<\/span><\/td>\n<td><span style=\"font-weight: 400\">&lt;start&gt; I Love<\/span><\/td>\n<td><span style=\"font-weight: 400\">Python<\/span><\/td>\n<\/tr>\n<tr>\n<td><span style=\"font-weight: 400\">4<\/span><\/td>\n<td><span style=\"font-weight: 400\">&lt;start&gt; I Love Python<\/span><\/td>\n<td><span style=\"font-weight: 400\">&lt;end&gt;<\/span><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>As you can see our input data will start from \u2018&lt;start&gt;\u2019 and the target will predict the next word with the help of input data at every timestep. Our input data doesn&#8217;t contain the last word as our target data at the last timestep is \u2018&lt;end&gt;\u2019 which tells us that we have reached the end of our sentence and stop the iteration. 
The same way our target data will be one-time step ahead as the first word \u2018&lt;start&gt;\u2019 is provided by the Input data.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-100305\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture.jpg\" alt=\"text summarization encoder decoder architecture\" width=\"924\" height=\"502\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture.jpg 924w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture-768x417.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture-720x391.jpg 720w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture-520x283.jpg 520w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarization-encoder-decoder-architecture-320x174.jpg 320w\" sizes=\"auto, (max-width: 924px) 100vw, 924px\" \/><\/a><\/p>\n<h3>What is an Attention Mechanism?<\/h3>\n<p>Let&#8217;s take an example to understand the Attention Mechanism. So below is the input text(review) and target text(summary).<\/p>\n<p><strong>Input text:<\/strong> Now that I&#8217;ve learned about machine learning, I&#8217;d like to work on some projects. Can someone recommend the best source for machine learning projects?<\/p>\n<p><strong>Target text:<\/strong> DataFlair is the best source for machine learning projects.<\/p>\n<p>As you can see, we have passed the input text into the model. 
Rather than focusing on the whole input text, which is very difficult to remember, we will only focus on specific words for the prediction. In our example, we will only focus on words like \u2018source\u2019, \u2018machine learning\u2019 and \u2018projects\u2019 to predict the target text.<\/p>\n<p>There are two classes of Attention Mechanisms.<\/p>\n<p>a) Global Attention<br \/>\nb) Local Attention<\/p>\n<p><strong>Global Attention:<\/strong> In Global attention, all the hidden states of every time step from the encoder model are used to generate the context vector.<\/p>\n<p><strong>Local Attention:<\/strong> In Local attention, only some of the hidden states from the encoder model are used to generate the context vector.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-100306\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types.jpg\" alt=\"ml attention types\" width=\"1368\" height=\"734\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types.jpg 1368w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types-768x412.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types-720x386.jpg 720w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types-520x279.jpg 520w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-attention-types-320x172.jpg 320w\" sizes=\"auto, (max-width: 1368px) 100vw, 1368px\" \/><\/a><\/p>\n<h3>About the project:<\/h3>\n<p>In this project, we will use many-to-many sequence models using the Abstractive Text Summarization technique to create models that predict the summary of the reviews. 
The model will be trained and tested on the first 100,000 rows of the dataset file \u2018Reviews.csv\u2019. Using the Attention mechanism we will focus on specific keywords while maintaining the context of our sentence.<\/p>\n<h3>Project Prerequisites:<\/h3>\n<p>This project requires you to have a good knowledge of Python, Deep Learning, and Natural Language Processing (NLP). You can install all the third-party modules for this project using the following command (pickle is part of the Python standard library and does not need to be installed):<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">pip install numpy pandas nltk tensorflow scikit-learn bs4 lxml\r\n<\/pre>\n<p>The versions which are used in this project for python and its corresponding modules are as follows:<\/p>\n<p>1) python: 3.8.5<br \/>\n2) TensorFlow: 2.3.1 <strong>*Note*<\/strong> : TensorFlow version should be 2.2 or higher in order to use Keras or else install Keras directly<br \/>\n3) sklearn: 0.24.2<br \/>\n4) bs4: 4.6.3<br \/>\n5) pickle: 4.0<br \/>\n6) numpy : 1.19.5<br \/>\n7) pandas: 1.1.5<br \/>\n8) nltk : 3.2.5<\/p>\n<h3>Text Summarizer Dataset<\/h3>\n<p>You can download the dataset file for this project from <a href=\"https:\/\/www.kaggle.com\/snap\/amazon-fine-food-reviews\"><strong>Amazon Fine Food Reviews<\/strong><\/a><\/p>\n<h3>Download Text Summarization Project Code<\/h3>\n<p>Please download the source code of text summarization with machine learning: <a href=\"https:\/\/drive.google.com\/file\/d\/1HMYB2qnudW9EeaHaZdaqozVE3EI_ajOn\/view?usp=sharing\"><strong>Text Summarization Project Code<\/strong><\/a><\/p>\n<h3>Project Structure :<\/h3>\n<p><strong>Reviews.csv:<\/strong> This is our dataset file which contains Amazon food reviews and summaries.<\/p>\n<p><strong>text_summarizer.py:<\/strong> In this file we will create and train our model with input and target to predict the summary.<\/p>\n<p><strong>s2s\/:<\/strong> This directory contains the optimizer, metrics, and weights of our trained model.<\/p>\n<p><strong>contractions.pkl:<\/strong> This file 
contains a dictionary with shortened words as keys and their expanded (original) forms as values.<\/p>\n<h3>Steps for Text Summarization:<\/h3>\n<h4>1) Import the Libraries<\/h4>\n<p>Firstly we will create a file called \u2018text_summarizer.py\u2019 and import all the libraries which have been shared in the prerequisites section.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#DataFlair Project\r\n#import all the required libraries\r\nimport numpy as np\r\nimport pandas as pd\r\nimport pickle\r\nfrom statistics import mode\r\nimport nltk\r\nfrom nltk import word_tokenize\r\nfrom nltk.stem import LancasterStemmer\r\nnltk.download('wordnet')\r\nnltk.download('stopwords')\r\nnltk.download('punkt')\r\nfrom nltk.corpus import stopwords\r\nfrom tensorflow.keras.models import Model\r\nfrom tensorflow.keras import models\r\nfrom tensorflow.keras import backend as K\r\nfrom tensorflow.keras.preprocessing.sequence import pad_sequences\r\nfrom tensorflow.keras.preprocessing.text import Tokenizer \r\nfrom tensorflow.keras.utils import plot_model\r\nfrom tensorflow.keras.layers import Input,LSTM,Embedding,Dense,Concatenate,Attention\r\nfrom sklearn.model_selection import train_test_split\r\nfrom bs4 import BeautifulSoup\r\n<\/pre>\n<h4>2) Parse the Dataset file.<\/h4>\n<p>We will read the dataset file, i.e. \u2018Reviews.csv\u2019, and extract all the input and target texts. For this we will be using the first 100,000 rows of our dataset for the training and testing part. It can be changed as per requirements. Our input will be the \u2018Text\u2019 column which is the review column and target will be the \u2018Summary\u2019 column. 
We will also drop the duplicate records and NA values from our dataframe.<\/p>\n<p><strong>Dataset file:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-100307\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file.png\" alt=\"dataset file\" width=\"1111\" height=\"487\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file.png 1111w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file-768x337.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file-720x316.png 720w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file-520x228.png 520w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/dataset-file-320x140.png 320w\" sizes=\"auto, (max-width: 1111px) 100vw, 1111px\" \/><\/a><\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#read the dataset file for text Summarizer\r\ndf=pd.read_csv(\"Reviews.csv\",nrows=100000)\r\n#drop the duplicate and na values from the records\r\ndf.drop_duplicates(subset=['Text'],inplace=True)\r\ndf.dropna(axis=0,inplace=True)\r\ninput_data = df.loc[:,'Text']\r\ntarget_data = df.loc[:,'Summary']\r\ntarget_data.replace('', np.nan, inplace=True)\r\n<\/pre>\n<h4>3) Preprocessing<\/h4>\n<p>Real-world texts are often messy and incomplete, so sending them directly to the model would cause errors. So, we clean all our texts and convert them into a presentable form for prediction tasks. 
So, firstly we will initialize all the variables and methods.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">input_texts=[]\r\ntarget_texts=[]\r\ninput_words=[]\r\ntarget_words=[]\r\ncontractions=pickle.load(open(\"contractions.pkl\",\"rb\"))['contractions']\r\n#initialize stop words and LancasterStemmer\r\nstop_words=set(stopwords.words('english'))\r\nstemm=LancasterStemmer()\r\n<\/pre>\n<p>Some of our texts are in HTML format and contain HTML tags, so first we will parse the text and remove all the HTML tags using the \u2018BeautifulSoup\u2019 library. After that, we tokenize our texts into words. We also check whether each word:<\/p>\n<p>1) contains non-alphabetic characters such as digits,<br \/>\n2) is shorter than 3 characters, or<br \/>\n3) is a stop word.<\/p>\n<p>If any of the above conditions matches, we will remove that particular word from the list of input or target words.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">def clean(texts,src):\r\n  #remove the html tags\r\n  texts = BeautifulSoup(texts, \"lxml\").text\r\n  #tokenize the text into words \r\n  words=word_tokenize(texts.lower())\r\n  #filter words which contains \\ \r\n  #integers or their length is less than 3\r\n  words= list(filter(lambda w:(w.isalpha() and len(w)&gt;=3),words))\r\n<\/pre>\n<p>We also have contractions in our input or target texts, i.e. combinations of two words shortened using an apostrophe or by dropping letters; for example, \u2018haven\u2019t\u2019 is short for \u2018have not\u2019. We will expand these kinds of words using the \u2018contractions.pkl\u2019 file which contains a dictionary having keys as shortened words and values as expanded words. 
Also we will stem all the input words to their root words.<\/p>\n<p><strong>Stemming:<\/strong> Stemming is the process of reducing words into their root words.<\/p>\n<p>For example, the text may contain a word like \u201cchocollate\u201d, which is a misspelling of \u201cchocolate\u201d. If we don\u2019t stem our words then the model will treat them as two different words. The stemmer will reduce the misspelled word to its root word, i.e. \u201cchocol\u201d. As a result, &#8220;chocol&#8221; is the root word for both \u201cchocolate\u201d and \u201cchocollate\u201d.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">  #contraction file to expand shortened words\r\n  words= [contractions[w] if w in contractions else w for w in words ]\r\n  #stem the words to their root word and filter stop words\r\n  if src==\"inputs\":\r\n    words= [stemm.stem(w) for w in words if w not in stop_words]\r\n  else:\r\n    words= [w for w in words if w not in stop_words]\r\n  return words\r\n<\/pre>\n<p>We will add \u2018sos\u2019 to the start and \u2018eos\u2019 at the end of target text to tell our model that this is the starting and ending of sentences.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#pass the input records and target records\r\nfor in_txt,tr_txt in zip(input_data,target_data):\r\n  in_words= clean(in_txt,\"inputs\")\r\n  input_texts+= [' '.join(in_words)]\r\n  input_words+= in_words\r\n  #add 'sos' at start and 'eos' at end of text\r\n  tr_words= clean(\"sos \"+tr_txt+\" eos\",\"target\")\r\n  target_texts+= [' '.join(tr_words)]\r\n  target_words+= tr_words\r\n<\/pre>\n<p>Now after cleaning the sentences we will filter duplicate words and sort them accordingly. 
Also we will store the total number of input and target words.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#store only unique words from input and target list of words\r\ninput_words = sorted(list(set(input_words)))\r\ntarget_words = sorted(list(set(target_words)))\r\nnum_in_words = len(input_words) #total number of input words\r\nnum_tr_words = len(target_words) #total number of target words\r\n \r\n#get the length of the input and target texts which appears most often  \r\nmax_in_len = mode([len(i) for i in input_texts])\r\nmax_tr_len = mode([len(i) for i in target_texts])\r\n \r\nprint(\"number of input words : \",num_in_words)\r\nprint(\"number of target words : \",num_tr_words)\r\nprint(\"maximum input length : \",max_in_len)\r\nprint(\"maximum target length : \",max_tr_len)\r\n<\/pre>\n<h4>4) Splitting the records<\/h4>\n<p>Split the dataset records into training and testing sets. We will be splitting in the 80:20 ratio where 80% record will be for training sets and 20% for testing sets.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#split the input and target text into 80:20 ratio or testing size of 20%.\r\nx_train,x_test,y_train,y_test=train_test_split(input_texts,target_texts,test_size=0.2,random_state=0) \r\n<\/pre>\n<h4>5) Text Vectorization<\/h4>\n<p>We will convert our word into integer sequence using vectorization technique.<\/p>\n<p>For example,<\/p>\n<p>L = [ \u2018what doing\u2019, \u2018how are you\u2019, \u2019good \u2019]<\/p>\n<p>Tokenize all the elements of list \u2018L\u2019 and make a dictionary having key as tokens and value as the counter number. 
So after the data is fit we get a dictionary as<\/p>\n<p>D = { \u2018what\u2019 : 1 , \u2018doing\u2019 :2 , \u2018how\u2019 : 3 , \u2018are\u2019 : 4 , \u2018you\u2019 :5 , \u2018good\u2019 : 6 }<\/p>\n<p>So we have fit our data, now let\u2019s transform the below list \u2018J\u2019 into integer sequences using our tokenizer.<\/p>\n<p>J = [ \u2018what are you doing\u2019, \u2018you are good\u2019 ]<\/p>\n<p>Transformed (Vectorized) J : [ [ 1 , 4 , 5 , 2 ] , [ 5 , 4 , 6 ] ]<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#train the tokenizer with all the words\r\nin_tokenizer = Tokenizer()\r\nin_tokenizer.fit_on_texts(x_train)\r\ntr_tokenizer = Tokenizer()\r\ntr_tokenizer.fit_on_texts(y_train)\r\n \r\n#convert text into sequence of integers\r\n#where the integer will be the index of that word\r\nx_train= in_tokenizer.texts_to_sequences(x_train) \r\ny_train= tr_tokenizer.texts_to_sequences(y_train)\r\n<\/pre>\n<p>After converting to integer sequences we will also make all the input and target sequences the same length for our model. So we will take the length of input sentences which has the highest frequency and store it in the \u2018max_in_len\u2019 variable, and repeat the same for the target data with \u2018max_tr_len\u2019. Now we will pad each sequence with 0\u2019s if it is shorter than the assigned maximum length.<\/p>\n<p>Our encoder input data will be padded \u2018x_train\u2019 and decoder input data will be padded \u2018y_train\u2019 but we will not include the last word i.e \u2018eos\u2019. Decoder target data will be same as decoder input data but it will be one timestep ahead as it will not include the start word i.e. 
\u2018sos\u2019 of our target sentence.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#pad array of 0's if the length is less than the maximum length \r\nen_in_data= pad_sequences(x_train,  maxlen=max_in_len, padding='post') \r\ndec_data= pad_sequences(y_train,  maxlen=max_tr_len, padding='post')\r\n \r\n#decoder input data will not include the last word \r\n#i.e. 'eos' in decoder input data\r\ndec_in_data = dec_data[:,:-1]\r\n#decoder target data will be one time step ahead as it will not include\r\n# the first word i.e 'sos'\r\ndec_tr_data = dec_data.reshape(len(dec_data),max_tr_len,1)[:,1:]\r\n<\/pre>\n<h4>6) Build the model.<\/h4>\n<p>We are using Stacked LSTM containing 3 layers of LSTM stacked on top of each other. This will make our prediction much better. As per your requirement, you can have more also. Let\u2019s understand our encoder model and decoder model.<\/p>\n<p><strong>Encoder:<\/strong> We will initialize the encoder input tensor using the \u2018Input\u2019 object. The expected shape of the batch will be 74 (maximum input length)-dimensions. Then we will create an \u2018Embedding Layer\u2019 which will have the total number of input words as the first argument and a shape of 500 which is the latent(hidden) dimension.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">K.clear_session() \r\nlatent_dim = 500\r\n \r\n#create input object of total number of encoder words\r\nen_inputs = Input(shape=(max_in_len,)) \r\nen_embedding = Embedding(num_in_words+1, latent_dim)(en_inputs) \r\n<\/pre>\n<p><strong>LSTM:<\/strong> Now we will create 3 stacked LSTM layers where the first LSTM layer will have input of encoder and like that create a continuous sequence of LSTM layers.<\/p>\n<p>The LSTM layer will capture all the contextual information present in the input sequence. We will return hidden state output and also states i.e. 
hidden state and cell state after execution of every LSTM layer.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#create 3 stacked LSTM layer with the shape of hidden dimension for text summarizer using deep learning\r\n#LSTM 1\r\nen_lstm1= LSTM(latent_dim, return_state=True, return_sequences=True) \r\nen_outputs1, state_h1, state_c1= en_lstm1(en_embedding) \r\n \r\n#LSTM2\r\nen_lstm2= LSTM(latent_dim, return_state=True, return_sequences=True) \r\nen_outputs2, state_h2, state_c2= en_lstm2(en_outputs1) \r\n \r\n#LSTM3\r\nen_lstm3= LSTM(latent_dim,return_sequences=True,return_state=True)\r\nen_outputs3 , state_h3 , state_c3= en_lstm3(en_outputs2)\r\n \r\n#encoder states\r\nen_states= [state_h3, state_c3]\r\n<\/pre>\n<p><strong>Decoder:<\/strong> Like Encoder we will initialize the decoder input tensor and then pass it to the only LSTM. Here, the decoder will also have the initial state where we will pass the hidden state and cell state values that we have obtained from the encoder\u2019s LSTM layer.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\"># Decoder. 
\r\ndec_inputs = Input(shape=(None,)) \r\ndec_emb_layer = Embedding(num_tr_words+1, latent_dim) \r\ndec_embedding = dec_emb_layer(dec_inputs) \r\n \r\n#initialize decoder's LSTM layer with the output states of encoder\r\ndec_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)\r\ndec_outputs, *_ = dec_lstm(dec_embedding,initial_state=en_states) \r\n<\/pre>\n<p><strong>Attention Layer:<\/strong> We will pass the encoder and decoder outputs into the attention layer and then we will concatenate attention layer outputs with the decoder outputs.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#Attention layer\r\nattention =Attention()\r\nattn_out = attention([dec_outputs,en_outputs3])\r\n \r\n#Concatenate the attention output with the decoder outputs\r\nmerge=Concatenate(axis=-1, name='concat_layer1')([dec_outputs,attn_out])\r\n<\/pre>\n<p>Now we will create our Dense Layer that is the output layer for our model. It will have the shape of the total number of target words and a softmax activation function.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#Dense layer (output layer)\r\ndec_dense = Dense(num_tr_words+1, activation='softmax') \r\ndec_outputs = dec_dense(merge) \r\n<\/pre>\n<h4>7) Train the model.<\/h4>\n<p>Finally, we will initialize our Model class with input and output data from the encoder and decoder layers. 
We can plot the model layers and also get the summary of our model.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#Model class and model summary for text Summarizer\r\nmodel = Model([en_inputs, dec_inputs], dec_outputs) \r\nmodel.summary()\r\nplot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)\r\n<\/pre>\n<p>Model Summary and plot:<\/p>\n<p>Model: &#8220;model&#8221;<br \/>\n___________________________________________________________________________________Layer (type) Output Shape Param # Connected to<br \/>\n===================================================================================input_5 (InputLayer) [(None, 74)] 0<br \/>\n___________________________________________________________________________________embedding (Embedding) (None, 74, 500) 16066000 input_5[0][0]<br \/>\n___________________________________________________________________________________lstm (LSTM) [(None, 74, 500), (N 2002000 embedding[0][0]<br \/>\n___________________________________________________________________________________input_6 (InputLayer) [(None, None)] 0<br \/>\n___________________________________________________________________________________lstm_1 (LSTM) [(None, 74, 500), (N 2002000 lstm[0][0]<br \/>\n___________________________________________________________________________________embedding_1 (Embedding) (None, None, 500) 7079000 input_6[0][0]<br \/>\n___________________________________________________________________________________lstm_2 (LSTM) [(None, 74, 500), (N 2002000 lstm_1[0][0]<br \/>\n___________________________________________________________________________________lstm_3 (LSTM) [(None, None, 500), 2002000 embedding_1[0][0]<br \/>\nlstm_2[0][1]<br \/>\nlstm_2[0][2]<br \/>\n___________________________________________________________________________________attention (Attention) (None, None, 500) 0 lstm_3[0][0]<br \/>\nlstm_2[0][0]<br 
\/>\n___________________________________________________________________________________concat_layer1 (Concatenate) (None, None, 1000) 0 lstm_3[0][0]<br \/>\nattention[0][0]<br \/>\n___________________________________________________________________________________dense (Dense) (None, None, 14158) 14172158 concat_layer1[0][0]<br \/>\n===================================================================================Total params: 45,325,158<\/p>\n<p>Trainable params: 45,325,158<\/p>\n<p>Non-trainable params: 0<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-100308\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot.png\" alt=\"ml model plot\" width=\"1088\" height=\"959\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot.png 1088w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot-768x677.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot-720x635.png 720w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot-520x458.png 520w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/ml-model-plot-320x282.png 320w\" sizes=\"auto, (max-width: 1088px) 100vw, 1088px\" \/><\/a><\/p>\n<p>We will pass the data and train our model with \u2018512\u2019 batch size, epoch of \u201810\u2019 and we will be using \u2018RMSprop\u2019 Optimizer to train our model. 
You can increase or decrease the number of epochs, but keep an eye on the validation loss.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">model.compile( \r\n    optimizer=\"rmsprop\", loss=\"sparse_categorical_crossentropy\", metrics=[\"accuracy\"] ) \r\nmodel.fit( \r\n    [en_in_data, dec_in_data],\r\n    dec_tr_data, \r\n    batch_size=512, \r\n    epochs=10, \r\n    validation_split=0.1,\r\n    )\r\n \r\n#Save model\r\nmodel.save(\"s2s\")\r\n<\/pre>\n<p>After our model gets trained we will get a directory as \u2018s2s\/\u2019 with \u2018saved_model.pb\u2019 which includes optimizer, losses, and metrics of our model. The weights are saved in the variables\/ directory.<\/p>\n<h4>8) Inference Model<\/h4>\n<p>We will be using the saved model to create an inference architecture for the encoder and decoder model. The inference model is used to test the new sentences for which the target sequence is not known.<\/p>\n<p><strong>Encoder Inference:<\/strong> Input for the inference encoder model will be the 0th layer, i.e. the Input object that we have created (you can check it from the above summary and model plot), and output will be the output of the last LSTM which is the 6th layer.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\"># encoder inference\r\nlatent_dim=500\r\n#\/content\/gdrive\/MyDrive\/Text Summarizer\/\r\n#load the model\r\nmodel = models.load_model(\"s2s\")\r\n \r\n#construct encoder model from the output of layer 6, i.e. the last encoder LSTM layer\r\nen_outputs,state_h_enc,state_c_enc = model.layers[6].output\r\nen_states=[state_h_enc,state_c_enc]\r\n#add input and state from the layer.\r\nen_model = Model(model.input[0],[en_outputs]+en_states)\r\n<\/pre>\n<p><strong>Decoder Inference:<\/strong> Same as the Encoder inference model we will get the input, embedding, and LSTM layers from the saved model. 
Initialize the decoder hidden input and the other two states with the shape of latent (hidden) dimensions.<br \/>\n<strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\"># decoder inference\r\n#create Input object for hidden and cell state for decoder\r\n#shape of layer with hidden or latent dimension\r\ndec_state_input_h = Input(shape=(latent_dim,))\r\ndec_state_input_c = Input(shape=(latent_dim,))\r\ndec_hidden_state_input = Input(shape=(max_in_len,latent_dim))\r\n \r\n# Get the embeddings and input layer from the model\r\ndec_inputs = model.input[1]\r\ndec_emb_layer = model.layers[5]\r\ndec_lstm = model.layers[7]\r\ndec_embedding= dec_emb_layer(dec_inputs)\r\n \r\n#add input and initialize LSTM layer with encoder LSTM states.\r\ndec_outputs2, state_h2, state_c2 = dec_lstm(dec_embedding, initial_state=[dec_state_input_h,dec_state_input_c])\r\n<\/pre>\n<p><strong>Attention Inference:<\/strong> In our case, the 8th layer is the attention layer. We will fetch it and pass the inference decoder output with the hidden state-input that we have initialized earlier. Then we will concatenate the decoder output with the attention layer output.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#Attention layer\r\nattention = model.layers[8]\r\nattn_out2 = attention([dec_outputs2,dec_hidden_state_input])\r\n \r\nmerge2 = Concatenate(axis=-1)([dec_outputs2, attn_out2])\r\n<\/pre>\n<p>And same for the Dense layer (output layer) which is the 10th layer of our saved model. 
Initialize the Inference Model class with the above data.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#Dense layer\r\ndec_dense = model.layers[10]\r\ndec_outputs2 = dec_dense(merge2)\r\n \r\n# Finally define the Model Class\r\ndec_model = Model(\r\n[dec_inputs] + [dec_hidden_state_input,dec_state_input_h,dec_state_input_c],\r\n[dec_outputs2] + [state_h2, state_c2])\r\n<\/pre>\n<p>Encode the input sequence as state vectors. Create an empty array of the target sequence and generate the start word i.e \u2018sos\u2019 in our case for every pair. Use this state value along with the input sequence to predict the output index. Use the reverse target word index to get the word from the output index and append it to the decoded sequence.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">#create a dictionary with a key as index and value as words.\r\nreverse_target_word_index = tr_tokenizer.index_word\r\nreverse_source_word_index = in_tokenizer.index_word\r\ntarget_word_index = tr_tokenizer.word_index\r\nreverse_target_word_index[0]=' '\r\n \r\ndef decode_sequence(input_seq):\r\n    #get the encoder output and states by passing the input sequence\r\n    en_out, en_h, en_c= en_model.predict(input_seq)\r\n \r\n    #target sequence with initial word as 'sos'\r\n    target_seq = np.zeros((1, 1))\r\n    target_seq[0, 0] = target_word_index['sos']\r\n \r\n    #when the iteration reaches the end of the text, the loop will stop\r\n    stop_condition = False\r\n    #append every predicted word in decoded sentence\r\n    decoded_sentence = \"\"\r\n    while not stop_condition: \r\n        #get predicted output, hidden and cell state.\r\n        output_words, dec_h, dec_c= dec_model.predict([target_seq] + [en_out,en_h, en_c])\r\n        \r\n        #get the index and from the dictionary get the word for that index.\r\n        word_index = np.argmax(output_words[0, -1, 
:])\r\n        text_word = reverse_target_word_index[word_index]\r\n        decoded_sentence += text_word +\" \"\r\n<\/pre>\n<p>Assign the index of our word to the target sequence so that in the next iteration the target sequence holds the previously predicted word. Iterate until the predicted word is the end word, i.e. \u2018eos\u2019 in our case, or the maximum length of the target text is reached.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">        # Exit condition: either hit max length\r\n        # or find a stop word or last word.\r\n        if text_word == \"eos\" or len(decoded_sentence) &gt; max_tr_len:\r\n            stop_condition = True\r\n        #update target sequence to the current word index.\r\n        target_seq = np.zeros((1, 1))\r\n        target_seq[0, 0] = word_index\r\n        en_h, en_c = dec_h, dec_c\r\n    #return the decoded sentence\r\n    return decoded_sentence\r\n<\/pre>\n<p>Finally, we have completed all the steps and now we can predict the summary for the input review.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">inp_review = input(\"Enter : \")\r\nprint(\"Review :\",inp_review)\r\ninp_review = clean(inp_review,\"inputs\")\r\ninp_review = ' '.join(inp_review)\r\ninp_x= in_tokenizer.texts_to_sequences([inp_review]) \r\ninp_x= pad_sequences(inp_x,  maxlen=max_in_len, padding='post')\r\n \r\nsummary=decode_sequence(inp_x.reshape(1,max_in_len))\r\nif 'eos' in summary :\r\n  summary=summary.replace('eos','')\r\nprint(\"\\nPredicted summary:\",summary);print(\"\\n\")\r\n<\/pre>\n<h3>Text Summarizer Output<\/h3>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-100309\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output.png\" alt=\"text summarizer 
output\" width=\"913\" height=\"439\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output.png 913w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output-768x369.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output-720x346.png 720w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output-520x250.png 520w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/text-summarizer-output-320x154.png 320w\" sizes=\"auto, (max-width: 913px) 100vw, 913px\" \/><\/a><\/p>\n<h3>Summary<\/h3>\n<p>Text summarization is the process of shortening long texts into concise summaries without losing key information. It is very useful in news apps, research tools, and customer service. There are two types of summarization\u2014extractive (selecting important lines) and abstractive (rewriting the text in new words). This machine learning project teaches how to handle language data, train models, and work with NLP pipelines.<\/p>\n<p>In this project, we have developed a Text Summarizer model which generates the summary from the provided review using the LSTM model and Attention Mechanism. We got an accuracy of 87.82% which is good as we have taken only 1,00,000 records for training and testing sets.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Today you will learn how to create a Text Summarizer Project using Deep Learning. Let&#8217;s start!!! What is Text Summarization? 
The process of producing summaries from the huge sets of information while maintaining the&#46;&#46;&#46;<\/p>\n","protected":false},"author":5,"featured_media":100310,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[36],"tags":[20697,20623,24954,24859,24860,24861,24862],"class_list":["post-99494","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-machine-learning","tag-machine-learning-project","tag-ml-project","tag-text-summarization-ml-project","tag-text-summarizer","tag-text-summarizer-project","tag-text-summarizer-project-code","tag-text-summarizer-using-deep-learning"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Text Summarization using Machine Learning - DataFlair<\/title>\n<meta name=\"description\" content=\"Process of producing summary from huge set of information maintaining actual context is Text Summarization. Create your own text summarizer.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Text Summarization using Machine Learning - DataFlair\" \/>\n<meta property=\"og:description\" content=\"Process of producing summary from huge set of information maintaining actual context is Text Summarization. 
Create your own text summarizer.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/\" \/>\n<meta property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2021-08-13T03:30:02+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2025-07-28T10:34:15+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/machine-learning-project-text-summarization.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"628\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"14 minutes\" \/>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"Text Summarization using Machine Learning - DataFlair","description":"Process of producing summary from huge set of information maintaining actual context is Text Summarization. 
Create your own text summarizer.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/","og_locale":"en_US","og_type":"article","og_title":"Text Summarization using Machine Learning - DataFlair","og_description":"Process of producing summary from huge set of information maintaining actual context is Text Summarization. Create your own text summarizer.","og_url":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2021-08-13T03:30:02+00:00","article_modified_time":"2025-07-28T10:34:15+00:00","og_image":[{"width":1200,"height":628,"url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/machine-learning-project-text-summarization.jpg","type":"image\/jpeg"}],"author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"14 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/7f83c342f5d1632d6f7b4b0b0f447823"},"headline":"Text Summarization using Machine Learning","datePublished":"2021-08-13T03:30:02+00:00","dateModified":"2025-07-28T10:34:15+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/"},"wordCount":2824,"commentCount":5,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/machine-learning-project-text-summarization.jpg","keywords":["machine learning project","ML project","text summarization ml project","Text Summarizer","Text Summarizer project","Text Summarizer project code","Text Summarizer using deep learning"],"articleSection":["Machine Learning Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/","url":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/","name":"Text Summarization using Machine Learning - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"primaryImageOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#primaryimage"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/machine-learning-project-text-summarization.jpg","datePublished":"2021-08-13T03:30:02+00:00","dateModified":"2025-07-28T10:34:15+00:00","description":"Process of producing summary from huge set of information maintaining actual context is Text Summarization. Create your own text summarizer.","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#primaryimage","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/machine-learning-project-text-summarization.jpg","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2021\/08\/machine-learning-project-text-summarization.jpg","width":1200,"height":628},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/machine-learning-text-summarization\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Machine Learning Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/machine-learning\/"},{"@type":"ListItem","position":3,"name":"Text Summarization using Machine 
Learning"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/7f83c342f5d1632d6f7b4b0b0f447823","name":"DataFlair Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","caption":"DataFlair 
Team"},"description":"DataFlair Team creates expert-level guides on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. Our goal is to empower learners with easy-to-understand content. Explore our resources for career growth and practical learning.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam1\/"}]}},"amp_enabled":false,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/99494","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/5"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=99494"}],"version-history":[{"count":5,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/99494\/revisions"}],"predecessor-version":[{"id":146284,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/99494\/revisions\/146284"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media\/100310"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=99494"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=99494"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=99494"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}