

{"id":74043,"date":"2019-12-28T14:11:58","date_gmt":"2019-12-28T08:41:58","guid":{"rendered":"https:\/\/data-flair.training\/blogs\/?p=74043"},"modified":"2021-06-21T12:19:11","modified_gmt":"2021-06-21T06:49:11","slug":"django-project-news-aggregator-app","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/","title":{"rendered":"Python Django Project &#8211; Learn to Build your own News Aggregator Web App"},"content":{"rendered":"<div class='__iawmlf-post-loop-links' style='display:none;' data-iawmlf-post-links='[{&quot;id&quot;:1269,&quot;href&quot;:&quot;https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/1xEXUfyHTALf4o64DO_h5IbLB4ZT3Y0Wl\\\/view&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20250412143532\\\/https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/1xEXUfyHTALf4o64DO_h5IbLB4ZT3Y0Wl\\\/view&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-09 04:25:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-12 06:50:09&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-16 05:23:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-19 14:43:21&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-24 04:42:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-27 08:44:12&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-30 09:50:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-02 13:11:11&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-05 15:07:11&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-09 01:45:34&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-13 02:36:14&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-16 09:03:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-21 
12:43:37&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-25 02:32:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-28 13:35:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-31 14:29:50&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-04 07:28:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-08 10:20:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-11 16:19:57&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-15 16:27:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-19 11:35:10&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-22 21:27:57&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-27 02:37:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-02 15:31:59&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-06 03:50:39&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-09 19:00:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-13 09:07:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-16 14:37:02&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-19 20:04:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-23 04:25:21&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-26 05:29:25&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-29 15:58:35&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-02 03:34:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-05 15:05:33&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-09 17:21:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-15 08:24:48&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-19 17:45:32&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-22 
19:38:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-27 12:55:25&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-30 13:28:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-04 11:40:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-08 09:57:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-13 06:11:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-16 06:58:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-26 01:25:26&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-29 04:19:59&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-01 04:38:19&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-04 14:36:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-07 15:54:08&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-11 01:47:28&quot;,&quot;http_code&quot;:200}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-11 01:47:28&quot;,&quot;http_code&quot;:200},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:1270,&quot;href&quot;:&quot;https:\\\/\\\/www.theonion.com&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251209033551\\\/https:\\\/\\\/theonion.com\\\/&quot;,&quot;redirect_href&quot;:&quot;https:\\\/\\\/theonion.com\\\/&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-10 07:34:21&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-19 05:13:34&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-22 09:59:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-26 11:21:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-30 04:23:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-02 22:39:04&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-06 
06:43:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-09 16:15:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-13 02:36:21&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-17 12:13:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-22 05:07:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-25 17:02:54&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-29 20:25:14&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-03 09:38:34&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-09 03:20:39&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-12 19:32:26&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-18 04:55:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-21 07:47:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-24 14:24:03&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-28 03:30:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-03 08:41:17&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-09 01:51:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-13 09:08:02&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-16 14:37:37&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-23 05:07:15&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-26 05:29:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-31 17:01:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-05 15:06:24&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-09 17:21:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-15 08:26:19&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-19 17:45:37&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-22 
19:40:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-27 12:55:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-30 13:28:26&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-04 11:40:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-08 09:58:05&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-13 06:11:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-16 06:58:49&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-27 18:00:29&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-31 16:43:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-04 14:36:49&quot;,&quot;http_code&quot;:200}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-04 14:36:49&quot;,&quot;http_code&quot;:200},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:1246,&quot;href&quot;:&quot;https:\\\/\\\/www.djangoproject.com&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251208010225\\\/https:\\\/\\\/www.djangoproject.com\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-09 04:15:25&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-12 06:50:17&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-15 08:09:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-18 08:29:53&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-21 12:48:10&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-24 17:29:55&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-27 18:33:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-30 20:13:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-02 22:39:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-06 00:32:33&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-09 
01:45:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-12 02:16:20&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-15 05:26:11&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-18 06:34:26&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-21 07:06:27&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-24 08:35:24&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-27 11:07:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-30 13:33:21&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-02-02 13:50:22&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-05 17:18:24&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-09 04:54:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-12 05:53:50&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-15 06:19:17&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-18 07:25:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-21 07:39:22&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-24 10:18:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-27 11:03:34&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-02 12:35:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-05 13:19:47&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-08 17:33:33&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-11 19:44:06&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-14 21:17:25&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-18 03:06:50&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-21 13:57:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-24 14:25:08&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-27 
14:29:13&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-30 19:17:47&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-03 06:28:13&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-06 14:26:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-09 17:17:54&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-13 08:22:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-16 08:54:17&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-19 17:45:34&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-22 19:40:39&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-26 05:52:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-29 06:55:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-02 09:28:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-05 17:31:02&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-08 17:47:15&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-12 04:11:05&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-15 07:45:55&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-18 10:46:46&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-21 11:47:35&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-24 15:16:59&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-27 16:17:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-30 18:32:36&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-03 02:42:16&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-06 03:37:56&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-09 13:48:13&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-12 21:24:13&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-12 
21:24:13&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;}]'><\/div>\n<p>After gaining knowledge from the Django tutorials, it&#8217;s time to implement and showcase that. In this Python django project, you will learn to build your own news aggregator web application by integrating Django with other technologies.<\/p>\n<p>Although, some prerequisite is important.<\/p>\n<h4>Prerequisite<\/h4>\n<p>You need to have some basic knowledge of these libraries:<\/p>\n<ul>\n<li>Django Framework<\/li>\n<li>BeautifulSoup<\/li>\n<li>requests module<\/li>\n<\/ul>\n<h4>What is a News Aggregator?<\/h4>\n<p>It is a web application which aggregates data (news articles) from multiple websites. Then presents the data in one location.<\/p>\n<p>News aggregator service is a very important start of the day.<\/p>\n<p>There are various publications and news sites online. They publish their content on multiple platforms. Now, imagine when you open 10-20 news sites every day. The time you waste to gain information. Information gain is everything in today\u2019s world.<\/p>\n<p>It can give you leverage over those who don\u2019t have it. Now, is there a way we can make it easier? Yes!!<\/p>\n<p>A news aggregator makes this task easier. In a news aggregator, you can select the websites you want to follow. Then the news aggregator collects the articles for you. And, you are just a click away to get information from various websites.<\/p>\n<p>This task otherwise takes too much time on our schedule.<\/p>\n<h4>About the Django Project<\/h4>\n<p>A news aggregator is a combination of web crawlers and web applications. Both of these technologies have their implementation in Python. That makes it easier for us.<\/p>\n<p>So, our news aggregator will work in 3 steps:<\/p>\n<ol>\n<li>It scrapes the web for the articles. 
(In this Django project, we are scraping a website called theonion)<\/li>\n<li>Then it stores each article&#8217;s image, link, and title.<\/li>\n<li>The stored objects in the database are served to the client. The client gets the information in a nice template.<\/li>\n<\/ol>\n<p>So, that\u2019s how our web app will work.<\/p>\n<p>You can download the complete source code of this Django project from the link below:<\/p>\n<p><a href=\"https:\/\/drive.google.com\/file\/d\/1xEXUfyHTALf4o64DO_h5IbLB4ZT3Y0Wl\/view\"><strong>News Aggregator Files<\/strong><\/a><\/p>\n<p>This is a screenshot of the page.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74049\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface.png\" alt=\"news aggregator interface - django project\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-interface-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>This might not look very interesting. 
There are lots of things we will need to do before getting this page.<\/p>\n<p>Also, check out the page of <em>theonion<\/em> website before proceeding.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74050\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page.png\" alt=\"theonion website page - django project\" width=\"1920\" height=\"943\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page-150x74.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page-300x147.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page-768x377.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page-1024x503.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/theonion-website-page-520x255.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>So, let\u2019s get started.<\/p>\n<h4>Steps to Build Django Project on News Aggregator App<\/h4>\n<p>Before starting, we will need to install some of the libraries. We will install the requests and BeautifulSoup libraries. 
You can install them using pip.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">pip install bs4\r\npip install requests<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74051\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation.png\" alt=\"libraries installation - django project\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/libraries-installation-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>Now, we will make a new Python Django project named <strong>DataFlair_NewsAggregator<\/strong>. Then we will make new application <strong>news<\/strong>.<\/p>\n<p><strong>Commands:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">django-admin startproject DataFlair_NewsAggregator<\/pre>\n<p>Move into the folder where manage.py is present.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">python manage.py startapp news<\/pre>\n<h4>Writing Models<\/h4>\n<p>We will be storing the urls and articles in our database. 
For that, we will need the model.<\/p>\n<p>In <strong>news\/models.py<\/strong>, create these models.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">from django.db import models\r\nclass Headline(models.Model):\r\n  title = models.CharField(max_length=200)\r\n  image = models.URLField(null=True, blank=True)\r\n  url = models.TextField()\r\n\r\n  def __str__(self):\r\n    return self.title\r\n<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74052\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline.png\" alt=\"class headline - django project\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/class-headline-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>Our models will be able to store three things:<\/p>\n<ol>\n<li>Title of the article<\/li>\n<li>URL of the origin or source<\/li>\n<li>URL of the article image<\/li>\n<\/ol>\n<p>We are using simple model fields for that purpose. Also, the image field can be blank. The <strong>__str__()<\/strong> method will return the string representation of the object. 
These are simple Django concepts.<\/p>\n<p>Now, let&#8217;s start with the steps for web crawlers.<\/p>\n<h4>Step 1 \u2013 Scrape the website<\/h4>\n<p>We will be scraping the website for getting articles. Web-Scraping means extracting data from the websites. We extract meaningful data from the websites. In this case, we will be extracting the articles from the\u00a0<em>theonion<\/em> website.<\/p>\n<p>To scrape the website, we will use <strong>beautifulsoup<\/strong> and <strong>requests module<\/strong>. These libraries are the bs4 and requests and modules are used for web crawling.<\/p>\n<p>Open <strong>news\/views.py<\/strong> file.<\/p>\n<p>First, import these libraries before using them.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">import requests\r\nfrom django.shortcuts import render, redirect\r\nfrom bs4 import BeautifulSoup as BSoup\r\nfrom news.models import Headline<\/pre>\n<p>We will be making the first view function as <strong>scrape()<\/strong>.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">def scrape(request):\r\n  session = requests.Session()\r\n  session.headers = {\"User-Agent\": \"Googlebot\/2.1 (+http:\/\/www.google.com\/bot.html)\"}\r\n  url = \"https:\/\/www.theonion.com\/\"\r\n\r\n  content = session.get(url, verify=False).content\r\n  soup = BSoup(content, \"html.parser\")\r\n  News = soup.find_all('div', {\"class\":\"curation-module__item\"})\r\n  for artcile in News:\r\n    main = artcile.find_all('a')[0]\r\n    link = main['href']\r\n    image_src = str(main.find('img')['srcset']).split(\" \")[-4]\r\n    title = main['title']\r\n    new_headline = Headline()\r\n    new_headline.title = title\r\n    new_headline.url = link\r\n    new_headline.image = image_src\r\n    new_headline.save()\r\n  return redirect(\"..\/\")<\/pre>\n<p><a 
href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74053\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape.png\" alt=\"def scrape - django project\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-scrape-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>This view function uses modules like <strong>requests, bs4 and Django\u2019s shortcuts<\/strong>.<\/p>\n<p>We have imported the model <strong>Headline<\/strong> from <strong>news.models<\/strong>. Also, we have other libraries.<\/p>\n<p>The first line of the function is a setting for requests framework. These settings are necessary. They will prevent the errors to stop the execution of the program. Then we write our view function <strong>scrape()<\/strong>. The scrape() method will scrape the news articles from the URL <strong>\u201ctheonion.com\u201d<\/strong>.<\/p>\n<p>The first variable is the session object of the requests module. These are essential to make a connection to the server. This is the abstraction provided by requests framework.<\/p>\n<p>The session variables have headers as HTTP headers. These headers are used by our function to request the webpage. 
The scraper acts like a normal HTTP client to the news site. The <strong>User-Agent<\/strong> key is important here.<\/p>\n<p>This HTTP header gives the server information about the client. We are using Googlebot for that purpose. When our client requests anything from the server, the server sees our request as coming from Googlebot. You can also configure it to look like a browser User-Agent.<\/p>\n<p>That won\u2019t affect our use-case though. After that, we introduce the content variable. We store the webpage or response given by the server in content. Now, BeautifulSoup comes in.<\/p>\n<p>BeautifulSoup is a library that can extract data from HTML web pages. We create a soup object where we pass the HTML page. Alongside the HTML page, we also pass the HTML parser as a parameter.<\/p>\n<p>The HTML parser will parse the HTML into a BeautifulSoup object. In this object, we can access HTML elements and their texts.<\/p>\n<p>In the <strong>News<\/strong> object, we store all the <strong>&lt;div&gt;<\/strong> elements of a particular class. We selected this class from the webpage inspection. We inspected the webpage of the website <a href=\"https:\/\/www.theonion.com\/\">theonion<\/a>. 
Now, we select the elements which have the information we need.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74054\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class.png\" alt=\"div class - django project\" width=\"1920\" height=\"943\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class-150x74.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class-300x147.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class-768x377.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class-1024x503.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/div-class-520x255.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>As you can see from this image, by inspecting the element, we find a common class. The rest is just extracting information from that element.<\/p>\n<p>Now we get 3 elements of this class. That means that three articles are present with this class. These articles have a very general structure. Now, we will extract the information which we need. In this case, we have to extract the <strong>title, link, <\/strong>and<strong> image link<\/strong>.<\/p>\n<p>Using a for loop, we can iterate over the soup objects. In the for loop, the main variable will hold the link to the original webpage. The main variable gets the anchor tag. Since the &lt;div&gt;s returned have only one &lt;a&gt; tag, we get most of our work done here.<\/p>\n<p>The &lt;a&gt; tag contains the title and href of the original link. 
We can access the href in &lt;a&gt; tag by writing <strong>main[\u2018href\u2019]<\/strong>.<\/p>\n<p>Similarly, we can extract the title by <strong>main[\u2018title\u2019]<\/strong>. Remember the <strong>main<\/strong> is the &lt;a&gt; tag beautifulsoup object.<\/p>\n<p>Then we find the image URL. To get the <strong>image_src<\/strong>, we find the image in the main. This is all according to the webpage layout. We are not doing this because of syntax.<\/p>\n<p>These are how the website has made its webpage. We are simply finding the elements and accessing them appropriately. You need to have some basics clear of beautiful soup and HTML.<\/p>\n<p>So, once we get the image, we extract the srcset attribute from the same.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74055\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset.png\" alt=\"img srcset - python django project\" width=\"1920\" height=\"943\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset-150x74.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset-300x147.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset-768x377.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset-1024x503.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/img-srcset-520x255.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>The srcset attribute contains various sizes of images, as we can see in the image. There we have to extract the size of the image which is big enough for us. 
We select the one with 800 width.<\/p>\n<p>We get a string that has the source of each image and its width. We can turn that string into a list and move through it using Python indexing. As you can see in the code, we use split() on the string to get a list. There we use index [-4]. That will give us the URL of the 800-width image. That is stored as a string in the image_src variable.<\/p>\n<h4>Step 2 \u2013 Store the data in the database<\/h4>\n<p>We have made our model Headline for this purpose. Now we will be performing the standard storing procedure. We create a new Headline() object. There we fill the corresponding fields.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">new_headline = Headline()\r\nnew_headline.title = title\r\nnew_headline.url = link\r\nnew_headline.image = image_src\r\nnew_headline.save()<\/pre>\n<p>This is the standard code for storing an object in the database.<\/p>\n<h4>Step 3 \u2013 Serve the stored database objects<\/h4>\n<p>This step is very easy too. We create a new view function for this purpose. That is the <strong>news_list()<\/strong> method. 
The code lies in the<strong> news\/views.py<\/strong> file.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">def news_list(request):\r\n    headlines = Headline.objects.all()[::-1]\r\n    context = {\r\n        'object_list': headlines,\r\n    }\r\n    return render(request, \"news\/home.html\", context)<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74056\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list.png\" alt=\"def news_list\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/def-news_list-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>Here is some simple <a href=\"https:\/\/www.djangoproject.com\/\">Django<\/a> code. We simply extract all the elements from the database. Since we want the latest info on top, we reverse the list. Then we simply pass the list in a context. The context is then given to <strong>home.html<\/strong> in folder <strong>news\/template\/news<\/strong>.<\/p>\n<h4>Writing Templates<\/h4>\n<p>Here is the code for <strong>home.html<\/strong>. In this template, we are using bootstrap and HTML. 
The code in the home.html:<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">&lt;!DOCTYPE html&gt;\r\n&lt;html&gt;\r\n&lt;head&gt;\r\n    &lt;title&gt;&lt;\/title&gt;\r\n    &lt;link rel=\"stylesheet\" href=\"https:\/\/maxcdn.bootstrapcdn.com\/bootstrap\/4.0.0\/css\/bootstrap.min.css\" integrity=\"sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW\/dAiS6JXm\" crossorigin=\"anonymous\"&gt;\r\n&lt;\/head&gt;\r\n&lt;body&gt;\r\n    &lt;div class=\"jumbotron\"&gt;\r\n        &lt;center&gt;&lt;h1&gt;DataFlair News Aggregator&lt;\/h1&gt;\r\n            &lt;a href=\"{% url 'scrape' %}\" class=\"btn btn-success\"&gt;Get my morning news&lt;\/a&gt;\r\n        &lt;\/form&gt;\r\n    &lt;\/center&gt;\r\n    &lt;\/div&gt;\r\n  &lt;div class=\"card-columns\" style=\"padding: 10px; margin: 20px;\"&gt;\r\n    {% for object in object_list %}\r\n    &lt;div class=\"card\" style=\"width: 18rem;border:5px black solid;\"&gt;\r\n  &lt;img class=\"card-img-top\" src = \"{{ object.image }}\"&gt;\r\n  &lt;div class=\"card-body\"&gt;\r\n    &lt;h5 class=\"card-title\"&gt;&lt;div class=\"card-body\"&gt;\r\n      &lt;a href=\"{{object.url}}\"&gt;&lt;h5 class=\"card-title\"&gt;{{object.title}}&lt;\/h5&gt;&lt;\/a&gt;\r\n    &lt;\/div&gt;&lt;\/h5&gt;\r\n    &lt;\/div&gt;\r\n  &lt;\/div&gt;\r\n  {% endfor %}\r\n&lt;\/div&gt;\r\n&lt;\/div&gt;\r\n    &lt;script\r\nsrc=\"http:\/\/code.jquery.com\/jquery-3.3.1.min.js\"\r\nintegrity=\"sha256-FgpCb\/KJQlLNfOu91ta32o\/NMZxltwRo8QtmkMRdAu8=\"\r\n    crossorigin=\"anonymous\"&gt;&lt;\/script&gt;\r\n    &lt;script src=\"https:\/\/cdnjs.cloudflare.com\/ajax\/libs\/popper.js\/1.12.9\/umd\/popper.min.js\" integrity=\"sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K\/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q\" crossorigin=\"anonymous\"&gt;&lt;\/script&gt;\r\n    &lt;script src=\"https:\/\/maxcdn.bootstrapcdn.com\/bootstrap\/4.0.0\/js\/bootstrap.min.js\" integrity=\"sha384-JZR6Spejh4U02d8jOt6vLEHfe\/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl\" 
crossorigin=\"anonymous\"&gt;&lt;\/script&gt;\r\n&lt;\/body&gt;\r\n&lt;\/html&gt;<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74057\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html.png\" alt=\"DOCTYPE html - django project\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/DOCTYPE-html-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>The basic knowledge of bootstrap and HTML can help here. It\u2019s a simple Django template.<\/p>\n<p>We have provided a link to the scrape view function. At line 10, the link to the scrape view function is provided. We will be defining our urls and then you will have a clearer picture.<\/p>\n<p>Then at line 15, our news logic is written. Here we print the news objects one by one. The for loop is used for that purpose.<\/p>\n<h4>Configuring urls.py<\/h4>\n<p>Last, we configure our urls.py file. 
Make a file <strong>news\/urls.py.<\/strong> Paste this code inside the urls.py.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">from django.urls import path\r\nfrom news.views import scrape, news_list\r\nurlpatterns = [\r\n  path('scrape\/', scrape, name=\"scrape\"),\r\n  path('', news_list, name=\"home\"),\r\n]\r\n\r\n<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74058\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_.png\" alt=\"from django.urls\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.urls_-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>Then we also need to connect this to main urls.py. 
Open <strong>DataFlair_NewsAggregator\/urls.py<\/strong> file and paste this code inside that or update it.<\/p>\n<p><strong>Code:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">from django.contrib import admin\r\nfrom django.urls import path, include\r\n\r\nurlpatterns = [\r\n    path('admin\/', admin.site.urls),\r\n    path('', include(\"news.urls\")),\r\n]<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74059\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib.png\" alt=\"from django.contrib - intermediate django project\" width=\"1920\" height=\"1030\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib-150x80.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib-300x161.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib-768x412.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib-1024x549.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/from-django.contrib-520x279.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>This is the normal Django code to connect urls.<\/p>\n<p>So, our Python example project is complete. Let\u2019s run it and see the homepage. 
In this case, when we start the server and open the home URL, the <strong>news_list<\/strong> view runs.<\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74060\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list.png\" alt=\"news_list - django project\" width=\"1904\" height=\"930\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list.png 1904w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list-150x73.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list-300x147.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list-768x375.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list-1024x500.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news_list-520x254.png 520w\" sizes=\"auto, (max-width: 1904px) 100vw, 1904px\" \/><\/a><\/p>\n<p><strong>Output:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-74061\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page.png\" alt=\"news aggregator page\" width=\"1920\" height=\"943\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page.png 1920w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page-150x74.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page-300x147.png 300w, 
https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page-768x377.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page-1024x503.png 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/news-aggregator-page-520x255.png 520w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/p>\n<p>You can click on the links. That will take you to the original article page.<\/p>\n<p>Now, you can configure this to gather your favorite article websites. Although, be wary of blocks. Many times, bots are not legally allowed to scrape content. So, web scraping comes at its own cost.<\/p>\n<p>But, for our purpose, we now know some very cool basics. We also have a very interesting project to showcase. You can enhance this Django application as much as you can.<\/p>\n<h4>Summary<\/h4>\n<p>We have successfully completed the first project in Django. We are using web scraping and Django. This integration is as easy as invoking a function in Python.<\/p>\n<p>You can make some more projects in Django using the same concepts. Django lets you integrate machine learning too.<\/p>\n<p>How was your experience working on the Django project? Do share in the comment section.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>After gaining knowledge from the Django tutorials, it&#8217;s time to implement and showcase that. 
In this Python django project, you will learn to build your own news aggregator web application by integrating Django with&#46;&#46;&#46;<\/p>\n","protected":false},"author":6,"featured_media":74070,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[19149],"tags":[21614,21611,19201,21610,21609,21612,21613],"class_list":["post-74043","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-django","tag-django-example-project","tag-django-mini-project","tag-django-project","tag-django-projects-with-source-code","tag-news-aggregator-app","tag-project-in-django","tag-python-django-project"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Python Django Project - Learn to Build your own News Aggregator Web App - DataFlair<\/title>\n<meta name=\"description\" content=\"Django Project on News Aggregator Web App - Learn to build a news aggregator application by using requests module &amp; BeautifulSoup libraries and following some easy steps.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Python Django Project - Learn to Build your own News Aggregator Web App - DataFlair\" \/>\n<meta property=\"og:description\" content=\"Django Project on News Aggregator Web App - Learn to build a news aggregator application by using requests module &amp; BeautifulSoup libraries and following some easy steps.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/\" \/>\n<meta 
property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2019-12-28T08:41:58+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2021-06-21T06:49:11+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/Django-project-news-aggregator-web-app.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"802\" \/>\n\t<meta property=\"og:image:height\" content=\"420\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"12 minutes\" \/>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Python Django Project - Learn to Build your own News Aggregator Web App - DataFlair","description":"Django Project on News Aggregator Web App - Learn to build a news aggregator application by using requests module & BeautifulSoup libraries and following some easy steps.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/","og_locale":"en_US","og_type":"article","og_title":"Python Django Project - Learn to Build your own News Aggregator Web App - DataFlair","og_description":"Django Project on News Aggregator Web App - Learn to build a news aggregator application by using requests module & BeautifulSoup libraries and following some easy steps.","og_url":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2019-12-28T08:41:58+00:00","article_modified_time":"2021-06-21T06:49:11+00:00","og_image":[{"width":802,"height":420,"url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/Django-project-news-aggregator-web-app.jpg","type":"image\/jpeg"}],"author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"12 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/2c58ecb4f73a39f0ef993f1ddfcd7b89"},"headline":"Python Django Project &#8211; Learn to Build your own News Aggregator Web App","datePublished":"2019-12-28T08:41:58+00:00","dateModified":"2021-06-21T06:49:11+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/"},"wordCount":1758,"commentCount":31,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/Django-project-news-aggregator-web-app.jpg","keywords":["django example project","django mini project","Django Project","django projects with source code","news aggregator app","project in django","python django project"],"articleSection":["Django Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/","url":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/","name":"Python Django Project - Learn to Build your own News Aggregator Web App - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"primaryImageOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#primaryimage"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/Django-project-news-aggregator-web-app.jpg","datePublished":"2019-12-28T08:41:58+00:00","dateModified":"2021-06-21T06:49:11+00:00","description":"Django Project on News Aggregator Web App - Learn to build a news aggregator application by using requests module & BeautifulSoup libraries and following some easy steps.","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#primaryimage","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/Django-project-news-aggregator-web-app.jpg","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/12\/Django-project-news-aggregator-web-app.jpg","width":802,"height":420,"caption":"Django project on news aggregator web app"},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/django-project-news-aggregator-app\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Django Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/django\/"},{"@type":"ListItem","position":3,"name":"Python Django Project &#8211; Learn to Build your own News Aggregator Web 
App"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/2c58ecb4f73a39f0ef993f1ddfcd7b89","name":"DataFlair Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","caption":"DataFlair Team"},"description":"The 
DataFlair Team provides industry-driven content on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. Our expert educators focus on delivering value-packed, easy-to-follow resources for tech enthusiasts and professionals.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam2\/"}]}},"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/74043","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/6"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=74043"}],"version-history":[{"count":12,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/74043\/revisions"}],"predecessor-version":[{"id":97536,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/74043\/revisions\/97536"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media\/74070"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=74043"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=74043"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=74043"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}