

{"id":69927,"date":"2019-09-18T12:52:31","date_gmt":"2019-09-18T07:22:31","guid":{"rendered":"https:\/\/data-flair.training\/blogs\/?p=69927"},"modified":"2025-07-29T18:37:56","modified_gmt":"2025-07-29T13:07:56","slug":"project-in-python-breast-cancer-classification","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/","title":{"rendered":"Project in Python &#8211; Breast Cancer Classification with Deep Learning"},"content":{"rendered":"<div class='__iawmlf-post-loop-links' style='display:none;' data-iawmlf-post-links='[{&quot;id&quot;:1301,&quot;href&quot;:&quot;https:\\\/\\\/www.kaggle.com\\\/paultimothymooney\\\/breast-histopathology-images&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20230530182805\\\/https:\\\/\\\/www.kaggle.com\\\/paultimothymooney\\\/breast-histopathology-images&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-09 04:47:04&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-12 06:42:35&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-15 07:50:48&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-18 08:26:25&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-21 08:49:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-24 09:16:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-27 10:23:17&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-30 14:14:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-02 14:18:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-05 14:32:06&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-08 14:52:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-11 15:54:27&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-14 
23:08:41&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-18 06:31:06&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-21 10:31:36&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-24 12:58:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-27 14:13:37&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-30 14:23:03&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-02 15:39:34&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-02-05 18:59:55&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-09 05:30:22&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-12 07:39:00&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-15 11:06:02&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-18 11:54:56&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-21 12:47:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-24 14:17:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-27 14:52:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-02 15:05:16&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-05 17:40:28&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-08 17:43:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-12 03:35:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-15 07:36:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-18 09:27:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-21 11:20:30&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-24 11:21:51&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-27 11:45:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-31 04:49:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-03 
09:17:17&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-06 10:59:51&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-09 13:27:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-12 16:59:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-15 21:18:39&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-18 22:25:02&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-22 04:06:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-25 10:48:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-28 10:51:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-01 15:50:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-04 16:07:20&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-07 17:49:45&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-10 22:48:12&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-14 01:10:36&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-17 04:54:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-20 09:30:52&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-23 16:00:24&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-26 16:05:54&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-29 17:19:14&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-01 19:49:17&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-04 20:24:14&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-08 11:00:25&quot;,&quot;http_code&quot;:200}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-08 
11:00:25&quot;,&quot;http_code&quot;:200},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:1399,&quot;href&quot;:&quot;https:\\\/\\\/drive.google.com\\\/open?id=1nEkiRNIdYUSi0Eyci19KceLJjObGB25m&quot;,&quot;archived_href&quot;:&quot;&quot;,&quot;redirect_href&quot;:&quot;https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/1nEkiRNIdYUSi0Eyci19KceLJjObGB25m\\\/view?usp=drive_open&quot;,&quot;checks&quot;:[],&quot;broken&quot;:false,&quot;last_checked&quot;:null,&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:196,&quot;href&quot;:&quot;https:\\\/\\\/data-flair.training\\\/python-course&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20221202094432\\\/https:\\\/\\\/data-flair.training\\\/python-course\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-07 05:24:18&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-10 05:34:57&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-13 06:12:26&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-16 06:21:42&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-19 06:48:48&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-22 07:16:54&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-25 08:05:05&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2025-12-28 08:49:49&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2025-12-31 09:46:45&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-03 10:12:54&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-06 10:42:33&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-09 10:57:01&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-12 11:54:23&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-15 12:14:26&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-18 
13:17:10&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-01-21 13:52:15&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-24 14:11:40&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-27 14:14:11&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-01-30 14:16:41&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-02 14:52:55&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-05 15:01:05&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-08 16:06:39&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-11 16:11:33&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-14 16:19:45&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-02-17 16:28:30&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-20 17:54:17&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-02-23 20:19:55&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-02-26 20:21:47&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-01 22:14:32&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-04 23:17:20&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-03-07 23:32:19&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-10 23:52:41&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-14 02:20:25&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-17 03:02:13&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-20 06:41:26&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-23 07:49:18&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-03-26 08:11:47&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-03-29 09:42:35&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-01 09:56:38&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-04 
11:37:14&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-07 11:40:09&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-10 13:29:08&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-13 14:18:19&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-04-16 15:12:43&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-19 15:52:15&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-04-22 15:59:16&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-04-25 17:16:29&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-04-28 17:26:04&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-05-01 17:59:04&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-05-04 18:22:12&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-05-07 18:32:35&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-05-10 19:03:53&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-05-13 19:27:05&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-05-16 20:34:56&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-05-19 22:34:22&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-05-22 23:36:38&quot;,&quot;http_code&quot;:403},{&quot;date&quot;:&quot;2026-05-26 01:26:46&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-05-29 03:26:33&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-06-01 04:50:36&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-06-04 05:22:51&quot;,&quot;http_code&quot;:404},{&quot;date&quot;:&quot;2026-06-07 05:24:39&quot;,&quot;http_code&quot;:403}],&quot;broken&quot;:true,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-07 05:24:39&quot;,&quot;http_code&quot;:403},&quot;process&quot;:&quot;done&quot;}]'><\/div>\n<p>If you want to master Python programming language then you can&#8217;t skip projects in Python. 
After publishing 4 advanced python projects, DataFlair today came with another one that is the Breast Cancer Classification project in Python. To crack your next Python Interview, practice these projects thoroughly and if you face any confusion, do comment, DataFlair is always ready to help you.<\/p>\n<p>Before we begin this Breast Cancer Classification Project in Python, let me provide you the list of advanced python projects published by DataFlair:<\/p>\n<ol>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/advanced-python-project-detecting-fake-news\/\">Fake News Detection Python Project\u00a0<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-machine-learning-project-detecting-parkinson-disease\/\">Parkinson\u2019s Disease Detection Python Project\u00a0<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/project-in-python-colour-detection\/\">Color Detection Python Project<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-mini-project-speech-emotion-recognition\/\">Speech Emotion Recognition Python Project\u00a0<\/a><\/li>\n<li>Breast Cancer Classification Python Project<\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-project-gender-age-detection\/\">Age and Gender Detection Python Project\u00a0<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-deep-learning-project-handwritten-digit-recognition\/\">Handwritten Digit Recognition Python Project<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-chatbot-project\/\">Chatbot Python Project<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-project-driver-drowsiness-detection-system\/\">Driver Drowsiness Detection Python Project<\/a><\/li>\n<li><a href=\"https:\/\/data-flair.training\/blogs\/python-project-traffic-signs-recognition\/\">Traffic Signs Recognition Python Project<\/a><\/li>\n<li><a 
href=\"https:\/\/data-flair.training\/blogs\/python-based-project-image-caption-generator-cnn\/\">Image Caption Generator Python Project<\/a><\/li>\n<\/ol>\n<h3>Breast Cancer Classification Project in Python<\/h3>\n<p>Get familiar with the terms used in the Breast Cancer Classification project in Python.<\/p>\n<h4>What is Deep Learning?<\/h4>\n<p>An intensive approach to <a href=\"https:\/\/data-flair.training\/blogs\/machine-learning-tutorials-home\/\"><em><strong>Machine Learning<\/strong><\/em><\/a>, Deep Learning is inspired by the workings of the human brain and its biological neural networks. Architectures such as deep neural networks, recurrent neural networks, convolutional neural networks, and deep belief networks are made of multiple layers for the data to pass through before finally producing the output. Deep Learning serves to improve AI and make many of its applications possible; it is applied in many fields, such as computer vision, speech recognition, natural language processing, audio recognition, and drug design.<\/p>\n<h4>What is Keras?<\/h4>\n<p>Keras is an open-source neural-network library written in Python. It is a high-level API and can run on top of TensorFlow, CNTK, and Theano. Keras is all about enabling fast experimentation and prototyping while running seamlessly on CPU and GPU. It is user-friendly, modular, and extensible.<\/p>\n<h4>Breast Cancer Classification &#8211; Objective<\/h4>\n<p>To build a breast cancer classifier on an IDC dataset that can accurately classify a histology image as benign or malignant.<\/p>\n<h4>Breast Cancer Classification &#8211; About the Python Project<\/h4>\n<p>In this project in python, we\u2019ll build a classifier to train on 80% of a breast cancer histology image dataset. Of this, we\u2019ll keep 10% of the data for validation. 
Using Keras, we\u2019ll define a <a href=\"https:\/\/data-flair.training\/blogs\/convolutional-neural-networks-tutorial\/\"><em><strong>CNN (Convolutional Neural Network)<\/strong><\/em><\/a>, call it CancerNet, and train it on our images. We\u2019ll then derive a confusion matrix to analyze the performance of the model.<\/p>\n<p>IDC is Invasive Ductal Carcinoma, a cancer that develops in a milk duct and invades the fibrous or fatty breast tissue outside the duct; it is the most common form of breast cancer, accounting for about 80% of all breast cancer diagnoses. Histology is the study of the microscopic structure of tissues.<\/p>\n<h4>The Dataset<\/h4>\n<p>We&#8217;ll use the IDC_regular dataset (the breast cancer histology image dataset) from Kaggle. This dataset holds 277,524 patches of size 50&#215;50 extracted from 162 whole mount slide images of breast cancer specimens scanned at 40x. Of these, 198,738 test negative and 78,786 test positive with IDC. The dataset is available in the public domain and you can <a href=\"https:\/\/www.kaggle.com\/paultimothymooney\/breast-histopathology-images\/\" target=\"_blank\" rel=\"noopener noreferrer\"><strong>download it here<\/strong><\/a>. You\u2019ll need a minimum of 3.02GB of disk space for this.<\/p>\n<p>Filenames in this dataset look like this:<\/p>\n<p style=\"text-align: center\">8863_idx5_x451_y1451_class0<\/p>\n<p>Here, 8863_idx5 is the patient ID, 451 and 1451 are the x- and y-coordinates of the crop, and 0 is the class label (0 denotes absence of IDC).<\/p>\n<h4>Prerequisites<\/h4>\n<p>You\u2019ll need to install some <a href=\"https:\/\/data-flair.training\/blogs\/python-packages\/\"><em><strong>python packages<\/strong><\/em><\/a> to be able to run this advanced python project. 
You can do this with pip-<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">pip install numpy opencv-python pillow tensorflow keras imutils scikit-learn matplotlib<\/pre>\n<h3>Steps for Advanced Project in Python &#8211; Breast Cancer Classification<\/h3>\n<p>1. <a href=\"https:\/\/drive.google.com\/open?id=1nEkiRNIdYUSi0Eyci19KceLJjObGB25m\"><em><strong>Download this zip<\/strong><\/em><\/a>. Unzip it at your preferred location, get there.<\/p>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69941\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project.jpg\" alt=\"breast cancer detection python project\" width=\"977\" height=\"171\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project.jpg 977w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project-150x26.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project-300x53.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project-768x134.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/breast-cancer-detection-python-project-520x91.jpg 520w\" sizes=\"auto, (max-width: 977px) 100vw, 977px\" \/><\/a><\/p>\n<p>2. Now, inside the inner breast-cancer-classification directory, create directory datasets- inside this, create directory original:<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">mkdir datasets\r\nmkdir datasets\\original<\/pre>\n<p>3. Download the dataset.<\/p>\n<p>4. 
Unzip the dataset in the original directory. To observe the structure of this directory, we\u2019ll use the tree command:<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">cd breast-cancer-classification\\breast-cancer-classification\\datasets\\original\r\ntree<\/pre>\n<p><strong>Output Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69942\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python.jpg\" alt=\"original structure project in python\" width=\"981\" height=\"480\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python.jpg 981w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python-150x73.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python-300x147.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python-768x376.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/original-structure-project-in-python-520x254.jpg 520w\" sizes=\"auto, (max-width: 981px) 100vw, 981px\" \/><\/a><\/p>\n<p>We have a directory for each patient ID. And in each such directory, we have the 0 and 1 directories for images with benign and malignant content.<\/p>\n<h4>config.py:<\/h4>\n<p>This holds some configuration we\u2019ll need for building the dataset and training the model. 
You\u2019ll find this in the cancernet directory.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">import os\r\n\r\nINPUT_DATASET = \"datasets\/original\"\r\n\r\nBASE_PATH = \"datasets\/idc\"\r\nTRAIN_PATH = os.path.sep.join([BASE_PATH, \"training\"])\r\nVAL_PATH = os.path.sep.join([BASE_PATH, \"validation\"])\r\nTEST_PATH = os.path.sep.join([BASE_PATH, \"testing\"])\r\n\r\nTRAIN_SPLIT = 0.8\r\nVAL_SPLIT = 0.1<\/pre>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/config-project-in-python.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69943\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/config-project-in-python.jpg\" alt=\"breast cancer classification - python project\" width=\"673\" height=\"393\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/config-project-in-python.jpg 673w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/config-project-in-python-150x88.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/config-project-in-python-300x175.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/config-project-in-python-520x304.jpg 520w\" sizes=\"auto, (max-width: 673px) 100vw, 673px\" \/><\/a><\/p>\n<p>Here, we declare the path to the input dataset (datasets\/original), that for the new directory (datasets\/idc), and the paths for the training, validation, and testing directories using the base path. We also declare that 80% of the entire dataset will be used for training, and of that, 10% will be used for validation.<\/p>\n<h4>build_dataset.py:<\/h4>\n<p>This will split our dataset into training, validation, and testing sets in the ratio mentioned above- 80% for training (of that, 10% for validation) and 20% for testing. 
With the ImageDataGenerator from Keras, we will extract batches of images to avoid making space for the entire dataset in memory at once.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">from cancernet import config\r\nfrom imutils import paths\r\nimport random, shutil, os\r\n\r\noriginalPaths=list(paths.list_images(config.INPUT_DATASET))\r\nrandom.seed(7)\r\nrandom.shuffle(originalPaths)\r\n\r\nindex=int(len(originalPaths)*config.TRAIN_SPLIT)\r\ntrainPaths=originalPaths[:index]\r\ntestPaths=originalPaths[index:]\r\n\r\nindex=int(len(trainPaths)*config.VAL_SPLIT)\r\nvalPaths=trainPaths[:index]\r\ntrainPaths=trainPaths[index:]\r\n\r\ndatasets=[(\"training\", trainPaths, config.TRAIN_PATH),\r\n          (\"validation\", valPaths, config.VAL_PATH),\r\n          (\"testing\", testPaths, config.TEST_PATH)\r\n]\r\n\r\nfor (setType, originalPaths, basePath) in datasets:\r\n        print(f'Building {setType} set')\r\n\r\n        if not os.path.exists(basePath):\r\n                print(f'Building directory {basePath}')\r\n                os.makedirs(basePath)\r\n\r\n        for path in originalPaths:\r\n                file=path.split(os.path.sep)[-1]\r\n                label=file[-5:-4]\r\n\r\n                labelPath=os.path.sep.join([basePath,label])\r\n                if not os.path.exists(labelPath):\r\n                        print(f'Building directory {labelPath}')\r\n                        os.makedirs(labelPath)\r\n\r\n                newPath=os.path.sep.join([labelPath, file])\r\n                shutil.copy2(path, newPath)\r\n\r\n<\/pre>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/build-dataset-python-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69938\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/build-dataset-python-project.jpg\" alt=\"python machine learning 
project\" width=\"668\" height=\"720\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/build-dataset-python-project.jpg 668w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/build-dataset-python-project-139x150.jpg 139w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/build-dataset-python-project-278x300.jpg 278w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/build-dataset-python-project-520x560.jpg 520w\" sizes=\"auto, (max-width: 668px) 100vw, 668px\" \/><\/a><\/p>\n<p>In this, we&#8217;ll import from config, imutils, random, shutil, and os. We&#8217;ll build a list of original paths to the images, then shuffle the list. Then, we calculate an index by multiplying the length of this list by 0.8 so we can slice this list to get sublists for the training and testing datasets. Next, we calculate a second index that sets aside 10% of the training list for validation and keeps the rest for training itself.<\/p>\n<p>Now, datasets is a list with <em><strong><a href=\"https:\/\/data-flair.training\/blogs\/python-tuple\/\">tuples<\/a><\/strong><\/em> for information about the training, validation, and testing sets. These hold the paths and the base path for each. For each set type, list of paths, and base path in this list, we&#8217;ll print, say, &#8216;Building testing set&#8217;. If the base path does not exist, we&#8217;ll create the directory. And for each path in originalPaths, we&#8217;ll extract the filename and the class label. We&#8217;ll build the path to the label directory (0 or 1); if it doesn&#8217;t exist yet, we&#8217;ll explicitly create this directory. Now, we&#8217;ll build the path to the resulting image and copy the image there, where it belongs.<\/p>\n<p>5. 
Run the script build_dataset.py:<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">py build_dataset.py<\/pre>\n<p><strong>Output Screenshot:<\/strong><br \/>\n<a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69945\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset.jpg\" alt=\"build dataset\" width=\"976\" height=\"242\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset.jpg 976w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset-150x37.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset-300x74.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset-768x190.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-build-dataset-520x129.jpg 520w\" sizes=\"auto, (max-width: 976px) 100vw, 976px\" \/><\/a><\/p>\n<h4>cancernet.py:<\/h4>\n<p>We\u2019ll build a CNN (Convolutional Neural Network) and call it CancerNet. 
This network performs the following operations:<\/p>\n<ul>\n<li>Use 3&#215;3 CONV filters<\/li>\n<li>Stack these filters on top of each other<\/li>\n<li>Perform max-pooling<\/li>\n<li>Use depthwise separable convolution (more efficient, takes up less memory)<\/li>\n<\/ul>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">from keras.models import Sequential\r\nfrom keras.layers.normalization import BatchNormalization\r\nfrom keras.layers.convolutional import SeparableConv2D\r\nfrom keras.layers.convolutional import MaxPooling2D\r\nfrom keras.layers.core import Activation\r\nfrom keras.layers.core import Flatten\r\nfrom keras.layers.core import Dropout\r\nfrom keras.layers.core import Dense\r\nfrom keras import backend as K\r\n\r\nclass CancerNet:\r\n  @staticmethod\r\n  def build(width,height,depth,classes):\r\n    model=Sequential()\r\n    shape=(height,width,depth)\r\n    channelDim=-1\r\n\r\n    if K.image_data_format()==\"channels_first\":\r\n      shape=(depth,height,width)\r\n      channelDim=1\r\n\r\n    model.add(SeparableConv2D(32, (3,3), padding=\"same\",input_shape=shape))\r\n    model.add(Activation(\"relu\"))\r\n    model.add(BatchNormalization(axis=channelDim))\r\n    model.add(MaxPooling2D(pool_size=(2,2)))\r\n    model.add(Dropout(0.25))\r\n\r\n    model.add(SeparableConv2D(64, (3,3), padding=\"same\"))\r\n    model.add(Activation(\"relu\"))\r\n    model.add(BatchNormalization(axis=channelDim))\r\n    model.add(SeparableConv2D(64, (3,3), padding=\"same\"))\r\n    model.add(Activation(\"relu\"))\r\n    model.add(BatchNormalization(axis=channelDim))\r\n    model.add(MaxPooling2D(pool_size=(2,2)))\r\n    model.add(Dropout(0.25))\r\n\r\n    model.add(SeparableConv2D(128, (3,3), padding=\"same\"))\r\n    model.add(Activation(\"relu\"))\r\n    model.add(BatchNormalization(axis=channelDim))\r\n    model.add(SeparableConv2D(128, (3,3), padding=\"same\"))\r\n    model.add(Activation(\"relu\"))\r\n    
model.add(BatchNormalization(axis=channelDim))\r\n    model.add(SeparableConv2D(128, (3,3), padding=\"same\"))\r\n    model.add(Activation(\"relu\"))\r\n    model.add(BatchNormalization(axis=channelDim))\r\n    model.add(MaxPooling2D(pool_size=(2,2)))\r\n    model.add(Dropout(0.25))\r\n\r\n    model.add(Flatten())\r\n    model.add(Dense(256))\r\n    model.add(Activation(\"relu\"))\r\n    model.add(BatchNormalization())\r\n    model.add(Dropout(0.5))\r\n\r\n    model.add(Dense(classes))\r\n    model.add(Activation(\"softmax\"))\r\n\r\n    return model<\/pre>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69946\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project.jpg\" alt=\"cancernet CNN python mini project\" width=\"822\" height=\"706\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project.jpg 822w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project-150x129.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project-300x258.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project-768x660.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-python-mini-project-520x447.jpg 520w\" sizes=\"auto, (max-width: 822px) 100vw, 822px\" \/><\/a><\/p>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full 
wp-image-69947\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project.jpg\" alt=\"cancernet CNN interesting python project\" width=\"825\" height=\"310\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project.jpg 825w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project-150x56.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project-300x113.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project-768x289.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/cancernet-CNN-interesting-python-project-520x195.jpg 520w\" sizes=\"auto, (max-width: 825px) 100vw, 825px\" \/><\/a><\/p>\n<p>We use the Sequential API to build CancerNet and SeparableConv2D to implement depthwise separable convolutions. The class CancerNet has a static method build that takes four parameters: the width and height of the image, its depth (the number of color channels in each image), and the number of classes the network will predict between, which, for us, is 2 (0 and 1).<\/p>\n<p>In this method, we initialize model and shape. When using channels_first, we update the shape and the channel dimension.<\/p>\n<p>Now, we&#8217;ll define three DEPTHWISE_CONV =&gt; RELU =&gt; POOL blocks, each stacking more convolution layers and using a greater number of filters than the one before. The softmax classifier outputs prediction percentages for each class. In the end, we return the model.<\/p>\n<h4>train_model.py:<\/h4>\n<p>This trains and evaluates our model. 
Here, we\u2019ll import from keras, sklearn, cancernet, config, imutils, matplotlib, numpy, and os.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"null\">import matplotlib\r\nmatplotlib.use(\"Agg\")\r\n\r\nfrom keras.preprocessing.image import ImageDataGenerator\r\nfrom keras.callbacks import LearningRateScheduler\r\nfrom keras.optimizers import Adagrad\r\nfrom keras.utils import np_utils\r\nfrom sklearn.metrics import classification_report\r\nfrom sklearn.metrics import confusion_matrix\r\nfrom cancernet.cancernet import CancerNet\r\nfrom cancernet import config\r\nfrom imutils import paths\r\nimport matplotlib.pyplot as plt\r\nimport numpy as np\r\nimport os\r\n\r\nNUM_EPOCHS=40; INIT_LR=1e-2; BS=32\r\n\r\ntrainPaths=list(paths.list_images(config.TRAIN_PATH))\r\nlenTrain=len(trainPaths)\r\nlenVal=len(list(paths.list_images(config.VAL_PATH)))\r\nlenTest=len(list(paths.list_images(config.TEST_PATH)))\r\n\r\ntrainLabels=[int(p.split(os.path.sep)[-2]) for p in trainPaths]\r\ntrainLabels=np_utils.to_categorical(trainLabels)\r\nclassTotals=trainLabels.sum(axis=0)\r\nclassWeight=classTotals.max()\/classTotals\r\n\r\ntrainAug = ImageDataGenerator(\r\n  rescale=1\/255.0,\r\n  rotation_range=20,\r\n  zoom_range=0.05,\r\n  width_shift_range=0.1,\r\n  height_shift_range=0.1,\r\n  shear_range=0.05,\r\n  horizontal_flip=True,\r\n  vertical_flip=True,\r\n  fill_mode=\"nearest\")\r\n\r\nvalAug=ImageDataGenerator(rescale=1 \/ 255.0)\r\n\r\ntrainGen = trainAug.flow_from_directory(\r\n  config.TRAIN_PATH,\r\n  class_mode=\"categorical\",\r\n  target_size=(48,48),\r\n  color_mode=\"rgb\",\r\n  shuffle=True,\r\n  batch_size=BS)\r\nvalGen = valAug.flow_from_directory(\r\n  config.VAL_PATH,\r\n  class_mode=\"categorical\",\r\n  target_size=(48,48),\r\n  color_mode=\"rgb\",\r\n  shuffle=False,\r\n  batch_size=BS)\r\ntestGen = valAug.flow_from_directory(\r\n  config.TEST_PATH,\r\n  class_mode=\"categorical\",\r\n  target_size=(48,48),\r\n  color_mode=\"rgb\",\r\n  
shuffle=False,\r\n  batch_size=BS)\r\n\r\nmodel=CancerNet.build(width=48,height=48,depth=3,classes=2)\r\nopt=Adagrad(lr=INIT_LR,decay=INIT_LR\/NUM_EPOCHS)\r\nmodel.compile(loss=\"binary_crossentropy\",optimizer=opt,metrics=[\"accuracy\"])\r\n\r\n\r\nM=model.fit_generator(\r\n  trainGen,\r\n  steps_per_epoch=lenTrain\/\/BS,\r\n  validation_data=valGen,\r\n  validation_steps=lenVal\/\/BS,\r\n  class_weight=classWeight,\r\n  epochs=NUM_EPOCHS)\r\n\r\nprint(\"Now evaluating the model\")\r\ntestGen.reset()\r\npred_indices=model.predict_generator(testGen,steps=(lenTest\/\/BS)+1)\r\n\r\npred_indices=np.argmax(pred_indices,axis=1)\r\n\r\nprint(classification_report(testGen.classes, pred_indices, target_names=testGen.class_indices.keys()))\r\n\r\ncm=confusion_matrix(testGen.classes,pred_indices)\r\ntotal=sum(sum(cm))\r\naccuracy=(cm[0,0]+cm[1,1])\/total\r\nspecificity=cm[1,1]\/(cm[1,0]+cm[1,1])\r\nsensitivity=cm[0,0]\/(cm[0,0]+cm[0,1])\r\nprint(cm)\r\nprint(f'Accuracy: {accuracy}')\r\nprint(f'Specificity: {specificity}')\r\nprint(f'Sensitivity: {sensitivity}')\r\n\r\nN = NUM_EPOCHS\r\nplt.style.use(\"ggplot\")\r\nplt.figure()\r\nplt.plot(np.arange(0,N), M.history[\"loss\"], label=\"train_loss\")\r\nplt.plot(np.arange(0,N), M.history[\"val_loss\"], label=\"val_loss\")\r\nplt.plot(np.arange(0,N), M.history[\"acc\"], label=\"train_acc\")\r\nplt.plot(np.arange(0,N), M.history[\"val_acc\"], label=\"val_acc\")\r\nplt.title(\"Training Loss and Accuracy on the IDC Dataset\")\r\nplt.xlabel(\"Epoch No.\")\r\nplt.ylabel(\"Loss\/Accuracy\")\r\nplt.legend(loc=\"lower left\")\r\nplt.savefig('plot.png')<\/pre>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69948\" 
src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project.jpg\" alt=\"python open source projects\" width=\"894\" height=\"694\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project.jpg 894w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project-150x116.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project-300x233.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project-768x596.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-open-source-project-520x404.jpg 520w\" sizes=\"auto, (max-width: 894px) 100vw, 894px\" \/><\/a><\/p>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69949\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project.jpg\" alt=\"intermediate python projects\" width=\"913\" height=\"649\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project.jpg 913w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project-150x107.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project-300x213.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project-768x546.jpg 768w, 
https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-intermediate-python-project-520x370.jpg 520w\" sizes=\"auto, (max-width: 913px) 100vw, 913px\" \/><\/a><\/p>\n<p><strong>Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69950\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples.jpg\" alt=\"python project example\" width=\"917\" height=\"675\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples.jpg 917w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples-150x110.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples-300x221.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples-768x565.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-model-python-project-examples-520x383.jpg 520w\" sizes=\"auto, (max-width: 917px) 100vw, 917px\" \/><\/a><\/p>\n<p>In this script, first, we set initial values for the number of epochs, the learning rate, and the batch size. We&#8217;ll get the number of paths in the three directories for training, validation, and testing. Then, we&#8217;ll get the class weight for the training data so we can deal with the imbalance.<\/p>\n<p>Now, we initialize the training data augmentation object. This is a process of regularization that helps generalize the model. This is where we slightly modify the training examples to avoid the need for more training data. 
We&#8217;ll also initialize the data generator object shared by validation and testing, which only rescales the pixel values and applies no augmentation.<\/p>\n<p>We&#8217;ll initialize the training, validation, and testing generators so they can generate batches of images of size batch_size. Then, we&#8217;ll build the model, create the Adagrad optimizer, and compile the model with a binary_crossentropy loss function. Now, to fit the model, we make a call to fit_generator().<\/p>\n<p>We have successfully trained our model. Now, let&#8217;s evaluate the model on our testing data. We&#8217;ll reset the generator and make predictions on the data. Then, for each image in the testing set, we take the index of the label with the largest predicted probability. And we&#8217;ll display a classification report.<\/p>\n<p>Now, we&#8217;ll compute the confusion matrix and get the raw accuracy, specificity, and sensitivity, and display all values. Finally, we&#8217;ll plot the training loss and accuracy.<\/p>\n<p><strong>Output Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69951\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project.jpg\" alt=\"learning python projects\" width=\"979\" height=\"487\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project.jpg 979w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project-150x75.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project-300x149.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project-768x382.jpg 768w, 
https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-1-learning-python-project-520x259.jpg 520w\" sizes=\"auto, (max-width: 979px) 100vw, 979px\" \/><\/a><\/p>\n<p><strong>Output Screenshot:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69952\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects.jpg\" alt=\"advanced python projects\" width=\"978\" height=\"490\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects.jpg 978w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects-150x75.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects-300x150.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects-768x385.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/train-advanced-python-projects-520x261.jpg 520w\" sizes=\"auto, (max-width: 978px) 100vw, 978px\" \/><\/a><\/p>\n<p><strong>Output:<\/strong><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/plot-project-in-python.png\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter size-full wp-image-69953\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/plot-project-in-python.png\" alt=\"Python project with source code\" width=\"640\" height=\"480\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/plot-project-in-python.png 640w, 
https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/plot-project-in-python-150x113.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/plot-project-in-python-300x225.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/plot-project-in-python-520x390.png 520w\" sizes=\"auto, (max-width: 640px) 100vw, 640px\" \/><\/a><\/p>\n<h3>Summary<\/h3>\n<p>Breast cancer is one of the most common cancers in women. Detecting it early can save lives. Using deep learning in Python, we can build a classification model to predict whether a breast tissue image is malignant (harmful) or benign (safe). This project uses the well-known IDC (Invasive Ductal Carcinoma) breast histopathology image dataset from Kaggle. It contains histology images of breast tissue, each labelled 0 or 1 for the two classes the network predicts.<\/p>\n<p>In this Python project, we learned to build a breast cancer classifier on the IDC dataset (with histology images for Invasive Ductal Carcinoma) and created the CancerNet network for it. We used Keras to implement it. Hope you enjoyed this Python project.<\/p>\n<p>This project is widely used in the healthcare domain to support doctors and improve decision-making. It\u2019s beginner-friendly, yet teaches powerful ML concepts such as classification, data visualization, and model evaluation. By completing this project, you\u2019ll learn to apply machine learning to real-world medical problems and make an impact using data and Python.<\/p>\n<p class=\"df-text-bold df-text-red\" style=\"text-align: center\">Want to become a Data Scientist?<\/p>\n<p class=\"df-text-bold\" style=\"text-align: center\">Start learning Python in detail with DataFlair <a href=\"https:\/\/data-flair.training\/python-course\/\">Python Online Training<\/a> and achieve success.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>If you want to master Python programming language then you can&#8217;t skip projects in Python. 
After publishing 4 advanced python projects, DataFlair today came with another one that is the Breast Cancer Classification project&#46;&#46;&#46;<\/p>\n","protected":false},"author":6,"featured_media":69965,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[36],"tags":[21092,21091,21067,21066,21065,21073],"class_list":["post-69927","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-machine-learning","tag-intermediate-python-projects","tag-projects-in-python","tag-python-data-science-projects","tag-python-machine-learning-projects","tag-python-mini-projects","tag-python-projects"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Project in Python - Breast Cancer Classification with Deep Learning - DataFlair<\/title>\n<meta name=\"description\" content=\"Breast cancer classification project in python will help you to revise the concepts of ML, data science, AI and Python. Become the next Python developer.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Project in Python - Breast Cancer Classification with Deep Learning - DataFlair\" \/>\n<meta property=\"og:description\" content=\"Breast cancer classification project in python will help you to revise the concepts of ML, data science, AI and Python. 
Become the next Python developer.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/\" \/>\n<meta property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2019-09-18T07:22:31+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2025-07-29T13:07:56+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-breast-cancer-classification-.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"802\" \/>\n\t<meta property=\"og:image:height\" content=\"420\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"10 minutes\" \/>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"Project in Python - Breast Cancer Classification with Deep Learning - DataFlair","description":"Breast cancer classification project in python will help you to revise the concepts of ML, data science, AI and Python. 
Become the next Python developer.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/","og_locale":"en_US","og_type":"article","og_title":"Project in Python - Breast Cancer Classification with Deep Learning - DataFlair","og_description":"Breast cancer classification project in python will help you to revise the concepts of ML, data science, AI and Python. Become the next Python developer.","og_url":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2019-09-18T07:22:31+00:00","article_modified_time":"2025-07-29T13:07:56+00:00","og_image":[{"width":802,"height":420,"url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-breast-cancer-classification-.jpg","type":"image\/jpeg"}],"author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"10 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/2c58ecb4f73a39f0ef993f1ddfcd7b89"},"headline":"Project in Python &#8211; Breast Cancer Classification with Deep Learning","datePublished":"2019-09-18T07:22:31+00:00","dateModified":"2025-07-29T13:07:56+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/"},"wordCount":1615,"commentCount":68,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-breast-cancer-classification-.jpg","keywords":["intermediate python projects","Projects in python","Python data science projects","python machine learning projects","python mini projects","Python Projects"],"articleSection":["Machine Learning Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/","url":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/","name":"Project in Python - Breast Cancer Classification with Deep Learning - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"primaryImageOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#primaryimage"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-breast-cancer-classification-.jpg","datePublished":"2019-09-18T07:22:31+00:00","dateModified":"2025-07-29T13:07:56+00:00","description":"Breast cancer classification project in python will help you to revise the concepts of ML, data science, AI and Python. Become the next Python developer.","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#primaryimage","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-breast-cancer-classification-.jpg","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2019\/09\/project-in-python-breast-cancer-classification-.jpg","width":802,"height":420,"caption":"project in python - breast cancer classification"},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/project-in-python-breast-cancer-classification\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Machine Learning Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/machine-learning\/"},{"@type":"ListItem","position":3,"name":"Project in Python 
&#8211; Breast Cancer Classification with Deep Learning"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/2c58ecb4f73a39f0ef993f1ddfcd7b89","name":"DataFlair 
Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","caption":"DataFlair Team"},"description":"The DataFlair Team provides industry-driven content on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. Our expert educators focus on delivering value-packed, easy-to-follow resources for tech enthusiasts and professionals.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam2\/"}]}},"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/69927","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/6"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=69927"}],"version-history":[{"count":23,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/69927\/revisions"}],"predecessor-version":[{"id":146324,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/69927\/revisions\/146324"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media\/69965"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=69927"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=699
27"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=69927"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}