

{"id":21384,"date":"2018-07-17T04:00:08","date_gmt":"2018-07-17T04:00:08","guid":{"rendered":"https:\/\/data-flair.training\/blogs\/?p=21384"},"modified":"2026-04-25T14:44:25","modified_gmt":"2026-04-25T09:14:25","slug":"python-data-cleansing","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/","title":{"rendered":"Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations"},"content":{"rendered":"<div class='__iawmlf-post-loop-links' style='display:none;' data-iawmlf-post-links='[{&quot;id&quot;:1540,&quot;href&quot;:&quot;https:\\\/\\\/pandas.pydata.org&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251011085401\\\/https:\\\/\\\/pandas.pydata.org\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-09 10:37:21&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-14 09:06:34&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-17 13:11:54&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-22 09:38:20&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-26 15:42:58&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-31 04:38:39&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-03 08:59:31&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-12 11:59:57&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-16 05:20:35&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-19 06:17:51&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-25 05:47:20&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-28 21:00:54&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-01 16:46:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-05 08:01:25&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-08 
20:28:29&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-12 06:32:33&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-15 18:25:52&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-19 10:19:48&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-22 14:14:48&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-26 04:22:39&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-01 14:08:43&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-08 08:27:46&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-11 10:12:26&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-15 07:27:01&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-20 02:49:08&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-24 09:19:58&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-27 12:26:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-31 01:25:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-03 14:06:54&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-07 00:07:55&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-11 11:43:57&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-15 18:53:40&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-20 10:50:54&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-24 09:51:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-28 11:57:30&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-03 06:29:37&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-06 13:23:17&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-11 05:43:59&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-16 04:55:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-19 
18:47:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-23 07:25:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-26 11:38:14&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-01 05:03:03&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-04 10:26:54&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-09 10:06:47&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-09 10:06:47&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:1538,&quot;href&quot;:&quot;http:\\\/\\\/www.numpy.org&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251209101347\\\/https:\\\/\\\/numpy.org\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-09 10:37:15&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-18 03:36:32&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-24 16:07:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-03 14:06:20&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-13 10:32:47&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-19 06:17:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-25 05:47:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-04 01:34:21&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-12 06:32:33&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-16 16:33:07&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-24 15:54:28&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-03 15:15:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-15 07:27:02&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-29 08:40:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-02 
02:56:03&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-06 11:24:04&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-11 11:43:57&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-20 13:40:21&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-24 09:51:43&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-30 10:04:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-06 13:23:18&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-16 10:42:57&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-22 14:06:09&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-01 22:59:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-11 22:01:38&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-11 22:01:38&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:1879,&quot;href&quot;:&quot;https:\\\/\\\/en.wikipedia.org\\\/wiki\\\/Python_(programming_language)&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251203015133\\\/https:\\\/\\\/en.wikipedia.org\\\/wiki\\\/Python_%28programming_language%29&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-10 06:26:40&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-18 03:36:47&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2025-12-27 15:18:05&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-03 14:06:37&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-13 10:33:04&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-01-19 06:17:57&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-05 09:47:53&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-02-16 16:33:18&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-03 
15:31:34&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-03-15 07:27:19&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-13 12:46:54&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-20 13:40:26&quot;,&quot;http_code&quot;:429},{&quot;date&quot;:&quot;2026-04-24 09:53:13&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-30 10:06:01&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-06 13:23:26&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-16 10:43:44&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-22 14:10:42&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-01 22:59:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-11 22:01:37&quot;,&quot;http_code&quot;:404}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-11 22:01:37&quot;,&quot;http_code&quot;:404},&quot;process&quot;:&quot;done&quot;}]'><\/div>\n<p>In our last Python tutorial, we studied\u00a0<strong><a href=\"https:\/\/data-flair.training\/blogs\/data-wrangling-with-python\/\">Aggregation and Data Wrangling with Python<\/a><\/strong>. Today, we will discuss the Python Data Cleansing tutorial, which aims to deliver a brief introduction to the operations of data cleansing and how to clean your data in <a href=\"https:\/\/data-flair.training\/blogs\/python-tutorial-for-beginners\/\"><strong>Python Programming<\/strong><\/a>. For this purpose, we will use two libraries- pandas and numpy. 
Moreover, we will discuss different ways to cleanse the missing data.<\/p>\n<p>So, let&#8217;s start the\u00a0Python Data Cleansing.<\/p>\n<h3>Python Data Cleansing &#8211; Prerequisites<\/h3>\n<p>As mentioned earlier, we will need two <strong><a href=\"https:\/\/data-flair.training\/blogs\/python-library\/\">libraries for Python<\/a><\/strong> Data Cleansing: Python pandas and Python numpy.<\/p>\n<p><strong>The important steps to keep in mind while performing data cleansing:<\/strong><\/p>\n<ul>\n<li><strong>Remove unwanted data:<\/strong> Things like duplicate data, irrelevant entries, or data that is no longer needed should be removed.<\/li>\n<li><strong>Fix errors:<\/strong> Standardized tools should be used to format data and variable types to maintain consistency.<\/li>\n<li><strong>Manage results:<\/strong> The values must be checked, and the bug must be cleared before getting the results.<\/li>\n<\/ul>\n<h4 class=\"western\">a. Pandas in Python<\/h4>\n<p>Python pandas is an excellent software library for manipulating data and analyzing it. 
It will let us manipulate numerical tables and time series using data structures and operations.<\/p>\n<div id=\"attachment_21563\" style=\"width: 828px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pandas-3.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21563\" class=\"wp-image-21563 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pandas-3.png\" alt=\"Python Data Cleansing by pandas &amp; numpy | Python Data Operations\" width=\"818\" height=\"163\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pandas-3.png 818w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pandas-3-150x30.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pandas-3-300x60.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pandas-3-768x153.png 768w\" sizes=\"auto, (max-width: 818px) 100vw, 818px\" \/><\/a><p id=\"caption-attachment-21563\" class=\"wp-caption-text\">Python Data Cleansing &#8211; Python Pandas<\/p><\/div>\n<p>You can install it using pip-<\/p>\n<pre class=\"EnlighterJSRAW\">C:\\Users\\lifei&gt;pip install pandas<\/pre>\n<p><strong><a href=\"https:\/\/data-flair.training\/blogs\/exception-handling-in-python\/\">Do You Know What is Exception Handling in Python Programming<\/a><\/strong><\/p>\n<h4 class=\"western\">b. Numpy in Python<\/h4>\n<p>Python numpy is another library we will use here. It lets us handle arrays and matrices, especially those multidimensional. 
It also provides several high-level mathematical functions to help us operate on these.<\/p>\n<div id=\"attachment_21565\" style=\"width: 1610px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21565\" class=\"wp-image-21565 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy.png\" alt=\"Python Data Cleansing\u00a0- Python numpy\" width=\"1600\" height=\"635\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy.png 1600w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy-150x60.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy-300x119.png 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy-768x305.png 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/numpy-1024x406.png 1024w\" sizes=\"auto, (max-width: 1600px) 100vw, 1600px\" \/><\/a><p id=\"caption-attachment-21565\" class=\"wp-caption-text\">Python Data Cleansing\u00a0&#8211; Python numpy<\/p><\/div>\n<p>Use the following command in the command prompt to install Python numpy on your machine-<\/p>\n<pre class=\"EnlighterJSRAW\">C:\\Users\\lifei&gt;pip install numpy<\/pre>\n<h3>Python Data Cleansing Operations on Data using NumPy<\/h3>\n<p>Using Python NumPy, let\u2019s create an array (an n-dimensional array).<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; import numpy as np\r\n&gt;&gt;&gt; np.array(['a','b','c','d','e'],ndmin=2)<\/pre>\n<p>array([[&#8216;a&#8217;, &#8216;b&#8217;, &#8216;c&#8217;, &#8216;d&#8217;, &#8216;e&#8217;]], dtype='&lt;U1&#8242;)<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; np.array([['a','b'],['c','d','e']])<span style=\"background-color: #fafafa;color: #333333;font-family: 
Verdana, Geneva, sans-serif;font-size: 16px;font-weight: inherit\">\u00a0<\/span>\r\n<\/pre>\n<p>array([list([&#8216;a&#8217;, &#8216;b&#8217;]), list([&#8216;c&#8217;, &#8216;d&#8217;, &#8216;e&#8217;])], dtype=object)<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; np.array(['a','b','c','d','e'],ndmin=1)<span style=\"background-color: #fafafa;color: #333333;font-family: Verdana, Geneva, sans-serif;font-size: 16px;font-weight: inherit\">\u00a0<\/span>\r\n<\/pre>\n<p>array([&#8216;a&#8217;, &#8216;b&#8217;, &#8216;c&#8217;, &#8216;d&#8217;, &#8216;e&#8217;], dtype='&lt;U1&#8242;)<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; np.array([1,2,7,9,8],dtype=complex)<span style=\"background-color: #fafafa;color: #333333;font-family: Verdana, Geneva, sans-serif;font-size: 16px;font-weight: inherit\">\u00a0<\/span>\r\n<\/pre>\n<p>array([1.+0.j, 2.+0.j, 7.+0.j, 9.+0.j, 8.+0.j])<\/p>\n<p>While <i>dtype<\/i> lets us tell the interpreter the data type to use, <i>ndmin<\/i> lets us define the minimum number of dimensions. The following attributes will give us information about the array-<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; a=np.array(['a','b',2,'3.0'])\r\n&gt;&gt;&gt; a<\/pre>\n<p>array([&#8216;a&#8217;, &#8216;b&#8217;, &#8216;2&#8217;, &#8216;3.0&#8217;], dtype='&lt;U3&#8242;)<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; type(a)\r\n&lt;class 'numpy.ndarray'&gt;\r\n&gt;&gt;&gt; a.ndim<\/pre>\n<p>1<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; a.shape\r\n(4,)\r\n&gt;&gt;&gt; a.size<\/pre>\n<p>4<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; a.dtype\r\ndtype('&lt;U3')<\/pre>\n<p><strong><a href=\"https:\/\/data-flair.training\/blogs\/python-generator-vs-iterator\/\">Let&#8217;s Explore the Comparison Between Python Iterators and Generators<\/a><\/strong><\/p>\n<p><strong>We can also perform operations like:<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b=np.array([[1,2,3],[4,5,6]])\r\n&gt;&gt;&gt; b\r\n<\/pre>\n<p>array([[1, 2, 3],<br \/>\n[4, 5, 
6]])<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b.flatten()<\/pre>\n<p>array([1, 2, 3, 4, 5, 6])<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b.reshape(3,2)<\/pre>\n<p>array([[1, 2],<br \/>\n[3, 4],<br \/>\n[5, 6]])<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b[:2,::2]<\/pre>\n<p>array([[1, 3],<br \/>\n[4, 6]])<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b-4<\/pre>\n<p>array([[-3, -2, -1],<br \/>\n[ 0, 1, 2]])<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b.sum()<\/pre>\n<p>21<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; b-2*b<\/pre>\n<p>array([[-1, -2, -3],<br \/>\n[-4, -5, -6]])<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; np.sort(np.array([[3,2,1],[5,2,4]]))<\/pre>\n<p>array([[1, 2, 3],<br \/>\n[2, 4, 5]])<\/p>\n<h3>Python Data Cleansing Operations on Data Using pandas<\/h3>\n<p>Pandas use three types to hold data- DataFrame, Panel, and Series.<\/p>\n<div id=\"attachment_21584\" style=\"width: 1210px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01.jpg\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21584\" class=\"wp-image-21584 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01.jpg\" alt=\"Operations on Data Using Python pandas\" width=\"1200\" height=\"628\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01.jpg 1200w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01-150x79.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01-300x157.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01-768x402.jpg 768w, 
https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Operations-on-Data-Using-Panda-01-1024x536.jpg 1024w\" sizes=\"auto, (max-width: 1200px) 100vw, 1200px\" \/><\/a><p id=\"caption-attachment-21584\" class=\"wp-caption-text\">Operations on Data Using Python pandas<\/p><\/div>\n<h4 class=\"western\">a. DataFrame in Pandas<\/h4>\n<p>Pandas DataFrame is a data structure that holds data in two dimensions- as rows and columns. We have the following syntax-<\/p>\n<pre class=\"EnlighterJSRAW\">pandas.DataFrame(data, index, columns, dtype, copy)<\/pre>\n<p>Now let\u2019s try an example-<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; import pandas as pd\r\n&gt;&gt;&gt; data={'Element':['Silver','Gold','Platinum','Copper'],'Atomic Number':[47,79,78,29]}\r\n&gt;&gt;&gt; frame=pd.DataFrame(data,index=['element 1','element 2','element 3','element 4'])\r\n&gt;&gt;&gt; frame<\/pre>\n<div id=\"attachment_21566\" style=\"width: 443px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21566\" class=\"wp-image-21566 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe.png\" alt=\"Python Pandas - DataFrame\" width=\"433\" height=\"151\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe.png 433w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-150x52.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-300x105.png 300w\" sizes=\"auto, (max-width: 433px) 100vw, 433px\" \/><\/a><p id=\"caption-attachment-21566\" class=\"wp-caption-text\">Python Pandas &#8211; DataFrame<\/p><\/div>\n<p><strong><a href=\"https:\/\/data-flair.training\/blogs\/python-inheritance\/\">Have a Look at Python Inheritance, Method 
Overloading and Method Overriding<\/a><\/strong><\/p>\n<h4 class=\"western\">b. Panel in Pandas<\/h4>\n<p>Pandas panel holds data in three dimensions. Etymologically, the term <i>pan<\/i>el <i>da<\/i>ta is one source of the name pandas. A panel has the following syntax:<\/p>\n<p><strong>pandas.Panel(data, items, major_axis, minor_axis, dtype, copy)<\/strong><\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; data={'Red':pd.DataFrame(np.random.randn(4,2)),\r\n\t'Blue':pd.DataFrame(np.random.randn(4,3))}\r\n&gt;&gt;&gt; pd.Panel(data)<\/pre>\n<p>&lt;class &#8216;pandas.core.panel.Panel&#8217;&gt;<\/p>\n<p>Dimensions: 2 (items) x 4 (major_axis) x 3 (minor_axis)<\/p>\n<p>Items axis: Blue to Red<\/p>\n<p>Major_axis axis: 0 to 3<\/p>\n<p>Minor_axis axis: 0 to 2<\/p>\n<h4>c. Series in Pandas<\/h4>\n<p>Pandas Series holds data in one dimension, in a labeled format. The <i>index<\/i> is the set of axis labels we use.<br \/>\nIt has the following syntax-<\/p>\n<pre class=\"EnlighterJSRAW\">pandas.Series(data, index, dtype, copy)<\/pre>\n<p>Let\u2019s take an example.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; data=np.array([1,2,3,3,4])\r\n&gt;&gt;&gt; pd.Series(data)<\/pre>\n<p>0 1<br \/>\n1 2<br \/>\n2 3<br \/>\n3 3<br \/>\n4 4<br \/>\ndtype: int32<br \/>\nLet\u2019s take another example.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; pd.Series(np.array(['a','c','b']))<\/pre>\n<p>0 a<br \/>\n1 c<br \/>\n2 b<br \/>\ndtype: object<br \/>\nUsing these data structures, we can manipulate data in many ways-<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.iloc[0:2,:]<\/pre>\n<div id=\"attachment_21567\" style=\"width: 418px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21567\" class=\"wp-image-21567 size-full\" 
src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2.png\" alt=\"\" width=\"408\" height=\"97\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2.png 408w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-150x36.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-300x71.png 300w\" sizes=\"auto, (max-width: 408px) 100vw, 408px\" \/><\/a><p id=\"caption-attachment-21567\" class=\"wp-caption-text\">Python Data Cleansing by pandas &amp; numpy | Python Data Operations<\/p><\/div>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.describe()<\/pre>\n<div id=\"attachment_21569\" style=\"width: 267px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/describe.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21569\" class=\"wp-image-21569 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/describe.png\" alt=\"Python Data Cleansing by pandas &amp; numpy | Python Data Operations\" width=\"257\" height=\"235\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/describe.png 257w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/describe-150x137.png 150w\" sizes=\"auto, (max-width: 257px) 100vw, 257px\" \/><\/a><p id=\"caption-attachment-21569\" class=\"wp-caption-text\">Python Pandas &#8211;\u00a0Series &#8220;Describe&#8221;<\/p><\/div>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.rank()<\/pre>\n<div id=\"attachment_21570\" style=\"width: 428px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rank.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21570\" class=\"wp-image-21570 size-full\" 
src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rank.png\" alt=\"Python Data Cleansing by pandas &amp; numpy | Python Data Operations\" width=\"418\" height=\"145\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rank.png 418w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rank-150x52.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rank-300x104.png 300w\" sizes=\"auto, (max-width: 418px) 100vw, 418px\" \/><\/a><p id=\"caption-attachment-21570\" class=\"wp-caption-text\">Python Pandas &#8211;\u00a0Series &#8220;Rank&#8221;<\/p><\/div>\n<p>This is all for now; we will learn about the libraries <strong><a href=\"https:\/\/pandas.pydata.org\/\">pandas<\/a><\/strong> and <strong><a href=\"http:\/\/www.numpy.org\/\">numpy<\/a><\/strong> in their own tutorials.<br \/>\n<strong><a href=\"https:\/\/data-flair.training\/blogs\/python-iterables\/\">Read about Python Iterables and Python Itertools with Examples<\/a><\/strong><\/p>\n<h3 class=\"western\">Python Data Cleansing<\/h3>\n<p>When some part of our data is missing, due to whatever reason, the accuracy of our predictions plummets. In our <a href=\"https:\/\/data-flair.training\/blogs\/data-wrangling-with-python\/\"><strong>article on data wrangling and aggregation<\/strong><\/a>, we discussed missing data and how to drop it. Let\u2019s see how we can deal with this issue.<\/p>\n<p>Consider a real-world situation, like the comment section of our website: the name and email are mandatory, but the input for \u2018website\u2019 can be left empty. Some users may not run a website and so cannot fill in this information. In ways like this and others, we may end up with missing data in some places. How should we go about this? Let\u2019s find out.<\/p>\n<p>Python Pandas will depict a missing value as NaN, which is short for Not a Number. 
Simply using the reindex() method will fill in NaN for blank values.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame=pd.DataFrame(np.random.randn(4,3),index=[1,2,4,7],columns=['A','B','C'])\r\n&gt;&gt;&gt; frame.reindex([1,2,3,4,5,6,7])<\/pre>\n<div id=\"attachment_21571\" style=\"width: 429px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/missing.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21571\" class=\"wp-image-21571 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/missing.png\" alt=\"Python Data Cleansing by pandas &amp; numpy | Python Data Operations\" width=\"419\" height=\"215\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/missing.png 419w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/missing-150x77.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/missing-300x154.png 300w\" sizes=\"auto, (max-width: 419px) 100vw, 419px\" \/><\/a><p id=\"caption-attachment-21571\" class=\"wp-caption-text\">Python Data Cleansing<\/p><\/div>\n<h4 class=\"western\">a. 
Finding which columns have missing values<\/h4>\n<p>In the tutorial on wrangling, we saw how to find out which columns have missing values-<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame=frame.reindex([1,2,3,4,5,6,7])\r\n&gt;&gt;&gt; frame['B'].isnull()<\/pre>\n<p>1\u00a0 \u00a0False<\/p>\n<p>2\u00a0 \u00a0False<\/p>\n<p>3\u00a0 \u00a0True<\/p>\n<p>4\u00a0 \u00a0False<\/p>\n<p>5\u00a0 \u00a0True<\/p>\n<p>6\u00a0 \u00a0True<\/p>\n<p>7\u00a0 \u00a0False<\/p>\n<p>Name: B, dtype: bool<\/p>\n<h3 class=\"western\">Ways to Cleanse Missing Data in Python<\/h3>\n<p>To perform Python data cleansing, you can drop the missing values, replace them, replace each NaN with a scalar value, or fill forward or backward.<\/p>\n<div id=\"attachment_21583\" style=\"width: 1210px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01.jpg\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21583\" class=\"wp-image-21583 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01.jpg\" alt=\"Ways to Cleanse Missing Data in Python\" width=\"1200\" height=\"628\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01.jpg 1200w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01-150x79.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01-300x157.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01-768x402.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Ways-to-Cleanse-Missing-Data-in-Python-01-1024x536.jpg 1024w\" sizes=\"auto, 
(max-width: 1200px) 100vw, 1200px\" \/><\/a><p id=\"caption-attachment-21583\" class=\"wp-caption-text\">Ways to Cleanse Missing Data in Python<\/p><\/div>\n<h4 class=\"western\">a. Dropping Missing Values<\/h4>\n<p>You can exclude missing values from your dataset using the dropna() method.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.dropna()<\/pre>\n<div id=\"attachment_21572\" style=\"width: 405px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/drop.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21572\" class=\"wp-image-21572 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/drop.png\" alt=\"Ways for Python Data Cleansing -\u00a0Dropping Missing Values\" width=\"395\" height=\"142\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/drop.png 395w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/drop-150x54.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/drop-300x108.png 300w\" sizes=\"auto, (max-width: 395px) 100vw, 395px\" \/><\/a><p id=\"caption-attachment-21572\" class=\"wp-caption-text\">Ways for Python Data Cleansing &#8211;\u00a0Dropping Missing Values<\/p><\/div>\n<p>This defaults to dropping on axis=0, which excludes an entire row for an NaN value.<br \/>\n<strong><a href=\"https:\/\/data-flair.training\/blogs\/python-modules-vs-packages\/\">Do you know the Python Modules vs Packages<\/a><\/strong><\/p>\n<h4 class=\"western\">b. 
Replacing Missing Values<\/h4>\n<p>To replace each NaN we have in the dataset, we can use the replace() method.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; from numpy import NaN\r\n&gt;&gt;&gt; frame.replace({NaN:0.00})<\/pre>\n<div id=\"attachment_21573\" style=\"width: 395px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/replace.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21573\" class=\"wp-image-21573 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/replace.png\" alt=\"Ways for Python Data Cleansing - Replacing Missing Values\" width=\"385\" height=\"217\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/replace.png 385w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/replace-150x85.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/replace-300x169.png 300w\" sizes=\"auto, (max-width: 385px) 100vw, 385px\" \/><\/a><p id=\"caption-attachment-21573\" class=\"wp-caption-text\">Ways for Python Data Cleansing &#8211; Replacing Missing Values<\/p><\/div>\n<p>This way, we can also replace any value that we find enough times in the dataset.<\/p>\n<h4>c. 
Replacing with a Scalar Value<\/h4>\n<p>We can use the fillna() method for this.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.fillna(7)<\/pre>\n<div id=\"attachment_21574\" style=\"width: 398px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/fillna.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21574\" class=\"wp-image-21574 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/fillna.png\" alt=\"Ways for Python Data Cleansing - Replacing with a Scalar Value\" width=\"388\" height=\"220\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/fillna.png 388w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/fillna-150x85.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/fillna-300x170.png 300w\" sizes=\"auto, (max-width: 388px) 100vw, 388px\" \/><\/a><p id=\"caption-attachment-21574\" class=\"wp-caption-text\">Ways for Python Data Cleansing &#8211; Replacing with a Scalar Value<\/p><\/div>\n<h4 class=\"western\">d. Filling Forward or Backward<\/h4>\n<p>If we supply a <i>method<\/i> parameter to the fillna() method, we can fill forward or backward as we need. 
To fill forward, use the methods <i>pad<\/i> or <i>ffill<\/i>, and to fill backward, use <i>bfill<\/i> or <i>backfill<\/i>.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.fillna(method='pad')<\/pre>\n<div id=\"attachment_21575\" style=\"width: 402px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pad.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21575\" class=\"wp-image-21575 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pad.png\" alt=\"Ways for Python Data Cleansing - Filling Forward or Backward\" width=\"392\" height=\"217\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pad.png 392w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pad-150x83.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/pad-300x166.png 300w\" sizes=\"auto, (max-width: 392px) 100vw, 392px\" \/><\/a><p id=\"caption-attachment-21575\" class=\"wp-caption-text\">Ways for Python Data Cleansing &#8211; Filling Forward or Backward<\/p><\/div>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.fillna(method='backfill')<\/pre>\n<div id=\"attachment_21576\" style=\"width: 441px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/backfill.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21576\" class=\"wp-image-21576 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/backfill.png\" alt=\"Ways for Python Data Cleansing - Filling Forward or Backward\" width=\"431\" height=\"220\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/backfill.png 431w, 
https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/backfill-150x77.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/backfill-300x153.png 300w\" sizes=\"auto, (max-width: 431px) 100vw, 431px\" \/><\/a><p id=\"caption-attachment-21576\" class=\"wp-caption-text\">Ways for Python Data Cleansing &#8211; Filling Forward or Backward<\/p><\/div>\n<p><strong><a href=\"https:\/\/data-flair.training\/blogs\/python-property-problem-solution\/\">Follow the link to know about Python Property \u2013 The Problem and Solution<\/a><\/strong><\/p>\n<h3 class=\"western\">Python Data Cleansing &#8211; Other Operations<\/h3>\n<p>While cleaning data, we may also need to find out more about it and manipulate it. Below, we make use of some of these operations.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; data={'Element':['Silver','Gold','Platinum','Copper'],'Atomic Number':[47,79,78,29]}\r\n&gt;&gt;&gt; frame=pd.DataFrame(data,index=['element 1','element 2','element 3','element 4'])\r\n&gt;&gt;&gt; frame<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-1.png\"><img loading=\"lazy\" decoding=\"async\" width=\"433\" height=\"151\" class=\"wp-image-21577 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-1.png\" alt=\"&quot;&quot;&gt;&gt;\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-1.png 433w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-1-150x52.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/dataframe-1-300x105.png 300w\" sizes=\"auto, (max-width: 433px) 100vw, 433px\" \/><\/a><\/p>\n<p>True<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.head()<\/pre>\n<p><a 
href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head-1.png\"><img loading=\"lazy\" decoding=\"async\" width=\"439\" height=\"149\" class=\"wp-image-21578 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head-1.png\" alt=\"&quot;&quot;&gt;&gt;\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head-1.png 439w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head-1-150x51.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head-1-300x102.png 300w\" sizes=\"auto, (max-width: 439px) 100vw, 439px\" \/><\/a><\/p>\n<div id=\"attachment_21579\" style=\"width: 418px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-1.png\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-21579\" class=\"wp-image-21579 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-1.png\" alt=\"Python Data Cleansing by pandas &amp;amp; numpy | Python Data Operations\" width=\"408\" height=\"97\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-1.png 408w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-1-150x36.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/head2-1-300x71.png 300w\" sizes=\"auto, (max-width: 408px) 100vw, 408px\" \/><\/a><p id=\"caption-attachment-21579\" class=\"wp-caption-text\">Python Data Cleansing by pandas &amp; numpy | Python Data Operations<\/p><\/div>\n<p>Data Cleansing Operations in\u00a0Python<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.tail(3)<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/tail3.png\"><img loading=\"lazy\" decoding=\"async\" 
width=\"436\" height=\"127\" class=\"wp-image-21580 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/tail3.png\" alt=\"&quot;&quot;&gt;&gt;\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/tail3.png 436w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/tail3-150x44.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/tail3-300x87.png 300w\" sizes=\"auto, (max-width: 436px) 100vw, 436px\" \/><\/a><\/p>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/loc.png\"><img loading=\"lazy\" decoding=\"async\" width=\"374\" height=\"96\" class=\"wp-image-21581 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/loc.png\" alt=\"&quot;&quot;&gt;&gt;\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/loc.png 374w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/loc-150x39.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/loc-300x77.png 300w\" sizes=\"auto, (max-width: 374px) 100vw, 374px\" \/><\/a><\/p>\n<h4 class=\"western\">a. 
Renaming Columns<\/h4>\n<p>To rename a column, you can use the rename() method.<\/p>\n<pre class=\"EnlighterJSRAW\">&gt;&gt;&gt; frame.rename(columns={'Atomic Number':'Number','Element':'Name'},inplace=True)\r\n&gt;&gt;&gt; frame<\/pre>\n<p><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rename.png\"><img loading=\"lazy\" decoding=\"async\" width=\"350\" height=\"193\" class=\"wp-image-21582 size-full\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rename.png\" alt=\"&quot;&lt;yoastmark\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rename.png 350w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rename-150x83.png 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/rename-300x165.png 300w\" sizes=\"auto, (max-width: 350px) 100vw, 350px\" \/><\/a><\/p>\n<p><strong><a class=\"in-cell-link\" href=\"https:\/\/data-flair.training\/blogs\/python-multithreading\/\" target=\"_blank\" rel=\"noopener noreferrer\">Let&#8217;s revise Python Multithreading: A Comprehensive Tutorial<\/a><\/strong><\/p>\n<h4>b. Making Changes Stay<\/h4>\n<p>Also, throughout this tutorial, &#8220;Python Data Cleansing&#8221;, the changes that we have made to the frames did not actually modify them. To make this happen, you can set the inplace=True parameter.<br \/>\nSo, this was all about Python Data Cleansing Tutorial. Hope you like our explanation.<\/p>\n<h3 class=\"western\">Conclusion<\/h3>\n<p>Hence, in this Python Data Cleansing, we learned how data is cleaned in the Python Programming Language. For this purpose, we use two libraries- pandas and numpy. 
Since data scientists spend 80% of their time cleaning and manipulating data, it is an essential skill to learn in data science.<\/p>\n<p>A clean and well-prepared dataset not only boosts model performance but also helps you explain your results clearly to non-technical stakeholders.<\/p>\n<p>Tell us what you think in the comments below.<\/p>\n<p>See Also &#8211;<strong><a href=\"https:\/\/data-flair.training\/blogs\/python-send-email\/\"> How\u00a0Python Send Email Via SMTP<\/a><\/strong><br \/>\n<strong><a href=\"https:\/\/en.wikipedia.org\/wiki\/Python_(programming_language)\">For reference<\/a><\/strong><\/p>\n","protected":false},"excerpt":{"rendered":"<p>In our last Python tutorial, we studied\u00a0Aggregation and Data Wrangling with Python. Today, we will discuss the Python Data Cleansing tutorial, which aims to deliver a brief introduction to the operations of data cleansing&#46;&#46;&#46;<\/p>\n","protected":false},"author":5,"featured_media":21541,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[46],"tags":[2560,2561,3298,3299,9394,9397,9398,10455,10456,10544,10727,10755],"class_list":["post-21384","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python","tag-clean-python","tag-cleaning-text-data-python","tag-data-cleaning-steps-in-python","tag-data-cleansing-in-python","tag-pandas-dataframe","tag-pandas-panel","tag-pandas-series","tag-python-data-cleansing","tag-python-data-cleansing-tutorial","tag-python-for-data-analysis","tag-python-numpy","tag-python-pandas"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations - DataFlair<\/title>\n<meta name=\"description\" content=\"A clean dataset boosts model performance and also explains your results clearly. 
Pandas and Numpy are used for data cleansing in Python.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations - DataFlair\" \/>\n<meta property=\"og:description\" content=\"A clean dataset boosts model performance and also explains your results clearly. Pandas and Numpy are used for data cleansing in Python.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/\" \/>\n<meta property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2018-07-17T04:00:08+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2026-04-25T09:14:25+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Python-Data-cleansing-by-Pandas-NumPy-01-1.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"628\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"10 minutes\" \/>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations - DataFlair","description":"A clean dataset boosts model performance and also explains your results clearly. Pandas and Numpy are used for data cleansing in Python.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/","og_locale":"en_US","og_type":"article","og_title":"Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations - DataFlair","og_description":"A clean dataset boosts model performance and also explains your results clearly. Pandas and Numpy are used for data cleansing in Python.","og_url":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2018-07-17T04:00:08+00:00","article_modified_time":"2026-04-25T09:14:25+00:00","og_image":[{"width":1200,"height":628,"url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Python-Data-cleansing-by-Pandas-NumPy-01-1.jpg","type":"image\/jpeg"}],"author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"10 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/7f83c342f5d1632d6f7b4b0b0f447823"},"headline":"Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations","datePublished":"2018-07-17T04:00:08+00:00","dateModified":"2026-04-25T09:14:25+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/"},"wordCount":1367,"commentCount":0,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Python-Data-cleansing-by-Pandas-NumPy-01-1.jpg","keywords":["clean() python","cleaning text data python","data cleaning steps in python","Data Cleansing in Python","Pandas DataFrame","Pandas Panel","Pandas Series","Python Data Cleansing","Python Data Cleansing Tutorial","python for data analysis","Python NumPy","Python Pandas"],"articleSection":["Python Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/","url":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/","name":"Python Data Cleansing by Pandas &amp; Numpy | Python Data Operations - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"primaryImageOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#primaryimage"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Python-Data-cleansing-by-Pandas-NumPy-01-1.jpg","datePublished":"2018-07-17T04:00:08+00:00","dateModified":"2026-04-25T09:14:25+00:00","description":"A clean dataset boosts model performance and also explains your results clearly. Pandas and Numpy are used for data cleansing in Python.","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/python-data-cleansing\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#primaryimage","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Python-Data-cleansing-by-Pandas-NumPy-01-1.jpg","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2018\/07\/Python-Data-cleansing-by-Pandas-NumPy-01-1.jpg","width":1200,"height":628,"caption":"Python Data Cleansing by pandas &amp; numpy | Python Data Operations"},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/python-data-cleansing\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Python Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/python\/"},{"@type":"ListItem","position":3,"name":"Python Data Cleansing by Pandas &amp; Numpy | Python Data 
Operations"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/7f83c342f5d1632d6f7b4b0b0f447823","name":"DataFlair Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/4cf3a74600d131330b8c481d519afd1574093ed89f6d3396a95393ad223eb7cd?s=96&d=mm&r=g","caption":"DataFlair 
Team"},"description":"DataFlair Team creates expert-level guides on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. Our goal is to empower learners with easy-to-understand content. Explore our resources for career growth and practical learning.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam1\/"}]}},"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/21384","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/5"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=21384"}],"version-history":[{"count":11,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/21384\/revisions"}],"predecessor-version":[{"id":147897,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/21384\/revisions\/147897"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media\/21541"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=21384"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=21384"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=21384"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}