

{"id":145501,"date":"2025-06-23T14:44:45","date_gmt":"2025-06-23T09:14:45","guid":{"rendered":"https:\/\/data-flair.training\/blogs\/?p=145501"},"modified":"2025-06-24T15:28:28","modified_gmt":"2025-06-24T09:58:28","slug":"how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/","title":{"rendered":"How to Split a Dataset into Train and Test Sets in Machine Learning"},"content":{"rendered":"<h3>Program 1<\/h3>\n<p><a href=\"https:\/\/drive.google.com\/file\/d\/14fMuG7hD4TA8pTlkPwtvlBdRfD0aqduX\/view?usp=sharing\" target=\"_blank\" rel=\"noopener\"><strong>Machine Learning Dataset<\/strong><\/a><\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\">import matplotlib.pyplot as plt\r\nimport pandas as pd\r\nimport numpy as np\r\n\r\ndf=pd.read_csv(\"D:\\scikit_data\\cardata\\carprices.csv\")\r\ndf.head(5)\r\n\r\ndf=df.rename(columns={'Age(yrs)':'Age'})\r\ndf=df.rename(columns={'Sell Price($)':'SellPrice'})\r\n\r\ndf.head(5)\r\n\r\nplt.scatter(df['Mileage'],df['SellPrice'],color='red')\r\n\r\nplt.scatter(df['Age'],df['SellPrice'],color='red')\r\n\r\nfrom sklearn.model_selection import train_test_split\r\n\r\nx=df[['Mileage','Age']]\r\n\r\ny=df[['SellPrice']]\r\n# x_train---&gt; Training data set for Independent variable\r\n# x_test---&gt; Testing data set for Independent variable\r\n# y_train---&gt; Training data set for dependent variable\r\n# y_test---&gt; Testing data set for dependent variable\r\n\r\nx_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)\r\n\r\nlen(x_train)\r\n\r\nlen(y_train)\r\n\r\nlen(x_test)\r\n\r\nlen(y_test)\r\n\r\nx_train\r\n\r\ny_train\r\n\r\nx_test\r\n\r\ny_test\r\n\r\nfrom sklearn import linear_model\r\n\r\nmodel=linear_model.LinearRegression()\r\nmodel.fit(x_train,y_train)\r\n\r\nmodel.predict(x_test)\r\n\r\nmodel.score(x_test,y_test)\r\n\r\n# 
Prediction by training dataset\r\ny_pred=model.predict(x_train)\r\n\r\nplt.scatter(y_train,y_pred,color='red')\r\nplt.xlabel(\"Actual Price\")\r\nplt.ylabel(\"Predicated Price\")\r\nplt.show()\r\n\r\nfrom sklearn.metrics import r2_score\r\n\r\nprint(\"R2 Score for Training data:\",r2_score(y_train,y_pred))\r\n\r\n# Prediction by test dataset\r\ny_pred=model.predict(x_test)\r\n#print(y_pred)\r\nplt.scatter(y_test,y_pred,color='blue',marker='+')\r\nplt.xlabel(\"Actual Price\")\r\nplt.ylabel(\"Predicated Price\")\r\nplt.show()\r\nprint(\"R2 Score for Testing data:\",r2_score(y_test,y_pred))\r\n<\/pre>\n<p>&nbsp;<\/p>\n<p>&nbsp;<span hidden class=\"__iawmlf-post-loop-links\" data-iawmlf-links=\"[{&quot;id&quot;:51,&quot;href&quot;:&quot;https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/14fMuG7hD4TA8pTlkPwtvlBdRfD0aqduX\\\/view?usp=sharing&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251205121352\\\/https:\\\/\\\/drive.google.com\\\/file\\\/d\\\/14fMuG7hD4TA8pTlkPwtvlBdRfD0aqduX\\\/view?usp=sharing&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2026-03-03 13:39:23&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-04-22 05:43:45&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-05-02 13:15:10&quot;,&quot;http_code&quot;:200},{&quot;date&quot;:&quot;2026-06-19 07:44:43&quot;,&quot;http_code&quot;:200}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-19 07:44:43&quot;,&quot;http_code&quot;:200},&quot;process&quot;:&quot;done&quot;}]\"><\/span><\/p>\n","protected":false},"excerpt":{"rendered":"<p>Program 1 Machine Learning Dataset import matplotlib.pyplot as plt import pandas as pd import numpy as np df=pd.read_csv(&#8220;D:\\scikit_data\\cardata\\carprices.csv&#8221;) df.head(5) df=df.rename(columns={&#8216;Age(yrs)&#8217;:&#8217;Age&#8217;}) df=df.rename(columns={&#8216;Sell Price($)&#8217;:&#8217;SellPrice&#8217;}) df.head(5) 
plt.scatter(df[&#8216;Mileage&#8217;],df[&#8216;SellPrice&#8217;],color=&#8217;red&#8217;) plt.scatter(df[&#8216;Age&#8217;],df[&#8216;SellPrice&#8217;],color=&#8217;red&#8217;) from sklearn.model_selection import train_test_split x=df[[&#8216;Mileage&#8217;,&#8217;Age&#8217;]] y=df[[&#8216;SellPrice&#8217;]] # x_train&#8212;&gt; Training&#46;&#46;&#46;<\/p>\n","protected":false},"author":581,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[36],"tags":[34591,8431,33127,33128,34593,34592],"class_list":["post-145501","post","type-post","status-publish","format-standard","hentry","category-machine-learning","tag-how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning","tag-machine-learning","tag-machine-learning-practical","tag-machine-learning-program","tag-machine-learning-program-on-split-a-dataset-into-train-and-test-sets","tag-split-a-dataset-into-train-and-test-sets-in-machine-learning"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.8 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>How to Split a Dataset into Train and Test Sets in Machine Learning - DataFlair<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"How to Split a Dataset into Train and Test Sets in Machine Learning - DataFlair\" \/>\n<meta property=\"og:description\" content=\"Program 1 Machine Learning Dataset import matplotlib.pyplot as plt import pandas as pd import numpy as np df=pd.read_csv(&quot;D:scikit_datacardatacarprices.csv&quot;) df.head(5) 
df=df.rename(columns={&#039;Age(yrs)&#039;:&#039;Age&#039;}) df=df.rename(columns={&#039;Sell Price($)&#039;:&#039;SellPrice&#039;}) df.head(5) plt.scatter(df[&#039;Mileage&#039;],df[&#039;SellPrice&#039;],color=&#039;red&#039;) plt.scatter(df[&#039;Age&#039;],df[&#039;SellPrice&#039;],color=&#039;red&#039;) from sklearn.model_selection import train_test_split x=df[[&#039;Mileage&#039;,&#039;Age&#039;]] y=df[[&#039;SellPrice&#039;]] # x_train---&gt; Training&#046;&#046;&#046;\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/\" \/>\n<meta property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2025-06-23T09:14:45+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2025-06-24T09:58:28+00:00\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"1 minute\" \/>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"How to Split a Dataset into Train and Test Sets in Machine Learning - DataFlair","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/","og_locale":"en_US","og_type":"article","og_title":"How to Split a Dataset into Train and Test Sets in Machine Learning - DataFlair","og_description":"Program 1 Machine Learning Dataset import matplotlib.pyplot as plt import pandas as pd import numpy as np df=pd.read_csv(\"D:scikit_datacardatacarprices.csv\") df.head(5) df=df.rename(columns={'Age(yrs)':'Age'}) df=df.rename(columns={'Sell Price($)':'SellPrice'}) df.head(5) plt.scatter(df['Mileage'],df['SellPrice'],color='red') plt.scatter(df['Age'],df['SellPrice'],color='red') from sklearn.model_selection import train_test_split x=df[['Mileage','Age']] y=df[['SellPrice']] # x_train---&gt; Training&#46;&#46;&#46;","og_url":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2025-06-23T09:14:45+00:00","article_modified_time":"2025-06-24T09:58:28+00:00","author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"1 minute"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/c187795dc82ab948373cca526df7c445"},"headline":"How to Split a Dataset into Train and Test Sets in Machine Learning","datePublished":"2025-06-23T09:14:45+00:00","dateModified":"2025-06-24T09:58:28+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/"},"wordCount":19,"commentCount":0,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"keywords":["how to split a dataset into train and test sets in machine learning","machine learning","machine learning practical","machine learning program","machine learning program on split a dataset into train and test sets","split a dataset into train and test sets in machine learning"],"articleSection":["Machine Learning Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/","url":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/","name":"How to Split a Dataset into Train and Test Sets in Machine Learning - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"datePublished":"2025-06-23T09:14:45+00:00","dateModified":"2025-06-24T09:58:28+00:00","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/how-to-split-a-dataset-into-train-and-test-sets-in-machine-learning\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Machine Learning Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/machine-learning\/"},{"@type":"ListItem","position":3,"name":"How to Split a Dataset into Train and Test Sets in Machine Learning"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. 
Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/c187795dc82ab948373cca526df7c445","name":"DataFlair Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/2302ebc438084d2f1f993edc1996a0aae01332e81f3227cba8df0c48ec010ca4?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/2302ebc438084d2f1f993edc1996a0aae01332e81f3227cba8df0c48ec010ca4?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/2302ebc438084d2f1f993edc1996a0aae01332e81f3227cba8df0c48ec010ca4?s=96&d=mm&r=g","caption":"DataFlair Team"},"description":"DataFlair Team provides high-impact content on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. 
We make complex concepts easy to grasp, helping learners of all levels succeed in their tech careers.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam6\/"}]}},"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/145501","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/581"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=145501"}],"version-history":[{"count":4,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/145501\/revisions"}],"predecessor-version":[{"id":145622,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/145501\/revisions\/145622"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=145501"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=145501"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=145501"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}