

{"id":739,"date":"2016-07-26T15:17:40","date_gmt":"2016-07-26T15:17:40","guid":{"rendered":"http:\/\/data-flair.training\/blogs\/?p=739"},"modified":"2018-11-21T16:28:41","modified_gmt":"2018-11-21T10:58:41","slug":"apache-storm-vs-spark-streaming","status":"publish","type":"post","link":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/","title":{"rendered":"Apache Storm vs Spark Streaming &#8211; Feature wise Comparison"},"content":{"rendered":"<div class='__iawmlf-post-loop-links' style='display:none;' data-iawmlf-post-links='[{&quot;id&quot;:2354,&quot;href&quot;:&quot;http:\\\/\\\/spark.apache.org&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251009215151\\\/https:\\\/\\\/spark.apache.org\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-11 04:17:36&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-14 07:11:19&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-17 07:55:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-20 14:34:27&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-23 15:49:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-26 15:59:57&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-30 07:08:03&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-02 07:19:25&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-05 08:37:45&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-08 09:28:47&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-11 11:37:40&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-14 12:46:43&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-17 20:26:14&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-20 20:31:00&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-24 
06:20:15&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-27 06:26:56&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-30 07:17:36&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-02 07:26:54&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-05 10:18:07&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-08 12:50:55&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-11 14:05:53&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-14 15:00:31&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-18 00:17:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-21 06:52:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-24 08:35:32&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-27 08:54:29&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-02 09:01:11&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-05 09:57:46&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-08 12:27:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-11 12:42:39&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-14 23:54:40&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-18 03:00:10&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-21 06:08:58&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-24 07:13:58&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-27 09:23:48&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-30 11:37:48&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-02 13:11:14&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-05 14:53:20&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-08 19:36:36&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-11 
23:42:38&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-15 01:00:01&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-18 06:16:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-21 07:55:15&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-24 09:26:05&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-27 11:00:27&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-30 12:57:25&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-03 13:36:16&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-06 19:54:59&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-10 07:47:43&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-13 09:22:32&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-16 16:11:08&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-19 16:22:48&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-22 17:30:06&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-25 20:07:42&quot;,&quot;http_code&quot;:503},{&quot;date&quot;:&quot;2026-05-29 03:42:28&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-01 10:44:16&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-04 10:50:12&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-07 10:53:31&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-10 11:32:26&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-10 11:32:26&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;},{&quot;id&quot;:2461,&quot;href&quot;:&quot;http:\\\/\\\/storm.apache.org&quot;,&quot;archived_href&quot;:&quot;http:\\\/\\\/web-wp.archive.org\\\/web\\\/20251003155224\\\/https:\\\/\\\/storm.apache.org\\\/&quot;,&quot;redirect_href&quot;:&quot;&quot;,&quot;checks&quot;:[{&quot;date&quot;:&quot;2025-12-11 
06:35:34&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-14 21:51:02&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-18 03:40:11&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-21 21:44:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-25 10:19:53&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2025-12-29 14:33:26&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-01 22:53:52&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-05 08:37:45&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-10 22:52:35&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-26 09:49:57&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-01-29 21:27:38&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-20 16:20:06&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-02-25 06:52:15&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-03 10:22:26&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-12 13:58:06&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-25 05:13:31&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-03-31 05:45:51&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-05 15:35:23&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-11 20:07:02&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-04-20 08:33:48&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-09 03:54:19&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-13 14:43:22&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-05-24 01:54:42&quot;,&quot;http_code&quot;:206},{&quot;date&quot;:&quot;2026-06-09 08:12:00&quot;,&quot;http_code&quot;:206}],&quot;broken&quot;:false,&quot;last_checked&quot;:{&quot;date&quot;:&quot;2026-06-09 08:12:00&quot;,&quot;http_code&quot;:206},&quot;process&quot;:&quot;done&quot;}]'><\/div>\n<h2>1. 
Objective<\/h2>\n<p>This tutorial will cover the comparison between Apache Storm vs Spark Streaming. <strong>Apache Storm<\/strong> is the stream processing engine for processing real-time streaming data. While <strong><a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-tutorial\/\">Apache Spark<\/a><\/strong> is general purpose computing engine. It provides <strong>Spark Streaming<\/strong>\u00a0to handle streaming data. It process data in near real-time. Let&#8217;s understand which is better in the battle of Spark vs storm.<\/p>\n<p>So, let&#8217;s start the comparison of Apache Storm vs Spark Streaming.<\/p>\n<div id=\"attachment_43194\" style=\"width: 1210px\" class=\"wp-caption aligncenter\"><a href=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg\"><img loading=\"lazy\" decoding=\"async\" aria-describedby=\"caption-attachment-43194\" class=\"size-full wp-image-43194\" src=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg\" alt=\"Apache Storm vs Spark Streaming - Feature wise Comparison\" width=\"1200\" height=\"628\" srcset=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg 1200w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1-150x79.jpg 150w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1-300x157.jpg 300w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1-768x402.jpg 768w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1-1024x536.jpg 1024w, https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1-520x272.jpg 520w\" sizes=\"auto, 
(max-width: 1200px) 100vw, 1200px\" \/><\/a><p id=\"caption-attachment-43194\" class=\"wp-caption-text\">Apache Storm vs Spark Streaming &#8211; Feature wise Comparison<\/p><\/div>\n<h2>2. Apache Storm vs Spark Streaming Comparison<\/h2>\n<p>The following description shows the detailed feature wise difference between Apache Storm vs Spark Streaming. These differences will help you know which is better to use between Apache Storm and Spark. Let&#8217;s have a look at each feature one by one:<\/p>\n<h3>i. Processing Model<\/h3>\n<ul>\n<li><strong>Storm:<\/strong> It supports a true stream processing model through the core Storm layer.<\/li>\n<li><strong>Spark Streaming: <a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-streaming-comprehensive-guide\/\">Apache\u00a0Spark Streaming<\/a><\/strong> is a wrapper over Spark batch processing.<\/li>\n<\/ul>\n<h3>ii. Primitives<\/h3>\n<ul>\n<li><strong>Storm:<\/strong>\u00a0It provides a very rich set of primitives to perform tuple-level processing at intervals of a stream (filters, functions). Aggregations over messages in a stream are possible through group by semantics. It supports left join, right join, inner join (default) across the stream.<\/li>\n<li><strong>Spark Streaming:<\/strong> It provides two broad categories of operators. The first is\u00a0<strong><a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-streaming-transformation-operations\/\">Stream transformation operators<\/a><\/strong>\u00a0that transform one\u00a0<em>DStream<\/em>\u00a0into another <em>DStream<\/em>. The second is\u00a0<strong>output operators<\/strong>\u00a0that write information to external systems. The former includes stateless operators (filter, map, mapPartitions, union, distinct, and so on) as well as stateful window operators (countByWindow, reduceByWindow, and so on).<\/li>\n<\/ul>\n<h3>iii. 
State Management<\/h3>\n<ul>\n<li><strong>Storm:<\/strong> Core Storm by default doesn\u2019t offer any framework level support to store any intermediate bolt output (the result of user operation) as a state. Hence, any application has to create\/update its own state as and when required.<\/li>\n<li><strong>Spark Streaming:\u00a0<\/strong>The underlying Spark by default treats the output of every <a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-rdd-tutorial\/\"><strong>RDD<\/strong><\/a> operation (<a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-rdd-transformations-actions\/\">Transformations and Actions<\/a>) as an intermediate state. It stores it as an\u00a0RDD. Spark Streaming permits maintaining and changing state via the\u00a0<em>updateStateByKey<\/em>\u00a0API. There is no pluggable mechanism for implementing state in an external system.<\/li>\n<\/ul>\n<h3>iv. Message Delivery Guarantees (Handling message level failures)<\/h3>\n<ul>\n<li><strong>Storm:<\/strong>\u00a0It supports 3 message processing guarantees:<em>\u00a0at-least-once<\/em>,\u00a0<em>at-most-once<\/em>\u00a0and\u00a0<em>exactly-once<\/em>. Storm\u2019s reliability mechanisms are distributed, scalable, and fault-tolerant.<\/li>\n<li><strong>Spark Streaming: <\/strong>Apache<strong>\u00a0<\/strong>Spark Streaming defines its\u00a0<a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-streaming-fault-tolerance\/\">fault tolerance\u00a0<\/a>semantics in terms of the guarantees provided by the receivers and output operators. As per the\u00a0<a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-ecosystem-components\/\"><strong>Apache Spark architecture<\/strong><\/a>, the incoming data is read and replicated across different Spark executor nodes. This creates failure scenarios in which data has been received but may not be reflected. It handles fault tolerance differently in the case of\u00a0worker failure and driver failure.<\/li>\n<\/ul>\n<h3>v. 
Fault Tolerance (Handling process\/node level failures)<\/h3>\n<ul>\n<li><strong>Storm: <\/strong>Storm is designed with fault tolerance at its core. Storm daemons (<em>Nimbus and Supervisor<\/em>) are made to be fail-fast (meaning that the process self-destructs whenever any unexpected scenario is encountered) and stateless (all state is kept in Zookeeper or on disk).<\/li>\n<li><strong>Spark Streaming: <\/strong>The Driver Node (an equivalent of the Hadoop JobTracker, JT) is a\u00a0<em>SPOF<\/em> (single point of failure). If the driver node fails, then all executors will be lost along with their received and replicated in-memory information. Hence, Spark Streaming uses\u00a0<em>data checkpointing <\/em>to recover from driver failure.<\/li>\n<\/ul>\n<h3>vi. Debuggability and Monitoring<\/h3>\n<ul>\n<li><strong>Storm:<\/strong>\u00a0The Apache Storm UI supports visualization of every topology, with a complete breakdown of internal spouts and bolts. The UI additionally provides information on any errors occurring in tasks and fine-grained stats on the throughput and latency of every part of the running topology. It helps in debugging problems at a high level.<em><strong>\u00a0Metric based monitoring:<\/strong><\/em>\u00a0Storm\u2019s inbuilt metrics feature provides framework-level support for applications to emit any metrics, which can then be easily integrated with external metrics\/monitoring systems.<\/li>\n<li><strong>Spark Streaming:<\/strong> Spark web UI displays an extra Streaming tab that shows statistics of running receivers (whether receivers are active, the number of records received, receiver error, and so on) and completed batches (batch processing times, queuing delays, and so on). It is useful to observe the execution of the application. 
The following two metrics in the Spark web UI are particularly important for tuning the batch size:<\/li>\n<\/ul>\n<ol>\n<li><em>Processing Time<\/em>\u00a0\u2013 The time to process every batch of data.<\/li>\n<li><em>Scheduling Delay<\/em>\u00a0\u2013 The time a batch waits in a queue for the processing of previous batches to complete.<\/li>\n<\/ol>\n<h3>vii. Auto Scaling<\/h3>\n<ul>\n<li><strong>Storm:<\/strong>\u00a0It allows configuring the initial parallelism at various levels per topology \u2013 the number of worker processes, executors, and tasks. Additionally, it supports dynamic rebalancing, which allows increasing or reducing the number of worker processes and executors without needing to restart the cluster or the topology. However, the number of initial tasks configured stays constant throughout the life of the topology.<br \/>\nOnce all supervisor nodes are fully saturated with worker processes, and there\u2019s a need to scale out, one merely has to start a new supervisor node and point it to the cluster-wide Zookeeper.<br \/>\nIt is possible to automate the logic of monitoring the current resource consumption on every node in a Storm cluster and dynamically adding more resources. STORM-594 describes such an auto-scaling mechanism using a feedback system.<\/li>\n<li><strong>Spark Streaming:<\/strong> The community is currently working on dynamic scaling for streaming applications. At the moment, elastic scaling of Spark Streaming applications is not supported.<br \/>\nEssentially, dynamic allocation is not meant to be used in Spark Streaming at the moment (1.4 or earlier). The reason is that presently the receiving topology is static. The number of receivers is fixed. One receiver is allotted to every DStream instantiated, and it\u2019ll use one core within the cluster. Once the StreamingContext starts, this topology cannot be modified. Killing receivers leads to stopping the topology.<\/li>\n<\/ul>\n<h3>viii. 
Yarn Integration<\/h3>\n<ul>\n<li><strong>Storm: <\/strong>Storm integration with\u00a0<a href=\"http:\/\/data-flair.training\/blogs\/hadoop-yarn-tutorial\/\"><strong>YARN<\/strong><\/a>\u00a0is recommended through <em><strong>Apache Slider<\/strong><\/em>. Slider is a YARN application that deploys non-YARN distributed applications over a YARN cluster. It interacts with\u00a0<a href=\"http:\/\/data-flair.training\/blogs\/hadoop-yarn-resource-manager-guide-tutorial\/\"><strong>YARN RM<\/strong><\/a>\u00a0to spawn <em>containers<\/em> for the distributed application and then manages the lifecycle of these containers. Slider provides out-of-the-box application packages for Storm.<\/li>\n<li><strong>Spark Streaming: <\/strong>The Spark framework provides native integration with YARN. Spark Streaming, as a layer above Spark, merely leverages the integration. Every Spark Streaming application is launched as an individual YARN application. The <em>ApplicationMaster<\/em> <em>container<\/em> runs the Spark driver and initializes the <a href=\"http:\/\/data-flair.training\/blogs\/sparkcontext-in-apache-spark-tutorial\/\"><strong>SparkContext<\/strong><\/a>. Every <em>executor<\/em>\u00a0and <em>receiver<\/em> runs in a container managed by the <em>ApplicationMaster<\/em>. The ApplicationMaster then periodically submits one job per micro-batch on the YARN containers.<\/li>\n<\/ul>\n<h3>ix. Isolation<\/h3>\n<ul>\n<li><strong>Storm:<\/strong> Each worker process runs executors for a particular topology. That is, mixing of tasks from different topologies is not allowed at the worker process level, which provides topology level runtime isolation. Further, every executor thread runs one or more tasks of the same component (spout or bolt); that is, there is no mixing of tasks across components.<\/li>\n<li><strong>Spark Streaming: <\/strong>Each Spark application is a separate application run on the YARN cluster, where every executor runs in a separate YARN container. 
Thus, JVM level isolation is provided by Yarn since two different topologies can\u2019t execute in the same JVM. Besides, YARN provides resource level isolation so that container level resource constraints (CPU, memory limits) can be configured.<\/li>\n<\/ul>\n<h3>x. Open Source Apache Community<\/h3>\n<ul>\n<li><strong>Storm:<\/strong>\u00a0The Apache Storm powered-by page has a healthy list of companies that are running Storm in production for many use-cases. Many of them are large-scale web deployments that are pushing the boundaries for performance and scale. For instance, the Yahoo deployment consists of 2,300 nodes running Storm for near-real-time event processing, with the largest topology spanning across 400 nodes.<\/li>\n<li><strong>Spark Streaming: <\/strong>Apache<strong>\u00a0<\/strong>Spark Streaming is still emerging and has limited experience in production clusters. But the overall umbrella Apache Spark community is easily one of the largest and most active open source communities out there today. The overall charter is rapidly evolving given the large developer base. This should lead to the maturity of Spark Streaming in the near future.<\/li>\n<\/ul>\n<h3>xi. Ease of development<\/h3>\n<ul>\n<li><strong>Storm:<\/strong>\u00a0It provides extremely simple, rich and intuitive APIs that easily describe the<a href=\"http:\/\/data-flair.training\/blogs\/directed-acyclic-graph-dag-in-apache-spark\/\"><strong> DAG<\/strong><\/a> nature of process flow (topology). The Storm tuples, which give the abstraction of data flowing between nodes within the DAG, are dynamically typed. The motivation there is to simplify the APIs for ease of use. Any new custom tuple can be plugged in after registering its <strong>Kryo serializer<\/strong>. Developers can begin by writing topologies and running them in local cluster mode. 
In local mode, threads are used to simulate worker nodes, permitting the developer to set breakpoints, halt the execution, examine variables, and profile before deploying it to a distributed cluster where all of this is much harder.<\/li>\n<li><strong>Spark Streaming:<\/strong>\u00a0It offers\u00a0<strong><a href=\"http:\/\/data-flair.training\/blogs\/why-you-should-learn-scala-introductory-tutorial\/\">Scala<\/a><\/strong>\u00a0and <strong>Java<\/strong> APIs that follow more of a functional programming style (transformation of data). As a result, the topology code is much more concise. There\u2019s a rich set of API documentation and illustrative samples available for the developer.<\/li>\n<\/ul>\n<h3>xii. Ease of Operability<\/h3>\n<ul>\n<li><strong>Storm: <\/strong>It is a little tricky to deploy\/install Storm through the various tools (Puppet, and so on) and deploy the cluster. Apache Storm has a dependency on a <em>Zookeeper cluster<\/em>\u00a0so that it can handle coordination across the cluster and store state and statistics. It provides CLI support for actions like submit, activate, deactivate, list, and kill topology. Strong fault tolerance means that any daemon downtime doesn\u2019t impact executing topologies.<br \/>\nIn <em>standalone mode<\/em>, Storm daemons must be run in supervised mode. <span class=\"hardreadability\">In<em> YARN cluster mode<\/em>, Storm daemons are launched as containers and managed by the Application Master (Slider)<\/span>.<\/li>\n<li><strong>Spark Streaming:<\/strong>\u00a0It uses Spark as the fundamental execution framework. It should be easy to set up a\u00a0<strong><a href=\"http:\/\/data-flair.training\/blogs\/install-apache-spark-multi-node-cluster\/\">Spark cluster<\/a><\/strong>\u00a0on YARN. There are many deployment requirements. Usually we enable checkpointing for fault tolerance of the application driver. 
This could bring a dependency on fault-tolerant storage (<a href=\"http:\/\/data-flair.training\/blogs\/comprehensive-hdfs-guide-introduction-architecture-data-read-write-tutorial\/\"><strong>HDFS<\/strong><\/a>).<\/li>\n<\/ul>\n<h3>xiii. Language Options<\/h3>\n<ul>\n<li><strong>Storm:<\/strong> We can create Storm applications in <em>Java, Clojure, and Scala.<\/em><\/li>\n<li><strong>Spark Streaming:<\/strong> We can create Spark applications in <em>Java, Scala, Python, and <a href=\"http:\/\/data-flair.training\/blogs\/r-programming-tutorial\/\"><strong>R.<\/strong><\/a><\/em><\/li>\n<\/ul>\n<p>So, this was all about Apache Storm vs Spark Streaming. Hope you like the explanation.<\/p>\n<h2>3. Conclusion &#8211; Apache Storm vs Spark Streaming<\/h2>\n<p>Hence, the difference between Apache Storm vs Spark Streaming shows that Apache Storm is a solution for real-time stream processing. But Storm is very complex for developers to develop applications. Very few resources are available in the market for it.<br \/>\nStorm\u00a0can solve only one type of problem, i.e., stream processing. But the industry needs a generalized solution which can solve all the types of problems, for example, batch processing, stream processing, interactive processing, as well as iterative processing. Here<strong> <a href=\"http:\/\/data-flair.training\/blogs\/apache-spark-introduction-spark-comprehensive-tutorial\/\">Apache Spark <\/a><\/strong>comes into the limelight, as it is a general purpose computation engine. It can handle any type of problem. Apart from this, Apache Spark is much easier for developers and can integrate very well with <strong><a href=\"http:\/\/data-flair.training\/blogs\/hadoop-introduction-comprehensive-tutorial-guide-beginners\/\">Hadoop<\/a>.<\/strong><br \/>\nIf you feel like something is missing in the above article on Apache Storm vs Spark Streaming. 
So, please drop a comment.<br \/>\n<strong>See Also-<\/strong><br \/>\n<a href=\"http:\/\/data-flair.training\/blogs\/hadoop-vs-spark-vs-flink-comparison\/\">Apache Hadoop vs Spark vs Flink.<\/a><br \/>\nReference:<br \/>\n<a href=\"http:\/\/spark.apache.org\/\">http:\/\/spark.apache.org\/<\/a><br \/>\n<a href=\"http:\/\/storm.apache.org\/\">http:\/\/storm.apache.org\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>1. Objective This tutorial will cover the comparison between Apache Storm vs Spark Streaming. Apache Storm is the stream processing engine for processing real-time streaming data. While Apache Spark is general purpose computing engine.&#46;&#46;&#46;<\/p>\n","protected":false},"author":6,"featured_media":43194,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[10,69],"tags":[896,976,1907,4548,11396,13130,13139,13149,13884],"class_list":["post-739","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-spark","category-storm","tag-apache-spark","tag-apache-storm","tag-big-data","tag-fault-tolerance","tag-real-time-processing","tag-spark-streaming","tag-spark-training","tag-spark-vs-storm","tag-stream-processing"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.4 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Apache Storm vs Spark Streaming - Feature wise Comparison - DataFlair<\/title>\n<meta name=\"description\" content=\"Comparison between Apache Storm vs Spark Streaming to learn how Spark is better than Storm, Apache Spark advantage over Apache Storm\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" 
\/>\n<meta property=\"og:title\" content=\"Apache Storm vs Spark Streaming - Feature wise Comparison - DataFlair\" \/>\n<meta property=\"og:description\" content=\"Comparison between Apache Storm vs Spark Streaming to learn how Spark is better than Storm, Apache Spark advantage over Apache Storm\" \/>\n<meta property=\"og:url\" content=\"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/\" \/>\n<meta property=\"og:site_name\" content=\"DataFlair\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/DataFlairWS\/\" \/>\n<meta property=\"article:published_time\" content=\"2016-07-26T15:17:40+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2018-11-21T10:58:41+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"628\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/jpeg\" \/>\n<meta name=\"author\" content=\"DataFlair Team\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:creator\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:site\" content=\"@DataFlairWS\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"DataFlair Team\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"9 minutes\" \/>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Apache Storm vs Spark Streaming - Feature wise Comparison - DataFlair","description":"Comparison between Apache Storm vs Spark Streaming to learn how Spark is better than Storm, Apache Spark advantage over Apache Storm","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/","og_locale":"en_US","og_type":"article","og_title":"Apache Storm vs Spark Streaming - Feature wise Comparison - DataFlair","og_description":"Comparison between Apache Storm vs Spark Streaming to learn how Spark is better than Storm, Apache Spark advantage over Apache Storm","og_url":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/","og_site_name":"DataFlair","article_publisher":"https:\/\/www.facebook.com\/DataFlairWS\/","article_published_time":"2016-07-26T15:17:40+00:00","article_modified_time":"2018-11-21T10:58:41+00:00","og_image":[{"width":1200,"height":628,"url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg","type":"image\/jpeg"}],"author":"DataFlair Team","twitter_card":"summary_large_image","twitter_creator":"@DataFlairWS","twitter_site":"@DataFlairWS","twitter_misc":{"Written by":"DataFlair Team","Est. 
reading time":"9 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#article","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/"},"author":{"name":"DataFlair Team","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/2c58ecb4f73a39f0ef993f1ddfcd7b89"},"headline":"Apache Storm vs Spark Streaming &#8211; Feature wise Comparison","datePublished":"2016-07-26T15:17:40+00:00","dateModified":"2018-11-21T10:58:41+00:00","mainEntityOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/"},"wordCount":1808,"commentCount":15,"publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg","keywords":["apache spark","apache storm","big data","fault tolerance","real time processing","spark streaming","spark training","spark vs storm","stream processing"],"articleSection":["Apache Spark Tutorials","Storm Tutorials"],"inLanguage":"en-US","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/","url":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/","name":"Apache Storm vs Spark Streaming - Feature wise Comparison - 
DataFlair","isPartOf":{"@id":"https:\/\/data-flair.training\/blogs\/#website"},"primaryImageOfPage":{"@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#primaryimage"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#primaryimage"},"thumbnailUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg","datePublished":"2016-07-26T15:17:40+00:00","dateModified":"2018-11-21T10:58:41+00:00","description":"Comparison between Apache Storm vs Spark Streaming to learn how Spark is better than Storm, Apache Spark advantage over Apache Storm","breadcrumb":{"@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#primaryimage","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/apache-spark-streaming-vs-storm-1.jpg","width":1200,"height":628,"caption":"Apache Storm vs Spark Streaming - Feature wise Comparison"},{"@type":"BreadcrumbList","@id":"https:\/\/data-flair.training\/blogs\/apache-storm-vs-spark-streaming\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog Home","item":"https:\/\/data-flair.training\/blogs\/"},{"@type":"ListItem","position":2,"name":"Apache Spark Tutorials","item":"https:\/\/data-flair.training\/blogs\/category\/spark\/"},{"@type":"ListItem","position":3,"name":"Apache Storm vs Spark Streaming &#8211; Feature wise 
Comparison"}]},{"@type":"WebSite","@id":"https:\/\/data-flair.training\/blogs\/#website","url":"https:\/\/data-flair.training\/blogs\/","name":"DataFlair","description":"Learn Today. Lead Tomorrow.","publisher":{"@id":"https:\/\/data-flair.training\/blogs\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/data-flair.training\/blogs\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/data-flair.training\/blogs\/#organization","name":"DataFlair","url":"https:\/\/data-flair.training\/blogs\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/","url":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","contentUrl":"https:\/\/data-flair.training\/blogs\/wp-content\/uploads\/sites\/2\/2016\/07\/Data-Flair.png","width":106,"height":48,"caption":"DataFlair"},"image":{"@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/DataFlairWS\/","https:\/\/x.com\/DataFlairWS","https:\/\/www.linkedin.com\/company\/dataflair-web-services-pvt-ltd\/","https:\/\/www.youtube.com\/user\/DataFlairWS"]},{"@type":"Person","@id":"https:\/\/data-flair.training\/blogs\/#\/schema\/person\/2c58ecb4f73a39f0ef993f1ddfcd7b89","name":"DataFlair Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/1ce4a0e3e542444fc73bbebf83e89e8b73e2d95ccb1fcee64da9945f078b97c5?s=96&d=mm&r=g","caption":"DataFlair Team"},"description":"The 
DataFlair Team provides industry-driven content on programming, Java, Python, C++, DSA, AI, ML, data Science, Android, Flutter, MERN, Web Development, and technology. Our expert educators focus on delivering value-packed, easy-to-follow resources for tech enthusiasts and professionals.","url":"https:\/\/data-flair.training\/blogs\/author\/dfteam2\/"}]}},"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/739","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/users\/6"}],"replies":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/comments?post=739"}],"version-history":[{"count":5,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/739\/revisions"}],"predecessor-version":[{"id":43195,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/posts\/739\/revisions\/43195"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media\/43194"}],"wp:attachment":[{"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/media?parent=739"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/categories?post=739"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/data-flair.training\/blogs\/wp-json\/wp\/v2\/tags?post=739"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}