Sunday, August 7, 2016

Spark streaming to Elasticsearch from Cassandra and MySQL correct configuration

Leave a Comment

I have inherited a messy cluster and am trying to figure out whether this is the correct way to set up Spark.

Goal: we have approximately 13M records in MySQL, spread across several tables, that need to be indexed in Elasticsearch. Similarly, we have several million records in Cassandra that also need to be indexed in Elasticsearch.

We do a full reindex because a lot of data gets changed in the DB, and the application currently does not propagate its CRUD operations to ES.

Everything is set up on AWS EC2 clusters, with some RDS instances for MySQL.

  • I have 4 EC2 (m4.xlarge) machines running ElasticSearch Nodes.
  • I have 3 EC2 machines running Cassandra Nodes
  • I have 1 Master + 1 Slave RDS running MySQL.
  • I have 1 EC2 (c3.xlarge) machine with Spark set up (everything is actually the default configuration).

The Scala application that was developed takes the MySQL data, runs a mapping, transforms it into RDDs, and pushes them to ES. The same goes for Cassandra.

Issue: CPU and I/O max out on the Elasticsearch machines; they sit at 100%. The entire process takes several hours.

Before I optimize the code: should I be running Spark in standalone mode, or should I be using it differently, and why? (I need to know so I can tell IT whether I need to deploy or change the servers.)

Thanks

UPDATE

{   "timestamp": 1470397729366,   "name": "es-server-1",   "transport_address": "inet[/XXX.xx.xx.XX:9300]",   "host": "es-server-1",   "ip": [     "inet[/xxx.xx.xx.XX:9300]",     "NONE"   ],   "indices": {     "docs": {       "count": 13297950,       "deleted": 4132597     },     "store": {       "size": "19gb",       "size_in_bytes": 20405153604,       "throttle_time": "8.5h",       "throttle_time_in_millis": 30887045     },     "indexing": {       "index_total": 1306040463,       "index_time": "39.4d",       "index_time_in_millis": 3409319879,       "index_current": 0,       "delete_total": 3176,       "delete_time": "468ms",       "delete_time_in_millis": 468,       "delete_current": 0,       "noop_update_total": 0,       "is_throttled": false,       "throttle_time": "3.7h",       "throttle_time_in_millis": 13387134     },     "get": {       "total": 0,       "get_time": "0s",       "time_in_millis": 0,       "exists_total": 0,       "exists_time": "0s",       "exists_time_in_millis": 0,       "missing_total": 0,       "missing_time": "0s",       "missing_time_in_millis": 0,       "current": 0     },     "search": {       "open_contexts": 2,       "query_total": 91817250,       "query_time": "7.7d",       "query_time_in_millis": 669964087,       "query_current": 0,       "fetch_total": 6111935,       "fetch_time": "26.8m",       "fetch_time_in_millis": 1609543,       "fetch_current": 0     },     "merges": {       "current": 1,       "current_docs": 140211,       "current_size": "358.5mb",       "current_size_in_bytes": 375921439,       "total": 1308682,       "total_time": "52.2d",       "total_time_in_millis": 4515401497,       "total_docs": 22683582490,       "total_size": "18.3tb",       "total_size_in_bytes": 20196592980131     },     "refresh": {       "total": 6513707,       "total_time": "5.9d",       "total_time_in_millis": 515651065     },     "flush": {       "total": 49408,       "total_time": "1.7d",       "total_time_in_millis": 147150501     },    
 "warmer": {       "current": 0,       "total": 1696226,       "total_time": "26.4m",       "total_time_in_millis": 1589979     },     "filter_cache": {       "memory_size": "429.2mb",       "memory_size_in_bytes": 450131652,       "evictions": 1767369     },     "id_cache": {       "memory_size": "0b",       "memory_size_in_bytes": 0     },     "fielddata": {       "memory_size": "411mb",       "memory_size_in_bytes": 431001032,       "evictions": 0     },     "percolate": {       "total": 0,       "get_time": "0s",       "time_in_millis": 0,       "current": 0,       "memory_size_in_bytes": -1,       "memory_size": "-1b",       "queries": 0     },     "completion": {       "size": "0b",       "size_in_bytes": 0     },     "segments": {       "count": 272,       "memory": "113.4mb",       "memory_in_bytes": 118989720,       "index_writer_memory": "0b",       "index_writer_memory_in_bytes": 0,       "index_writer_max_memory": "813.7mb",       "index_writer_max_memory_in_bytes": 853250866,       "version_map_memory": "0b",       "version_map_memory_in_bytes": 0,       "fixed_bit_set": "32.1mb",       "fixed_bit_set_memory_in_bytes": 33692696     },     "translog": {       "operations": 0,       "size": "17b",       "size_in_bytes": 17     },     "suggest": {       "total": 4,       "time": "0s",       "time_in_millis": 0,       "current": 0     },     "query_cache": {       "memory_size": "0b",       "memory_size_in_bytes": 0,       "evictions": 0,       "hit_count": 0,       "miss_count": 0     }   },   "os": {     "timestamp": 1470397728691,     "uptime": "4.2h",     "uptime_in_millis": 15430550,     "load_average": [       0.01,       0.13,       0.17     ],     "cpu": {       "sys": 0,       "user": 0,       "idle": 98,       "usage": 0,       "stolen": 0     },     "mem": {       "free": "1.5gb",       "free_in_bytes": 1627320320,       "used": "14.1gb",       "used_in_bytes": 15200206848,       "free_percent": 42,       "used_percent": 57,       "actual_free": 
"6.6gb",       "actual_free_in_bytes": 7102046208,       "actual_used": "9gb",       "actual_used_in_bytes": 9725480960     },     "swap": {       "used": "0b",       "used_in_bytes": 0,       "free": "0b",       "free_in_bytes": 0     }   },   "process": {     "timestamp": 1470397729391,     "open_file_descriptors": 1072,     "cpu": {       "percent": 8,       "sys": "2.8d",       "sys_in_millis": 247968490,       "user": "55.5d",       "user_in_millis": 4803132120,       "total": "58.4d",       "total_in_millis": 5051100610     },     "mem": {       "resident": "8.8gb",       "resident_in_bytes": 9499480064,       "share": "490.4mb",       "share_in_bytes": 514248704,       "total_virtual": "13gb",       "total_virtual_in_bytes": 14003888128     }   },   "jvm": {     "timestamp": 1470397728691,     "uptime": "168.6d",     "uptime_in_millis": 14575120452,     "mem": {       "heap_used": "4gb",       "heap_used_in_bytes": 4337667264,       "heap_used_percent": 51,       "heap_committed": "7.8gb",       "heap_committed_in_bytes": 8378908672,       "heap_max": "7.8gb",       "heap_max_in_bytes": 8378908672,       "non_heap_used": "90.5mb",       "non_heap_used_in_bytes": 94943920,       "non_heap_committed": "112mb",       "non_heap_committed_in_bytes": 117506048,       "pools": {         "young": {           "used": "263mb",           "used_in_bytes": 275871008,           "max": "266.2mb",           "max_in_bytes": 279183360,           "peak_used": "266.2mb",           "peak_used_in_bytes": 279183360,           "peak_max": "266.2mb",           "peak_max_in_bytes": 279183360         },         "survivor": {           "used": "513.3kb",           "used_in_bytes": 525696,           "max": "33.2mb",           "max_in_bytes": 34865152,           "peak_used": "33.2mb",           "peak_used_in_bytes": 34865152,           "peak_max": "33.2mb",           "peak_max_in_bytes": 34865152         },         "old": {           "used": "3.7gb",           "used_in_bytes": 
4061270560,           "max": "7.5gb",           "max_in_bytes": 8064860160,           "peak_used": "5.8gb",           "peak_used_in_bytes": 6271548128,           "peak_max": "7.5gb",           "peak_max_in_bytes": 8064860160         }       }     },     "threads": {       "count": 75,       "peak_count": 90     },     "gc": {       "collectors": {         "young": {           "collection_count": 2827443,           "collection_time": "22.1h",           "collection_time_in_millis": 79772838         },         "old": {           "collection_count": 2420,           "collection_time": "4.2m",           "collection_time_in_millis": 253792         }       }     },     "buffer_pools": {       "direct": {         "count": 3519,         "used": "82.1mb",         "used_in_bytes": 86127850,         "total_capacity": "82.1mb",         "total_capacity_in_bytes": 86127850       },       "mapped": {         "count": 250,         "used": "2.8gb",         "used_in_bytes": 3045093527,         "total_capacity": "2.8gb",         "total_capacity_in_bytes": 3045093527       }     }   },   "thread_pool": {     "generic": {       "threads": 1,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 4,       "completed": 16062931     },     "index": {       "threads": 4,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 4,       "completed": 35253     },     "bench": {       "threads": 0,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 0,       "completed": 0     },     "get": {       "threads": 0,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 0,       "completed": 0     },     "snapshot": {       "threads": 0,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 0,       "completed": 0     },     "merge": {       "threads": 1,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 2,       "completed": 3112788     },     "suggest": {       "threads": 
4,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 4,       "completed": 4     },     "bulk": {       "threads": 4,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 4,       "completed": 46447866     },     "optimize": {       "threads": 0,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 0,       "completed": 0     },     "warmer": {       "threads": 1,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 2,       "completed": 159210301     },     "flush": {       "threads": 1,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 2,       "completed": 756948     },     "search": {       "threads": 12,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 12,       "completed": 133987209     },     "listener": {       "threads": 2,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 2,       "completed": 26868988     },     "percolate": {       "threads": 0,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 0,       "completed": 0     },     "management": {       "threads": 5,       "queue": 0,       "active": 1,       "rejected": 0,       "largest": 5,       "completed": 227031495     },     "refresh": {       "threads": 1,       "queue": 0,       "active": 0,       "rejected": 0,       "largest": 2,       "completed": 6532756     }   },   "network": {     "tcp": {       "active_opens": 1782589,       "passive_opens": 20362132,       "curr_estab": 226,       "in_segs": 2535219205,       "out_segs": 2254643744,       "retrans_segs": 227538,       "estab_resets": 273558,       "attempt_fails": 14303,       "in_errs": 100,       "out_rsts": 260243     }   },   "fs": {     "timestamp": 1470397728691,     "total": {       "total": "196.6gb",       "total_in_bytes": 211110395904,       "free": "177.4gb",       "free_in_bytes": 190571290624,       "available": "167.4gb",       
"available_in_bytes": 179800317952,       "disk_reads": 567609070,       "disk_writes": 216561071,       "disk_io_op": 784170141,       "disk_read_size": "15.6tb",       "disk_read_size_in_bytes": 17215484946432,       "disk_write_size": "23.2tb",       "disk_write_size_in_bytes": 25537856344064,       "disk_io_size": "38.8tb",       "disk_io_size_in_bytes": 42753341290496,       "disk_queue": "0",       "disk_service_time": "0"     },     "data": [       {         "path": "/data/elasticsearch/data0/es-server-1/nodes/0",         "mount": "/data/elasticsearch/data0",         "dev": "/dev/xvdf",         "total": "98.3gb",         "total_in_bytes": 105555197952,         "free": "88.8gb",         "free_in_bytes": 95364030464,         "available": "83.7gb",         "available_in_bytes": 89978544128,         "disk_reads": 280044788,         "disk_writes": 107670720,         "disk_io_op": 387715508,         "disk_read_size": "7.7tb",         "disk_read_size_in_bytes": 8567588643840,         "disk_write_size": "11.5tb",         "disk_write_size_in_bytes": 12689373003776,         "disk_io_size": "19.3tb",         "disk_io_size_in_bytes": 21256961647616,         "disk_queue": "0",         "disk_service_time": "0"       },       {         "path": "/data/elasticsearch/data1/es-server-1/nodes/0",         "mount": "/data/elasticsearch/data1",         "dev": "/dev/xvdg",         "total": "98.3gb",         "total_in_bytes": 105555197952,         "free": "88.6gb",         "free_in_bytes": 95207260160,         "available": "83.6gb",         "available_in_bytes": 89821773824,         "disk_reads": 287564282,         "disk_writes": 108890351,         "disk_io_op": 396454633,         "disk_read_size": "7.8tb",         "disk_read_size_in_bytes": 8647896302592,         "disk_write_size": "11.6tb",         "disk_write_size_in_bytes": 12848483340288,         "disk_io_size": "19.5tb",         "disk_io_size_in_bytes": 21496379642880,         "disk_queue": "0",         "disk_service_time": 
"0"       }     ]   },   "transport": {     "server_open": 104,     "rx_count": 883258049,     "rx_size": "1.8tb",     "rx_size_in_bytes": 2051353458225,     "tx_count": 682553258,     "tx_size": "1.6tb",     "tx_size_in_bytes": 1804963466280   },   "http": {     "current_open": 12,     "total_opened": 19973834   },   "breakers": {     "request": {       "limit_size_in_bytes": 3351563468,       "limit_size": "3.1gb",       "estimated_size_in_bytes": 0,       "estimated_size": "0b",       "overhead": 1,       "tripped": 0     },     "fielddata": {       "limit_size_in_bytes": 5027345203,       "limit_size": "4.6gb",       "estimated_size_in_bytes": 431001032,       "estimated_size": "411mb",       "overhead": 1.03,       "tripped": 0     },     "parent": {       "limit_size_in_bytes": 5865236070,       "limit_size": "5.4gb",       "estimated_size_in_bytes": 431001032,       "estimated_size": "411mb",       "overhead": 1,       "tripped": 0     }   } } 

0 Answers

If You Enjoyed This, Take 5 Seconds To Share It

0 comments:

Post a Comment