I have inherited a messy cluster and am trying to figure out whether this is the correct way to set up Spark.
Goal: we have approximately 13M records in MySQL across several tables that need to be indexed in Elasticsearch. Similarly, we have several million records in Cassandra that also need to be indexed in Elasticsearch.
We do a full reindex because a lot of data gets changed in the DB and the application currently doesn't perform its CRUD operations against ES.
Everything is set up on AWS EC2 clusters, with some RDS instances for MySQL.
- I have 4 EC2 (m4.xlarge) machines running ElasticSearch Nodes.
- I have 3 EC2 machines running Cassandra Nodes
- I have 1 Master + 1 Slave RDS running MySQL.
- I have 1 EC2 (c3.xlarge) machine with Spark set up (everything is actually the default configuration).
The Scala application that was developed takes the MySQL data, runs a mapping, transforms it into RDDs and pushes them to ES. The same goes for Cassandra.
Issue: CPU and I/O max out on the Elasticsearch machines, sitting at 100%. The entire process takes several hours.
Before I optimize the code: should I be running Spark in standalone mode, or should I be using it differently, and why? (I need to know so I can tell IT whether I need to deploy or change the servers.)
Thanks
UPDATE
{ "timestamp": 1470397729366, "name": "es-server-1", "transport_address": "inet[/XXX.xx.xx.XX:9300]", "host": "es-server-1", "ip": [ "inet[/xxx.xx.xx.XX:9300]", "NONE" ], "indices": { "docs": { "count": 13297950, "deleted": 4132597 }, "store": { "size": "19gb", "size_in_bytes": 20405153604, "throttle_time": "8.5h", "throttle_time_in_millis": 30887045 }, "indexing": { "index_total": 1306040463, "index_time": "39.4d", "index_time_in_millis": 3409319879, "index_current": 0, "delete_total": 3176, "delete_time": "468ms", "delete_time_in_millis": 468, "delete_current": 0, "noop_update_total": 0, "is_throttled": false, "throttle_time": "3.7h", "throttle_time_in_millis": 13387134 }, "get": { "total": 0, "get_time": "0s", "time_in_millis": 0, "exists_total": 0, "exists_time": "0s", "exists_time_in_millis": 0, "missing_total": 0, "missing_time": "0s", "missing_time_in_millis": 0, "current": 0 }, "search": { "open_contexts": 2, "query_total": 91817250, "query_time": "7.7d", "query_time_in_millis": 669964087, "query_current": 0, "fetch_total": 6111935, "fetch_time": "26.8m", "fetch_time_in_millis": 1609543, "fetch_current": 0 }, "merges": { "current": 1, "current_docs": 140211, "current_size": "358.5mb", "current_size_in_bytes": 375921439, "total": 1308682, "total_time": "52.2d", "total_time_in_millis": 4515401497, "total_docs": 22683582490, "total_size": "18.3tb", "total_size_in_bytes": 20196592980131 }, "refresh": { "total": 6513707, "total_time": "5.9d", "total_time_in_millis": 515651065 }, "flush": { "total": 49408, "total_time": "1.7d", "total_time_in_millis": 147150501 }, "warmer": { "current": 0, "total": 1696226, "total_time": "26.4m", "total_time_in_millis": 1589979 }, "filter_cache": { "memory_size": "429.2mb", "memory_size_in_bytes": 450131652, "evictions": 1767369 }, "id_cache": { "memory_size": "0b", "memory_size_in_bytes": 0 }, "fielddata": { "memory_size": "411mb", "memory_size_in_bytes": 431001032, "evictions": 0 }, "percolate": { "total": 0, "get_time": "0s", 
"time_in_millis": 0, "current": 0, "memory_size_in_bytes": -1, "memory_size": "-1b", "queries": 0 }, "completion": { "size": "0b", "size_in_bytes": 0 }, "segments": { "count": 272, "memory": "113.4mb", "memory_in_bytes": 118989720, "index_writer_memory": "0b", "index_writer_memory_in_bytes": 0, "index_writer_max_memory": "813.7mb", "index_writer_max_memory_in_bytes": 853250866, "version_map_memory": "0b", "version_map_memory_in_bytes": 0, "fixed_bit_set": "32.1mb", "fixed_bit_set_memory_in_bytes": 33692696 }, "translog": { "operations": 0, "size": "17b", "size_in_bytes": 17 }, "suggest": { "total": 4, "time": "0s", "time_in_millis": 0, "current": 0 }, "query_cache": { "memory_size": "0b", "memory_size_in_bytes": 0, "evictions": 0, "hit_count": 0, "miss_count": 0 } }, "os": { "timestamp": 1470397728691, "uptime": "4.2h", "uptime_in_millis": 15430550, "load_average": [ 0.01, 0.13, 0.17 ], "cpu": { "sys": 0, "user": 0, "idle": 98, "usage": 0, "stolen": 0 }, "mem": { "free": "1.5gb", "free_in_bytes": 1627320320, "used": "14.1gb", "used_in_bytes": 15200206848, "free_percent": 42, "used_percent": 57, "actual_free": "6.6gb", "actual_free_in_bytes": 7102046208, "actual_used": "9gb", "actual_used_in_bytes": 9725480960 }, "swap": { "used": "0b", "used_in_bytes": 0, "free": "0b", "free_in_bytes": 0 } }, "process": { "timestamp": 1470397729391, "open_file_descriptors": 1072, "cpu": { "percent": 8, "sys": "2.8d", "sys_in_millis": 247968490, "user": "55.5d", "user_in_millis": 4803132120, "total": "58.4d", "total_in_millis": 5051100610 }, "mem": { "resident": "8.8gb", "resident_in_bytes": 9499480064, "share": "490.4mb", "share_in_bytes": 514248704, "total_virtual": "13gb", "total_virtual_in_bytes": 14003888128 } }, "jvm": { "timestamp": 1470397728691, "uptime": "168.6d", "uptime_in_millis": 14575120452, "mem": { "heap_used": "4gb", "heap_used_in_bytes": 4337667264, "heap_used_percent": 51, "heap_committed": "7.8gb", "heap_committed_in_bytes": 8378908672, "heap_max": "7.8gb", 
"heap_max_in_bytes": 8378908672, "non_heap_used": "90.5mb", "non_heap_used_in_bytes": 94943920, "non_heap_committed": "112mb", "non_heap_committed_in_bytes": 117506048, "pools": { "young": { "used": "263mb", "used_in_bytes": 275871008, "max": "266.2mb", "max_in_bytes": 279183360, "peak_used": "266.2mb", "peak_used_in_bytes": 279183360, "peak_max": "266.2mb", "peak_max_in_bytes": 279183360 }, "survivor": { "used": "513.3kb", "used_in_bytes": 525696, "max": "33.2mb", "max_in_bytes": 34865152, "peak_used": "33.2mb", "peak_used_in_bytes": 34865152, "peak_max": "33.2mb", "peak_max_in_bytes": 34865152 }, "old": { "used": "3.7gb", "used_in_bytes": 4061270560, "max": "7.5gb", "max_in_bytes": 8064860160, "peak_used": "5.8gb", "peak_used_in_bytes": 6271548128, "peak_max": "7.5gb", "peak_max_in_bytes": 8064860160 } } }, "threads": { "count": 75, "peak_count": 90 }, "gc": { "collectors": { "young": { "collection_count": 2827443, "collection_time": "22.1h", "collection_time_in_millis": 79772838 }, "old": { "collection_count": 2420, "collection_time": "4.2m", "collection_time_in_millis": 253792 } } }, "buffer_pools": { "direct": { "count": 3519, "used": "82.1mb", "used_in_bytes": 86127850, "total_capacity": "82.1mb", "total_capacity_in_bytes": 86127850 }, "mapped": { "count": 250, "used": "2.8gb", "used_in_bytes": 3045093527, "total_capacity": "2.8gb", "total_capacity_in_bytes": 3045093527 } } }, "thread_pool": { "generic": { "threads": 1, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 16062931 }, "index": { "threads": 4, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 35253 }, "bench": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "get": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "snapshot": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "merge": { "threads": 1, "queue": 0, "active": 0, "rejected": 0, "largest": 
2, "completed": 3112788 }, "suggest": { "threads": 4, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 4 }, "bulk": { "threads": 4, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 46447866 }, "optimize": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "warmer": { "threads": 1, "queue": 0, "active": 0, "rejected": 0, "largest": 2, "completed": 159210301 }, "flush": { "threads": 1, "queue": 0, "active": 0, "rejected": 0, "largest": 2, "completed": 756948 }, "search": { "threads": 12, "queue": 0, "active": 0, "rejected": 0, "largest": 12, "completed": 133987209 }, "listener": { "threads": 2, "queue": 0, "active": 0, "rejected": 0, "largest": 2, "completed": 26868988 }, "percolate": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "management": { "threads": 5, "queue": 0, "active": 1, "rejected": 0, "largest": 5, "completed": 227031495 }, "refresh": { "threads": 1, "queue": 0, "active": 0, "rejected": 0, "largest": 2, "completed": 6532756 } }, "network": { "tcp": { "active_opens": 1782589, "passive_opens": 20362132, "curr_estab": 226, "in_segs": 2535219205, "out_segs": 2254643744, "retrans_segs": 227538, "estab_resets": 273558, "attempt_fails": 14303, "in_errs": 100, "out_rsts": 260243 } }, "fs": { "timestamp": 1470397728691, "total": { "total": "196.6gb", "total_in_bytes": 211110395904, "free": "177.4gb", "free_in_bytes": 190571290624, "available": "167.4gb", "available_in_bytes": 179800317952, "disk_reads": 567609070, "disk_writes": 216561071, "disk_io_op": 784170141, "disk_read_size": "15.6tb", "disk_read_size_in_bytes": 17215484946432, "disk_write_size": "23.2tb", "disk_write_size_in_bytes": 25537856344064, "disk_io_size": "38.8tb", "disk_io_size_in_bytes": 42753341290496, "disk_queue": "0", "disk_service_time": "0" }, "data": [ { "path": "/data/elasticsearch/data0/es-server-1/nodes/0", "mount": "/data/elasticsearch/data0", "dev": "/dev/xvdf", 
"total": "98.3gb", "total_in_bytes": 105555197952, "free": "88.8gb", "free_in_bytes": 95364030464, "available": "83.7gb", "available_in_bytes": 89978544128, "disk_reads": 280044788, "disk_writes": 107670720, "disk_io_op": 387715508, "disk_read_size": "7.7tb", "disk_read_size_in_bytes": 8567588643840, "disk_write_size": "11.5tb", "disk_write_size_in_bytes": 12689373003776, "disk_io_size": "19.3tb", "disk_io_size_in_bytes": 21256961647616, "disk_queue": "0", "disk_service_time": "0" }, { "path": "/data/elasticsearch/data1/es-server-1/nodes/0", "mount": "/data/elasticsearch/data1", "dev": "/dev/xvdg", "total": "98.3gb", "total_in_bytes": 105555197952, "free": "88.6gb", "free_in_bytes": 95207260160, "available": "83.6gb", "available_in_bytes": 89821773824, "disk_reads": 287564282, "disk_writes": 108890351, "disk_io_op": 396454633, "disk_read_size": "7.8tb", "disk_read_size_in_bytes": 8647896302592, "disk_write_size": "11.6tb", "disk_write_size_in_bytes": 12848483340288, "disk_io_size": "19.5tb", "disk_io_size_in_bytes": 21496379642880, "disk_queue": "0", "disk_service_time": "0" } ] }, "transport": { "server_open": 104, "rx_count": 883258049, "rx_size": "1.8tb", "rx_size_in_bytes": 2051353458225, "tx_count": 682553258, "tx_size": "1.6tb", "tx_size_in_bytes": 1804963466280 }, "http": { "current_open": 12, "total_opened": 19973834 }, "breakers": { "request": { "limit_size_in_bytes": 3351563468, "limit_size": "3.1gb", "estimated_size_in_bytes": 0, "estimated_size": "0b", "overhead": 1, "tripped": 0 }, "fielddata": { "limit_size_in_bytes": 5027345203, "limit_size": "4.6gb", "estimated_size_in_bytes": 431001032, "estimated_size": "411mb", "overhead": 1.03, "tripped": 0 }, "parent": { "limit_size_in_bytes": 5865236070, "limit_size": "5.4gb", "estimated_size_in_bytes": 431001032, "estimated_size": "411mb", "overhead": 1, "tripped": 0 } } }
0 comments:
Post a Comment