influxdb: Avoid out of memory by influxDB (#4291)

After a few hours running with InfluxDB configured, CloudStack hangs due to OutOfMemoryException raised. The exception happens at com.cloud.server.StatsCollector.writeBatches(StatsCollector.java:1510):

2020-08-12 21:19:00,972 ERROR [c.c.s.StatsCollector] (StatsCollector-6:ctx-0a4cfe6a) (logid:03a7ba48) Error trying to retrieve host stats
java.lang.OutOfMemoryError: unable to create new native thread
        ...
        at org.influxdb.impl.BatchProcessor.<init>(BatchProcessor.java:294)
        at org.influxdb.impl.BatchProcessor$Builder.build(BatchProcessor.java:201)
        at org.influxdb.impl.InfluxDBImpl.enableBatch(InfluxDBImpl.java:311)
        at com.cloud.server.StatsCollector.writeBatches(StatsCollector.java:1510)
        at com.cloud.server.StatsCollector$AbstractStatsCollector.sendMetricsToInfluxdb(StatsCollector.java:1351)
        at com.cloud.server.StatsCollector$HostCollector.runInContext(StatsCollector.java:522)
Context on InfluxDB Batch: Enabling batch on InfluxDB is great and speeds writing but it requires caution to avoid Zombie threads.

Solution: This happens because the batching feature creates an internal thread pool that needs to be shut down explicitly; therefore, it is important to add: influxDB.close().
This commit is contained in:
Gabriel Beims Bräscher 2020-09-01 07:29:43 -03:00 committed by GitHub
parent ba4b04ff37
commit 5c29d5ba45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 16 deletions

View File

@ -130,7 +130,7 @@
<cs.guava.version>23.6-jre</cs.guava.version>
<cs.httpclient.version>4.5.4</cs.httpclient.version>
<cs.httpcore.version>4.4.8</cs.httpcore.version>
<cs.influxdb-java.version>2.15</cs.influxdb-java.version>
<cs.influxdb-java.version>2.20</cs.influxdb-java.version>
<cs.jackson.version>2.9.2</cs.jackson.version>
<cs.jasypt.version>1.9.2</cs.jasypt.version>
<cs.java-ipv6.version>0.16</cs.java-ipv6.version>

View File

@ -1334,21 +1334,25 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
protected void sendMetricsToInfluxdb(Map<Object, Object> metrics) {
InfluxDB influxDbConnection = createInfluxDbConnection();
Pong response = influxDbConnection.ping();
if (response.getVersion().equalsIgnoreCase("unknown")) {
throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort));
try {
Pong response = influxDbConnection.ping();
if (response.getVersion().equalsIgnoreCase("unknown")) {
throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort));
}
Collection<Object> metricsObjects = metrics.values();
List<Point> points = new ArrayList<>();
s_logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort));
for (Object metricsObject : metricsObjects) {
Point vmPoint = creteInfluxDbPoint(metricsObject);
points.add(vmPoint);
}
writeBatches(influxDbConnection, databaseName, points);
} finally {
influxDbConnection.close();
}
Collection<Object> metricsObjects = metrics.values();
List<Point> points = new ArrayList<>();
s_logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort));
for (Object metricsObject : metricsObjects) {
Point vmPoint = creteInfluxDbPoint(metricsObject);
points.add(vmPoint);
}
writeBatches(influxDbConnection, databaseName, points);
}
/**
@ -1507,7 +1511,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
*/
protected void writeBatches(InfluxDB influxDbConnection, String dbName, List<Point> points) {
BatchPoints batchPoints = BatchPoints.database(dbName).build();
influxDbConnection.enableBatch(BatchOptions.DEFAULTS);
if(!influxDbConnection.isBatchEnabled()){
influxDbConnection.enableBatch(BatchOptions.DEFAULTS);
}
for (Point point : points) {
batchPoints.point(point);