Open
Description
I am able to run a stable cluster using docker-compose with
# Compose a collection of Docker containers used by the Spacejam/Madlands server.
# See the README in this directory.
# Makes a named network so the containers can reach each other by service name.
# https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers
# https://docs.docker.com/compose/networking/
# https://docs.google.com/document/d/1isfM3HI-Rxbal9l_v2dyU6pl7CZMpQ_r2irkiMag2vE/edit#heading=h.krkqmakfnk6n
version: '3.2'

services:
  zookeeper:
    image: wurstmeister/zookeeper
    ports:
      - "2181:2181"
    networks:
      - madlands

  # Three brokers with identical settings; they differ only in the host port
  # they publish (9094/9095/9096) and in the host name of the internal listener.
  kafka-1:
    image: iggcanada/kafka
    ports:
      # Long port syntax: container port 9094 published on host port 9094.
      - target: 9094
        published: 9094
        protocol: tcp
        mode: host
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL,OUTSIDE:PLAINTEXT"
      # PLAINTEXT is the in-network listener; OUTSIDE is the one published to the host.
      KAFKA_LISTENERS: "PLAINTEXT://kafka-1:9092,OUTSIDE://0.0.0.0:9094"
      KAFKA_INTER_BROKER_PROTOCOL_VERSION: "0.11.0.0"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_LOG_RETENTION_BYTES: "-1"
      # Two days of retention. Kafka has no log.retention.days property (only
      # log.retention.hours/minutes/ms), so the value is given in hours.
      # NOTE(review): assumes the image maps KAFKA_* variables to broker
      # properties the way wurstmeister/kafka does - confirm for iggcanada/kafka.
      KAFKA_LOG_RETENTION_HOURS: "48"
      # Required because of bugs in Kafka 0.11.0.0
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "3000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - madlands

  kafka-2:
    image: iggcanada/kafka
    ports:
      # Long port syntax: container port 9094 published on host port 9095.
      - target: 9094
        published: 9095
        protocol: tcp
        mode: host
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL,OUTSIDE:PLAINTEXT"
      # PLAINTEXT is the in-network listener; OUTSIDE is the one published to the host.
      KAFKA_LISTENERS: "PLAINTEXT://kafka-2:9092,OUTSIDE://0.0.0.0:9094"
      KAFKA_INTER_BROKER_PROTOCOL_VERSION: "0.11.0.0"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_LOG_RETENTION_BYTES: "-1"
      # Two days of retention. Kafka has no log.retention.days property (only
      # log.retention.hours/minutes/ms), so the value is given in hours.
      # NOTE(review): assumes the image maps KAFKA_* variables to broker
      # properties the way wurstmeister/kafka does - confirm for iggcanada/kafka.
      KAFKA_LOG_RETENTION_HOURS: "48"
      # Required because of bugs in Kafka 0.11.0.0
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "3000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - madlands

  kafka-3:
    image: iggcanada/kafka
    ports:
      # Long port syntax: container port 9094 published on host port 9096.
      - target: 9094
        published: 9096
        protocol: tcp
        mode: host
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL,OUTSIDE:PLAINTEXT"
      # PLAINTEXT is the in-network listener; OUTSIDE is the one published to the host.
      KAFKA_LISTENERS: "PLAINTEXT://kafka-3:9092,OUTSIDE://0.0.0.0:9094"
      KAFKA_INTER_BROKER_PROTOCOL_VERSION: "0.11.0.0"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_LOG_RETENTION_BYTES: "-1"
      # Two days of retention. Kafka has no log.retention.days property (only
      # log.retention.hours/minutes/ms), so the value is given in hours.
      # NOTE(review): assumes the image maps KAFKA_* variables to broker
      # properties the way wurstmeister/kafka does - confirm for iggcanada/kafka.
      KAFKA_LOG_RETENTION_HOURS: "48"
      # Required because of bugs in Kafka 0.11.0.0
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "3000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - madlands

# Create our named network of type bridge.
# For some reason we could not get the server to connect to Kafka using
# the default bridge network or the host network. Not sure why only a
# custom bridge network works? EK
networks:
  madlands:
    driver: bridge
This works fine on my developer system. However, when I try to get an equivalent cluster running on AWS, the cluster never becomes stable. I am using docker stack . . .
with
# Swarm stack file (deployed with `docker stack deploy`): one ZooKeeper on a
# worker node and three Kafka brokers on manager nodes.
# NOTE(review): the `kafka_` prefix in the host names below presumably comes
# from deploying the stack under the name `kafka` - confirm, since Swarm
# derives that prefix from the stack name.
version: '3.2'

services:
  zookeeper:
    image: wurstmeister/zookeeper:latest
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.role == worker
    ports:
      - "2181:2181"

  kafka1:
    image: 003575935058.dkr.ecr.us-west-1.amazonaws.com/iggcanada/kafka
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.role == manager
    ports:
      # NOTE(review): host-mode publishing binds 9094 directly on the node, and
      # all three brokers publish the same port, so each broker needs its own
      # manager node - confirm the swarm has at least three managers.
      - target: 9094
        published: 9094
        protocol: tcp
        mode: host
    environment:
      # Evaluated inside the container (via the mounted Docker socket) to get
      # the name of the swarm node the task runs on.
      HOSTNAME_COMMAND: "docker info | grep ^Name: | cut -d' ' -f 2"
      KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL,OUTSIDE:PLAINTEXT"
      # Bind both listeners on all interfaces; the service name is only
      # advertised, not bound, because in Swarm it resolves to the service's
      # virtual IP rather than to this task's own interface.
      KAFKA_LISTENERS: "PLAINTEXT://0.0.0.0:9092,OUTSIDE://0.0.0.0:9094"
      # Advertise explicit addresses: the service name for in-stack traffic and
      # the swarm node name for clients connecting through the published port.
      # NOTE(review): `_{HOSTNAME_COMMAND}` is the placeholder substituted by the
      # wurstmeister/kafka start script - confirm the iggcanada image supports it.
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka_kafka1:9092,OUTSIDE://_{HOSTNAME_COMMAND}:9094"
      KAFKA_INTER_BROKER_PROTOCOL_VERSION: "0.11.0.0"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_LOG_RETENTION_BYTES: "-1"
      # Two days of retention. Kafka has no log.retention.days property (only
      # log.retention.hours/minutes/ms), so the value is given in hours.
      KAFKA_LOG_RETENTION_HOURS: "48"
      # Required because of bugs in Kafka 0.11.0.0
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "3000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock

  kafka2:
    image: 003575935058.dkr.ecr.us-west-1.amazonaws.com/iggcanada/kafka
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.role == manager
    ports:
      # NOTE(review): host-mode publishing binds 9094 directly on the node, and
      # all three brokers publish the same port, so each broker needs its own
      # manager node - confirm the swarm has at least three managers.
      - target: 9094
        published: 9094
        protocol: tcp
        mode: host
    environment:
      # Evaluated inside the container (via the mounted Docker socket) to get
      # the name of the swarm node the task runs on.
      HOSTNAME_COMMAND: "docker info | grep ^Name: | cut -d' ' -f 2"
      KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL,OUTSIDE:PLAINTEXT"
      # Bind both listeners on all interfaces; the service name is only
      # advertised, not bound, because in Swarm it resolves to the service's
      # virtual IP rather than to this task's own interface.
      KAFKA_LISTENERS: "PLAINTEXT://0.0.0.0:9092,OUTSIDE://0.0.0.0:9094"
      # Advertise explicit addresses: the service name for in-stack traffic and
      # the swarm node name for clients connecting through the published port.
      # NOTE(review): `_{HOSTNAME_COMMAND}` is the placeholder substituted by the
      # wurstmeister/kafka start script - confirm the iggcanada image supports it.
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka_kafka2:9092,OUTSIDE://_{HOSTNAME_COMMAND}:9094"
      KAFKA_INTER_BROKER_PROTOCOL_VERSION: "0.11.0.0"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_LOG_RETENTION_BYTES: "-1"
      # Two days of retention. Kafka has no log.retention.days property (only
      # log.retention.hours/minutes/ms), so the value is given in hours.
      KAFKA_LOG_RETENTION_HOURS: "48"
      # Required because of bugs in Kafka 0.11.0.0
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "3000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock

  kafka3:
    image: 003575935058.dkr.ecr.us-west-1.amazonaws.com/iggcanada/kafka
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.role == manager
    ports:
      # NOTE(review): host-mode publishing binds 9094 directly on the node, and
      # all three brokers publish the same port, so each broker needs its own
      # manager node - confirm the swarm has at least three managers.
      - target: 9094
        published: 9094
        protocol: tcp
        mode: host
    environment:
      # Evaluated inside the container (via the mounted Docker socket) to get
      # the name of the swarm node the task runs on.
      HOSTNAME_COMMAND: "docker info | grep ^Name: | cut -d' ' -f 2"
      KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL,OUTSIDE:PLAINTEXT"
      # Bind both listeners on all interfaces; the service name is only
      # advertised, not bound, because in Swarm it resolves to the service's
      # virtual IP rather than to this task's own interface.
      KAFKA_LISTENERS: "PLAINTEXT://0.0.0.0:9092,OUTSIDE://0.0.0.0:9094"
      # Advertise explicit addresses: the service name for in-stack traffic and
      # the swarm node name for clients connecting through the published port.
      # NOTE(review): `_{HOSTNAME_COMMAND}` is the placeholder substituted by the
      # wurstmeister/kafka start script - confirm the iggcanada image supports it.
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka_kafka3:9092,OUTSIDE://_{HOSTNAME_COMMAND}:9094"
      KAFKA_INTER_BROKER_PROTOCOL_VERSION: "0.11.0.0"
      KAFKA_DELETE_TOPIC_ENABLE: "true"
      KAFKA_LOG_RETENTION_BYTES: "-1"
      # Two days of retention. Kafka has no log.retention.days property (only
      # log.retention.hours/minutes/ms), so the value is given in hours.
      KAFKA_LOG_RETENTION_HOURS: "48"
      # Required because of bugs in Kafka 0.11.0.0
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: "3000"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
This stack comes up, but what happens is:
- Two brokers seem to come up okay, but the last broker gets hung with
WARN Connection to node xxxx could not be established. Broker may not be available.
- Eventually, after this container restarts a few times, this broker seems to become stable. BUT then another broker starts failing with
WARN Connection to node xxxx could not be established. Broker may not be available.
- This pattern repeats indefinitely, and the cluster never becomes stable the way it does under
docker-compose
.
Any help would be greatly appreciated.