Repository for a stream processing solution created in the context of the course "Advanced Internet Computing" at the Vienna University of Technology (winter term 2016).
Toolchain
- Language: Java
- Build tool: Gradle >= 3.1
- Test command:
gradle test --stacktrace
plugins
- checkstyle-idea
- gradle
sudo add-apt-repository ppa:cwchien/gradle
sudo apt-get update
sudo apt install gradle default-jdk
git clone git@hyde.infosys.tuwien.ac.at:aic2016/G2T1v2.git
cd G2T1v2
mkdir bin ; cd bin
curl http://www-eu.apache.org/dist/kafka/0.10.1.0/kafka_2.11-0.10.1.0.tgz | tar --extract --gzip
curl http://www-eu.apache.org/dist/storm/apache-storm-1.0.2/apache-storm-1.0.2.tar.gz | tar --extract --gzip
cd ..
# ./gradlew test --stacktrace ## tests do not work right now
# redis
sudo apt-get install redis-server
# or get it from https://redis.io/
Adapted from Link
./bin/kafka_2.11-0.10.1.0/bin/zookeeper-server-start.sh ./conf/zookeeper.properties &
sleep 5
./bin/kafka_2.11-0.10.1.0/bin/kafka-server-start.sh ./conf/server.properties &
sleep 5
./bin/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 5 --topic taxi
./bin/apache-storm-1.0.2/bin/storm nimbus &
sleep 5
./bin/apache-storm-1.0.2/bin/storm supervisor &
sleep 5
./bin/apache-storm-1.0.2/bin/storm ui &
sleep 5
./bin/apache-storm-1.0.2/bin/storm logviewer &
sleep 5
Open the Storm UI web interface at http://localhost:8080
./gradlew assemble; ./gradlew stormJar
./bin/apache-storm-1.0.2/bin/storm jar build/libs/stream-processing-0.1-SNAPSHOT.jar at.ac.tuwien.aic.streamprocessing.storm.ClusterSubmitter TestName
# monitor a single component
./bin/apache-storm-1.0.2/bin/storm monitor taxicab-0_0_1 -m w-calculate-speed-bolt
./bin/apache-storm-1.0.2/bin/storm kill taxicab-0_0_1
# ^^^ will wait for topology.message.timeout.secs (30s) to allow in-flight processing to finish
# e.g. Nimbus Leader NotFoundException
rm -rf /tmp/kafka-logs
./bin/kafka_2.11-0.10.1.0/bin/zookeeper-shell.sh localhost:2181 rmr /brokers
2016-11-26 19:20:14.834 STDERR [INFO] Caused by: java.lang.RuntimeException: java.io.IOException: Found multiple defaults.yaml resources. You're probably bundling the Storm jars with your topology jar. [jar:file:/home/kern/Code/G2T1/apache-storm-1.0.2/lib/storm-core-1.0.2.jar!/defaults.yaml, jar:file:/home/kern/Code/G2T1/apache-storm-1.0.2/storm-local/supervisor/stormdist/taxicab-0_0_1-1-1480184405/stormjar.jar!/defaults.yaml]
# don't run StormSubmitter from IDEA
# run ./bin/apache-storm/bin/storm executable to submit the jar
The NoSuchElementException
occurs when an operator
emits fewer values
than the topology requires.
KeeperErrorCode = NoNode for /brokers/topics/test-topic/partitions
./bin/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic taxi
ERROR org.apache.kafka.common.errors.InvalidReplicationFactorException: replication factor: 1 larger than available brokers: 0
ctrl+z
systemctl start redis-server
./bin/kafka_2.11-0.10.1.0/bin/kafka-topics.sh --alter --zookeeper localhost:2181 --partitions 5 --topic taxi
ps aux | grep kafka
ps aux | grep zookeeper
ps aux | grep redis
ps aux | grep SNAPSHOT
sudo systemctl stop redis-server
./gradlew test
The testdata (merged, sorted, with endToken) can be downloaded through this link
The workflow by which the original test data was merged and sorted, and by which the end token was added:
# 0. Install sqlite3
sudo apt install sqlite3
# 1. Navigate to downloaded and extracted original test-data and merge them
copy /b *.txt testData_merged_unsorted.csv
# 2. Import test-data into sqlite
sqlite3 TaxiData < script.sqlite
#where script.sqlite contains:
create table taxiData(id integer, timestamp datetime, lat decimal, long decimal);
.mode csv
.import 'taxi_sub_data.csv' TaxiData
INSERT INTO taxiData SELECT id, datetime(max(timestamp),'+1 second') AS timestamp, 360 AS long, 360 AS lat FROM taxiData GROUP BY id;
# 3. Output query to file
sqlite3.exe -csv taxiData "SELECT * FROM taxiData ORDER BY 2" > testData_merged_sorted.csv
Simple UI tool for managing redis instances.
wget https://github.com/uglide/RedisDesktopManager/releases/download/0.9.0-alpha2/redis-desktop-manager_0.9.0.26_amd64.deb
sudo dpkg -i redis-desktop-manager_0.9.0.26_amd64.deb
# in case of dependency errors while running the above
sudo apt-get -f install
#start tool with
redis-desktop-manager
Building
sudo apt install npm
npm install bower -g
cd src/main/resources/dashboard/
bower install
npm install
Running
npm start
Fixes for potential issues
#/usr/bin/env: node: No such file or directory
ln -s /usr/bin/nodejs /usr/bin/node
#then retry
# "Cannot be run with sudo" message when executing sudo bower install
# retry with the --allow-root option
sudo bower install --allow-root
#If there are problems with permissions such as "EACCES: permission denied .config/configstore/bower-github.json"
sudo chown -R $USER:$GROUP ~/.npm
sudo chown -R $USER:$GROUP ~/.config
#then run bower install again
Instructions to create the Virtual Machine as required for hand in.
sudo apt install virtualbox-qt cloud-image-utils
# adapted from https://gist.github.com/smoser/6066204
wget https://cloud-images.ubuntu.com/xenial/current/xenial-server-cloudimg-amd64-disk1.vmdk
VBoxManage clonehd \
--format VDI \
xenial-server-cloudimg-amd64-disk1.vmdk \
g2t1.vdi ;
cat > my-user-data <<EOF
#cloud-config
debug: True
disable_root: false
lock_passwd: false
password: ubuntu
chpasswd:
  list: |
    root:root
    ubuntu:ubuntu
  expire: False
ssh_pwauth: true
package_upgrade: true
apt_sources:
  - source: "ppa:cwchien/gradle"
packages:
  - virtualbox-guest-x11
  - gradle
  - default-jdk
  - virtualbox-guest-dkms
  - ubuntu-desktop
  - firefox
  - redis-server
  - openjfx
  - npm
  - nodejs-legacy
power_state:
  mode: poweroff
  message: Bye Bye
EOF
cloud-localds my-seed.iso my-user-data
VBoxManage createvm --name g2t1 --ostype Ubuntu_64 --register
VBoxManage modifyvm g2t1 --memory 4096 # VBoxManage modifyvm g2t1 --memory 2048 #
VBoxManage storagectl g2t1 --name "SATA Controller" --add sata --controller IntelAHCI --portcount 2
VBoxManage storageattach g2t1 --storagectl "SATA Controller" --port 0 --device 0 --type hdd --medium g2t1.vdi
VBoxManage storageattach g2t1 --storagectl "SATA Controller" --port 1 --device 0 --type dvddrive --medium my-seed.iso
VBoxManage modifyvm g2t1 --uart1 0x3F8
VBoxManage startvm g2t1
## wait for vm to shutdown
# test for success
VBoxManage startvm g2t1
VBoxManage guestcontrol g2t1 run "/bin/date" --username root --password root
VBoxManage controlvm g2t1 poweroff
# fix /etc/hosts file
VBoxManage startvm g2t1
VBoxManage guestcontrol g2t1 run "/bin/sh" --username root --password root -- -c "cd /home/ubuntu/ \
; echo '127.0.0.1 ubuntu' >> /etc/hosts \
; shutdown -h now \
;"
# import project
git archive `git rev-parse --abbrev-ref HEAD` --output project.tar
VBoxManage sharedfolder add g2t1 --name project --hostpath `pwd` --automount
VBoxManage startvm g2t1
VBoxManage guestcontrol g2t1 run "/bin/sh" --username root --password root -- -c "cd /home/ubuntu/ \
; cp /media/sf_project/project.tar /home/ubuntu/project.tar \
; chown ubuntu:ubuntu /home/ubuntu/project.tar \
;"
VBoxManage guestcontrol g2t1 run "/bin/sh" --username ubuntu --password ubuntu -- -c "cd /home/ubuntu/ \
; tar --extract --file project.tar \
; mkdir bin \
; cd bin \
; curl http://www-eu.apache.org/dist/kafka/0.10.1.0/kafka_2.11-0.10.1.0.tgz | tar --extract --gzip \
; curl http://www-eu.apache.org/dist/storm/apache-storm-1.0.2/apache-storm-1.0.2.tar.gz | tar --extract --gzip \
; cd .. \
;"
VBoxManage guestcontrol g2t1 run "/bin/sh" --username ubuntu --password ubuntu -- -c "cd /home/ubuntu/ \
; sudo apt install npm nodejs-legacy \
; cd src/main/resources/dashboard/ \
; npm install \
; npm setup \
;"
# run tests
VBoxManage guestcontrol g2t1 run "/bin/sh" --username ubuntu --password ubuntu -- -c "cd /home/ubuntu/; \
./gradlew test
"
# fetch demodata
VBoxManage guestcontrol g2t1 run "/bin/sh" --username ubuntu --password ubuntu -- -c "cd /home/ubuntu/ \
; wget https://www.dropbox.com/sh/rv7uambq691s44l/AAAT2N5W0EA-pJKkUp5Nqj_ba/taxi_sub_data.zip \
; unzip taxi_sub_data.zip \
; mv taxi_sub_data.csv testdata/taxi_data.csv \
;"
## or https://www.dropbox.com/sh/rv7uambq691s44l/AADHIDF0ozwcTjGJ0bMHAZdma/taxi_data.zip
VBoxManage startvm g2t1
VBoxManage guestcontrol g2t1 run "/bin/sh" --username ubuntu --password ubuntu -- -c "cd /home/ubuntu/ \
; echo 'cd ~; ./gradlew runOptimizedTopology' > ~/Desktop/startTopo.sh \
; echo 'cd ~; ./gradlew runDataprovider -Pspeed=\$1 -Pdata=./testdata/taxi_sub_data.csv' > ~/Desktop/startProvider.sh \
; echo 'cd ~/src/main/resources/dashboard/ \n npm start & \n firefox localhost:3000/optimization.html' > ~/Desktop/startUI.sh \
; chmod a+x ~/Desktop/*.sh \
;"
# TODO reset demodata
# runTopology; startDashboard; runDataprovider -Pspeed=100000
# test image import; test the vdi file
# TODO
echo "username: root \n password: root" >> g2t1.txt
run the test machine
VBoxManage startvm g2t1
# login username: ubuntu; password: ubuntu
VBoxManage controlvm g2t1 poweroff
cleanup
# delete vm and file
VBoxManage sharedfolder remove g2t1 --name project
VBoxManage unregistervm g2t1 --delete
rm xenial-server-cloudimg-amd64-disk1.vmdk
fixes
# apt install junit
systemctl stop redis-server
sudo chown --recursive ubuntu:ubuntu ./src/
./gradlew clean build
./gradlew wrapper --gradle-version=3.3
./gradlew wrapper
./gradlew clean build
# for testing install redis desktop-manager
VBoxManage guestcontrol g2t1 run "/bin/sh" --username ubuntu --password ubuntu -- -c "cd /home/ubuntu/ \
; wget https://github.com/uglide/RedisDesktopManager/releases/download/0.9.0-alpha2/redis-desktop-manager_0.9.0.26_amd64.deb \
; sudo dpkg -i redis-desktop-manager_0.9.0.26_amd64.deb \
; sudo apt -f --yes install \
; "
# npm troubles
sudo rm -rf /home/ubuntu/src/main/resources/dashboard/bower_components
sudo rm -rf /home/ubuntu/src/main/resources/dashboard/node_modules
SlideShare with good overview Trident Storm Tutorial with Votes