davidADSP · zorgluf · Mar 28, 2024 · Mar 28, 2024 · Apr 2, 2024 · Apr 4, 2024
diff --git a/.github/publish-image.yaml b/.github/publish-image.yaml
@@ -0,0 +1,28 @@
+# Builds the standalone Flamme Rouge play image from app/Dockerfile_play_frouge
+# and pushes it to Docker Hub on every push to the branches listed below.
+# NOTE(review): GitHub only loads workflow files from .github/workflows/ —
+# confirm this file's location (.github/) is intended.
+name: Publish frouge image
+on:
+  push:
+    branches:
+      - 'main'
+      - 'multiple-upgrades'
+
+jobs:
+  publish-image:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout
+        uses: actions/checkout@v3
+      - name: build
+        run: |
+          docker build . -t zorgluf/simple-play-frouge:latest -f app/Dockerfile_play_frouge
+      - name: publish
+        # Pass the token through the environment and stdin so it never shows
+        # up in the process argument list (docker warns that -p is insecure).
+        env:
+          DOCKER_HUB_TOKEN: ${{ secrets.DOCKER_HUB_TOKEN }}
+        run: |
+          echo "$DOCKER_HUB_TOKEN" | docker login -u zorgluf --password-stdin
+          docker push zorgluf/simple-play-frouge:latest
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 private
 archive
+venv
 
 .DS_Store
 # Documentatino notebooks since they're copied over when the build_docs script

diff --git a/README.md b/README.md
@@ -99,13 +99,21 @@ Install [Docker](https://github.com/davidADSP/SIMPLE/issues) and [Docker Compose
    ```
 2. Build the image and 'up' the container.
    ```sh
-   docker-compose up -d
+   docker compose up -d
+   ```
+   or
+   ```sh
+   docker compose -f docker-compose-nvidia.yml up -d
    ```
 3. Choose an environment to install in the container (`tictactoe`, `connect4`, `sushigo`, `geschenkt`, `butterfly`, and `flamme rouge` are currently implemented)
    ```sh
    bash ./scripts/install_env.sh sushigo
    ```
 
+Build a standalone Docker play image:
+  ```sh
+  docker build . -t simple-play-frouge -f app/Dockerfile_play_frouge
+  ```
 ---
 <!-- TUTORIAL -->
 ## Tutorial
@@ -124,7 +132,12 @@ This entrypoint allows you to play against a trained AI, pit two AIs against eac
 
 For example, try the following command to play against a baseline random model in the Sushi Go environment.
    ```sh
-   docker-compose exec app python3 test.py -d -g 1 -a base base human -e sushigo 
+   docker compose exec app python3 test.py -d -g 1 -a base base human -e sushigo 
+   ```
+
+To start the web GUI mode:
+  ```sh
+   docker compose exec app python3 test.py -rm human_web -g 1 -a base base human base base -e frouge 
    ```
 
 #### `train.py` 
@@ -133,8 +146,12 @@ This entrypoint allows you to start training the AI using selfplay PPO. The unde
 
 For example, you can start training the agent to learn how to play SushiGo with the following command:
    ```sh
-   docker-compose exec app python3 train.py -r -e sushigo 
+   docker compose exec app python3 train.py -r -e sushigo 
    ```
+Or, on an NVIDIA GPU:
+   ```sh
+   docker compose -f docker-compose-nvidia.yml exec app python3 train.py -r -e sushigo 
+   ``` 
 
 After 30 or 40 iterations the process should have achieved above the default threshold score of 0.2 and will output a new `best_model.zip` to the `/zoo/sushigo` folder. 
 
@@ -143,7 +160,7 @@ Training runs until you kill the process manually (e.g. with Ctrl-C), so do that
 You can now use the `test.py` entrypoint to play 100 games silently between the current `best_model.zip` and the random baselines model as follows:
 
   ```sh
-  docker-compose exec app python3 test.py -g 100 -a best_model base base -e sushigo 
+  docker compose exec app python3 test.py -g 100 -a best_model base base -e sushigo 
   ```
 
 You should see that the best_model scores better than the two baseline model opponents. 
@@ -154,7 +171,7 @@ Played 100 games: {'best_model_btkce': 31.0, 'base_sajsi': -15.5, 'base_poqaj':
 You can continue training the agent by dropping the `-r` reset flag from the `train.py` entrypoint arguments - it will just pick up from where it left off.
 
    ```sh
-   docker-compose exec app python3 train.py -e sushigo 
+   docker compose exec app python3 train.py -e sushigo 
    ```
 
 Congratulations, you've just completed one training cycle for the game Sushi Go! The PPO agent will now have to work out a way to beat the model it has just created...

diff --git a/app/Dockerfile b/app/Dockerfile
@@ -1,16 +1,6 @@
-FROM ubuntu:bionic-20200219 as base
+FROM ubuntu:jammy as base
 
-RUN apt-get update
-RUN apt-get -y install ssh
-RUN apt-get -y install python3-pip
-RUN apt-get -y install htop
-RUN apt-get -y install libpq-dev
-
-RUN apt-get update 
-RUN apt-get -y install cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-dev
-
-RUN pip3 install --upgrade pip
-RUN pip3 install --upgrade setuptools
+RUN apt-get -y update && apt-get -y install python3-pip htop libpq-dev cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-dev
 
 RUN useradd -ms /bin/bash selfplay
 USER selfplay
@@ -19,7 +9,8 @@ WORKDIR /app
 
 
 COPY --chown=selfplay:selfplay ./app/requirements.txt /app
-RUN pip3 install -r /app/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /app/requirements.txt
 
 COPY --chown=selfplay:selfplay ./app .
 

diff --git a/app/Dockerfile_play_frouge b/app/Dockerfile_play_frouge
@@ -0,0 +1,27 @@
+# Standalone image that starts test.py in human_web mode for the frouge environment.
+FROM python:3.10
+
+EXPOSE 8080
+
+# Run as an unprivileged user; ~/.local/bin is put on PATH (presumably for pip's user-level installs).
+RUN useradd -ms /bin/bash selfplay
+USER selfplay
+ENV PATH="/home/selfplay/.local/bin:${PATH}"
+WORKDIR /app
+
+COPY --chown=selfplay:selfplay ./app .
+# NOTE(review): WORKDIR is /app, so ./app/zoo resolves to /app/app/zoo; unless the repo
+# has an app/app directory this rm is a no-op. If the intent is to keep only the
+# pretrained models, the path should probably be ./zoo — confirm before changing.
+RUN rm -rf ./app/zoo
+COPY --chown=selfplay:selfplay ./app/zoo/pretrained ./zoo/pretrained
+
+# Primary index is the PyTorch CPU wheel index, with PyPI as the extra index.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.python.org/simple
+RUN pip install --no-cache-dir -e ./environments/frouge
+
+# Exec form: python3 is the container's main process and receives stop signals
+# directly instead of being wrapped in `/bin/sh -c`.
+CMD ["python3", "test.py", "-rm", "human_web", "-g", "1", "-a", "best_model", "best_model", "human", "best_model", "best_model", "-e", "frouge", "-wa", "0"]
+#CMD bash
diff --git a/app/environments/frouge/frouge/__init__.py b/app/environments/frouge/frouge/__init__.py
@@ -1,7 +1,10 @@
-from gym.envs.registration import register
+from gymnasium.envs.registration import register
 
 register(
     id='FlammeRouge-v0',
     entry_point='frouge.envs:FlammeRougeEnv',
 )
 
+# optimal training:
+# docker compose exec app python3 train.py -r -e frouge -t 300 -os 12800 -ob 256 -dev cuda
+
diff --git a/app/environments/frouge/frouge/envs/classes.py b/app/environments/frouge/frouge/envs/classes.py
@@ -210,6 +210,17 @@ def map_to_board(self,board=None):
         a.array[self.r_position.col][self.r_position.row][0] = str(self.n) + "r"
         a.array[self.s_position.col][self.s_position.row][0] = str(self.n) + "s"
         return a
+
+    def nb_penalties(self):
+        """Count the penalty cards across this player's decks, discards and hands."""
+        piles = (
+            self.r_deck, self.s_deck,
+            self.r_discard, self.s_discard,
+            self.r_hand, self.s_hand,
+        )
+        # every pile reports its own penalty count; add them up
+        total = sum(pile.nb_penalties() for pile in piles)
+        return total
 
 
 class Position():
@@ -269,6 +280,15 @@ def array(self):
         for card in self.cards:
             array[ALL_CARDS.index(card)] += 0.1
         return array
+
+    def sum_values(self):
+        """Return the total of the `value` attribute over every card held."""
+        # start from 0 so an empty collection sums to 0
+        card_values = (held.value for held in self.cards)
+        return sum(card_values, 0)
+
+    def nb_penalties(self):  # number of held cards whose name contains "penalty"
+        return sum(1 for held in self.cards if "penalty" in held.name)
 
 class Card():
     def __init__(self, name, value):
@@ -378,10 +398,10 @@ def move(self,player_id,c_type,n,aspiration=False):
             if start_cell == CD:
                 n = max(n,5)
             if start_cell == CSU:
-                n = max(n,5)
+                n = max(n,4)
             if start_cell == CC:
                 n = min(n,5)
-            if self.get_cell(player.c_pos(c_type).col+n,0) == CC:
+            if self.get_cell(player.c_pos(c_type).col+n,0) == CC or self.get_cell(player.c_pos(c_type).col+ int(n/2),0) == CC:
                 if n > 5:
                     n = 5
         self.set_cycl_to_pos(player_id, c_type, player.c_pos(c_type).col + n)