nuprl · arjunguha · Nov 17, 2023 · Nov 8, 2023 · Nov 8, 2023 · Nov 9, 2023
diff --git a/dataset_builder/humaneval_to_elixir.py b/dataset_builder/humaneval_to_elixir.py
@@ -0,0 +1,106 @@
+"""
+This script translates problems from the OpenAI HumanEval dataset into Elixir.
+"""
+
+import re
+import ast
+from typing import List, TypeVar
+from base_language_translator import LanguageTranslator
+
+# We turn multi-line docstrings into single-line comments. This pattern matches
+# each line break together with the whitespace that follows it (captured in
+# group 1), i.e. the start of every continuation line including its indentation.
+# Because `\s*` also matches newlines, consecutive blank lines are consumed by a
+# single match.
+DOCSTRING_LINESTART_RE = re.compile("""\n(\\s*)""")
+
+# Translated expressions are represented as plain strings of Elixir source.
+TargetExp = str
+
+
+class Translator(LanguageTranslator[TargetExp]):
+    """Translate HumanEval prompts and test cases from Python into Elixir.
+
+    The prompt is emitted as a `HumanEval` module containing the function under
+    test; the test suite is emitted as an ExUnit test module whose assertions
+    call back into `HumanEval`.
+    """
+
+    # Presumably read by the base translator when rendering unary minus
+    # (TODO confirm against LanguageTranslator).
+    USub = "-"
+
+    def file_ext(self) -> str:
+        """Return the extension tag this translator reports: "elixir"."""
+        return "elixir"
+
+    def stop(self) -> List[str]:
+        """Return the stop strings for this language.
+
+        Each one marks the start of something that follows a function body: a
+        new module, another (private) function, a comment, or a blank line.
+        """
+        return ["\ndefmodule", "\ndefp", "\ndef ", "\n#", "\n\n"]
+
+    def translate_prompt(
+        self, name: str, args: List[ast.arg], _returns: ast.expr, description: str
+    ) -> str:
+        """Build the Elixir prompt for one problem.
+
+        name: the Python function name, reused as the Elixir function name
+        args: the Python arguments; only their names are used
+        _returns: the Python return annotation (unused)
+        description: the Python docstring, rewritten as `#` line comments
+
+        Returns the comment block, the opening of `defmodule HumanEval`, a
+        `candidate` function that delegates to `name`, and the opening line of
+        `name` itself followed by the indentation for its body.
+        """
+        # Every line break (plus the indentation after it) becomes "\n# " so the
+        # whole docstring ends up as a run of single-line comments.
+        commented = DOCSTRING_LINESTART_RE.sub("\n# ", description.strip())
+        elixir_description = "# " + commented + "\n"
+        arg_list = ", ".join(arg.arg for arg in args)
+        result_list = [
+            elixir_description,
+            "defmodule HumanEval do",
+            f"  def candidate({arg_list}), do: {name}({arg_list})",
+            f"  def {name}({arg_list}) do",
+            "    ",
+        ]
+        return "\n".join(result_list)
+
+    def test_suite_prefix_lines(self, entry_point: str) -> List[str]:
+        """
+        This code goes at the start of the test suite: it starts ExUnit and
+        opens a test module with a single test named after entry_point.
+        """
+        return [
+            "ExUnit.start()",
+            "defmodule HumanEvalTest do",
+            "  use ExUnit.Case, async: true",
+            f"  test '{entry_point}' do",
+        ]
+
+    def test_suite_suffix_lines(self) -> List[str]:
+        """
+        This code goes at the end of the test suite: it closes the test and the
+        test module opened by the prefix lines.
+        """
+        return ["  end", "end", ""]
+
+    def deep_equality(self, left: TargetExp, right: TargetExp) -> str:
+        """
+        All tests are assertions that compare deep equality between left and right.
+
+        Make sure you use the right equality operator for your language. For example,
+        == is the wrong operator for Java and OCaml.
+
+        The assertion is emitted as `assert right == left`.
+        """
+        return "    assert {} == {}".format(right, left)
+
+    def gen_literal(self, c: bool | str | int | float | None) -> TargetExp:
+        """Translate a literal expression
+        c: is the literal value
+
+        Booleans become true/false, None becomes nil, and strings become
+        double-quoted Elixir strings. Every other value is rendered with
+        Python's repr().
+        """
+        # bool must be tested before anything numeric: it is a subclass of int.
+        if isinstance(c, bool):
+            return str(c).lower()
+        if isinstance(c, str):
+            # Escape everything that would terminate the literal or change its
+            # value inside an Elixir double-quoted string. Backslashes go first
+            # so the backslashes added by the later steps are not doubled, and
+            # "#{" is escaped because it would otherwise start interpolation.
+            escaped = (
+                c.replace("\\", "\\\\")
+                .replace('"', '\\"')
+                .replace("#{", "\\#{")
+                .replace("\n", "\\n")
+                .replace("\r", "\\r")
+                .replace("\t", "\\t")
+            )
+            return '"' + escaped + '"'
+        if c is None:
+            return "nil"
+        return repr(c)
+
+    def gen_var(self, v: str) -> TargetExp:
+        """Translate a variable with name v (the name is used unchanged)."""
+        return v
+
+    def gen_list(self, l: List[TargetExp]) -> TargetExp:
+        """Translate a list with elements l
+        A list [x, y, z] translates to [x, y, z] (an Elixir list)
+        """
+        return "[" + ", ".join(l) + "]"
+
+    def gen_tuple(self, t: List[TargetExp]) -> TargetExp:
+        """Translate a tuple with elements t
+        A tuple (x, y, z) translates to {x, y, z}
+        """
+        return "{" + ", ".join(t) + "}"
+
+    def gen_dict(self, keys: List[TargetExp], values: List[TargetExp]) -> TargetExp:
+        """Translate a dictionary with keys and values
+        A dictionary { "key1": val1, "key2": val2 } translates to %{"key1" => val1, "key2" => val2}
+        """
+        pairs = (f"{k} => {v}" for k, v in zip(keys, values))
+        return "%{" + ", ".join(pairs) + "}"
+
+    def gen_call(self, func: TargetExp, args: List[TargetExp]) -> str:
+        """Translate a function call `func(args)`
+        A function call f(x, y, z) translates to HumanEval.f(x, y, z), because
+        the function under test lives in the HumanEval module.
+        """
+        return f"HumanEval.{func}({', '.join(args)})"
diff --git a/dataset_builder/libexperiments.py b/dataset_builder/libexperiments.py
@@ -38,7 +38,8 @@ def path(self) -> Path:
     "swift",
     "rkt",
     "ml",
-    "hs"
+    "hs",
+    "elixir",
-    "elixir",
+    "ex",
-    "elixir",
+    "ex",
 ]
 MODELS = ["davinci", "incoder", "codegen"]
 
@@ -62,4 +63,3 @@ def all_experiments() -> Iterator[Experiment]:
                             yield Experiment(dataset, lang, model, temp, variation)
                         else:
                             pass
-
diff --git a/dataset_builder/terms.csv b/dataset_builder/terms.csv
@@ -15,6 +15,7 @@ Python,py,array,list,tuple,dictionary,None,True,False
 R,r,vector,list,list,named list,NULL,TRUE,FALSE
 Racket,rkt,list,list,list,hash,#f,#t,#f
 Ruby,rb,array,array,array,hash,nil,true,false
+Elixir,elixir,list,list,tuple,map,nil,true,false
 Rust,rs,vector,vector,tuple,HashMap,None,true,false
 Scala,scala,list,list,tuple,map,None,true,false
 Swift,swift,array,array,tuple,dictionary,nil,true,false

diff --git a/evaluation/Dockerfile b/evaluation/Dockerfile
@@ -57,6 +57,9 @@ RUN mkdir /usr/multiple && wget https://repo.mavenlibs.com/maven/org/javatuples/
 # Luau
 RUN wget https://github.com/Roblox/luau/releases/download/0.594/luau-ubuntu.zip -O /tmp/luau.zip && unzip /tmp/luau.zip -d /bin/
 
+# Elixir
+RUN wget https://binaries2.erlang-solutions.com/ubuntu/pool/contrib/e/elixir/elixir_1.15.4_1_otp_26.0.2~ubuntu~jammy_all.deb -O /tmp/elixir.deb && dpkg -i /tmp/elixir.deb
+
 COPY src /code
 WORKDIR /code
 ENTRYPOINT ["python3", "main.py"]
diff --git a/evaluation/src/containerized_eval.py b/evaluation/src/containerized_eval.py
@@ -23,6 +23,7 @@
 import eval_ocaml
 import eval_matlab
 import eval_hs
+import eval_elixir
 import tempfile
 
 
@@ -52,7 +53,8 @@
     "fs": (eval_fs.eval_script, ".fsx"),
     "ml": (eval_ocaml.eval_script, ".ml"),
     "m": (eval_matlab.eval_script, ".m"),
-    "hs": (eval_hs.eval_script, ".hs")
+    "hs": (eval_hs.eval_script, ".hs"),
+    "elixir": (eval_elixir.eval_script, ".exs"),
 }
 
 def eval_string_script(language, program):
@@ -86,4 +88,3 @@ def eval_string_script(language, program):
             "exit_code": result['exit_code'],
             "status": result['status']
         }
-
diff --git a/evaluation/src/eval_elixir.py b/evaluation/src/eval_elixir.py
@@ -0,0 +1,37 @@
+import argparse
+from sys import exit
+import subprocess
+from pathlib import Path
+from generic_eval import main as gmain
+
+
+def eval_script(path: Path):
+    """Run an Elixir script and classify how it finished.
+
+    Args:
+        path: the script to execute with the `elixir` command.
+
+    Returns:
+        A dict with:
+          - "status": "OK" (exit code 0), "AssertionError", "SyntaxError",
+            "Exception" (any other non-zero exit) or "Timeout";
+          - "exit_code": the process exit code, or -1 on timeout;
+          - "stdout" / "stderr": the captured output decoded as UTF-8.
+    """
+    try:
+        output = subprocess.run(["elixir", str(path)], capture_output=True, timeout=5)
+        returncode = output.returncode
+        timed_out = False
+    except subprocess.TimeoutExpired as exc:
+        # The exception carries whatever output was captured before the kill.
+        output = exc
+        returncode = -1
+        timed_out = True
+
+    # Decode leniently: the script under test is untrusted and may print bytes
+    # that are not valid UTF-8, which must not crash the evaluator.
+    stdout = "" if output.stdout is None else output.stdout.decode("utf-8", errors="replace")
+    stderr = "" if output.stderr is None else output.stderr.decode("utf-8", errors="replace")
+
+    if timed_out:
+        status = "Timeout"
+    elif returncode == 0:
+        # Assumes exit-code 0 is all okay
+        status = "OK"
+    else:
+        # Look only at what the program printed, not at the command line, so a
+        # script path that happens to contain a marker cannot skew the result.
+        outmessage = stdout + stderr
+        if "Assertion with == failed" in outmessage:
+            status = "AssertionError"
+        elif "SyntaxError" in outmessage:
+            status = "SyntaxError"
+        else:
+            status = "Exception"
+
+    return {
+        "status": status,
+        "exit_code": returncode,
+        "stdout": stdout,
+        "stderr": stderr,
+    }
+
+
+# When run as a script, hand eval_script to the shared generic_eval driver
+# together with the language name and the ".exs" file suffix.
+if __name__ == "__main__":
+    gmain(eval_script, "Elixir", ".exs")