Commits
58 commits
56df350
started Acoustic modelling
Stella-Mutacho Jun 1, 2022
5cdc303
First run on acoustic modelling
Stella-Mutacho Jun 2, 2022
39d754a
speech recognition start file
hewanm Jun 2, 2022
deb1a71
Corrected amharic characters for training
Stella-Mutacho Jun 2, 2022
d036b37
exploration on the swahili dataset and began acoustic modelling
Stella-Mutacho Jun 2, 2022
ab16f4b
working on speech recognition
Jun 3, 2022
e3d5ef0
working on speech recognition
Jun 3, 2022
369b467
working on speech recognition
Jun 3, 2022
bf4c0ec
working on speech recognition
Jun 3, 2022
c88fb14
Completed acoustic modelling on the Amharic dataset
Stella-Mutacho Jun 3, 2022
272da45
Final modelling notebook on the Amahric dataset
Stella-Mutacho Jun 3, 2022
ea7c2f4
feat: add prediction endpoint
nahomfix Jun 3, 2022
209d899
frontend react setup
hewanm Jun 3, 2022
517b2c2
editing app js file
Jun 4, 2022
bcb20ff
Create Another backend file
ProgrammingOperative Jun 5, 2022
42c1329
Initialze flask app
ProgrammingOperative Jun 5, 2022
66014f5
Add prediction file
ProgrammingOperative Jun 5, 2022
d1f3507
Create a DB and schema
ProgrammingOperative Jun 5, 2022
5d20f68
Create and Initialize DB
ProgrammingOperative Jun 5, 2022
7a89433
Test received json
ProgrammingOperative Jun 5, 2022
bba29d3
API able to receive json
ProgrammingOperative Jun 6, 2022
9209b1c
Combine api files
ProgrammingOperative Jun 6, 2022
13380c0
Create dictionary to be served by model
ProgrammingOperative Jun 6, 2022
c7d404b
Test Server
ProgrammingOperative Jun 6, 2022
159543d
Try prediction
ProgrammingOperative Jun 6, 2022
3b71f47
Add backend scripts
ProgrammingOperative Jun 6, 2022
a9b0616
Audio Upload draft
hewanm Jun 7, 2022
2c5c12f
Merge https://github.com/week4-SpeechRecognition/Speech-to-Text into …
hewanm Jun 7, 2022
8aaaf11
modifiying app js
Jun 7, 2022
5c5c213
upload audio
hewanm Jun 7, 2022
6e0dbe3
upload audio
hewanm Jun 7, 2022
e5d1696
upload audio
hewanm Jun 7, 2022
ae48202
upload audio
hewanm Jun 8, 2022
769a364
upload audio fix
hewanm Jun 8, 2022
6e74921
upload audio fix
hewanm Jun 8, 2022
f4a681a
updating the audio record file
Jun 8, 2022
06d081a
working with the speech record
Jun 9, 2022
fd87808
UI
hewanm Jun 9, 2022
011e595
Conflict resolved
hewanm Jun 9, 2022
d90df89
editing the app js
Jun 9, 2022
ebaa6f1
adding homepage
Jun 9, 2022
01b4f40
adding routes to app js
Jun 9, 2022
b6a2114
Conflict resolved
hewanm Jun 9, 2022
5b84f15
Merge branch 'frontend' of https://github.com/week4-SpeechRecognition…
hewanm Jun 9, 2022
3b03665
updating the page
Jun 9, 2022
2c0f5b5
home page
hewanm Jun 9, 2022
997e5f6
Merge branch 'frontend' of https://github.com/week4-SpeechRecognition…
hewanm Jun 9, 2022
e96b654
wip: start model integration in api
nahomfix Jun 9, 2022
6752fb6
upload
hewanm Jun 9, 2022
4a4bb13
design improvement
hewanm Jun 9, 2022
1de1bf1
design improvement
hewanm Jun 9, 2022
fba5136
design improvement
hewanm Jun 9, 2022
0b79b7f
chore: delete unnecessary files
nahomfix Jun 9, 2022
0263043
chore: add dashboard screen capture
nahomfix Jun 9, 2022
0b1b548
fix: connect dashboard buttons
nahomfix Jun 9, 2022
bf05aba
fix: api prediction fix
nahomfix Jun 9, 2022
b2f2445
Merge pull request #33 from week4-SpeechRecognition/backend
nahomfix Jun 9, 2022
0783daa
chore: integrate frontend and backend
nahomfix Jun 9, 2022
3 changes: 3 additions & 0 deletions .gitignore
@@ -128,3 +128,6 @@ dmypy.json

# Pyre type checker
.pyre/

#node modules
node_modules/
91 changes: 91 additions & 0 deletions backend/api.py
@@ -0,0 +1,91 @@
import sys
from datetime import datetime
from pathlib import Path

import pandas as pd
from flask import Flask, jsonify, make_response, request
from flask_cors import CORS

sys.path.append("../")
from scripts.helpers import *
from scripts.logspectrogram import *
from scripts.models import *
from scripts.predict import *
from scripts.tokenizer import *

# load files
meta_data = pd.read_csv("../data/backend_data/meta_data.csv")
sorted_metadata = meta_data.sort_values(by="duration")
labels = sorted_metadata["label"].to_list()
translation_obj = read_obj("../data/backend_data/translation_dict.pkl")

# load translation
translations = []
for label in labels:
    translations.append(translation_obj[label])

# init tokenizer
tokenizer = Tokenizer(translations)
int_to_char, char_to_int = tokenizer.build_dict()
output_dim = len(char_to_int) + 2

# CNN
n_mels = 128
cnn_model, cnn_shape = cnn_net(n_mels)

# BI-DIRECTIONAL RNN
batch_size = 32
bi_rnn = bi_directional_rnn(1024, batch_size=batch_size, output_dim=output_dim)

# preprocessor
sample_rate = 8000
fft_size = 512
frame_step = 256

preprocessing_model = preprocess_model(sample_rate, fft_size, frame_step, n_mels)

# build model
cnn_bi_rnn_model = build_model(output_dim, cnn_model, bi_rnn, preprocessing_model)

# load saved model
cnn_bi_rnn_model.load_weights("../model/cnn-bi-rnn.h5")


app = Flask(__name__)
CORS(app)


@app.route("/health", methods=["GET"])
def health():
    return make_response(
        jsonify(
            {
                "success": True,
                "timestamp": datetime.now().isoformat(),
            }
        ),
        200,
    )


@app.route("/predict", methods=["POST"])
def predict_audio():
    file = request.files["file"]

    # preprocess audio
    extracted_audio = extract_audio(file)

    # predict
    predicted, error = predict(
        cnn_bi_rnn_model,
        extracted_audio,
        tokenizer,
        int_to_char,
        actual=None,
    )

    return make_response(jsonify({"success": True, "data": predicted}), 200)


if __name__ == "__main__":
app.run(host="localhost", port=5000, debug=False)
Binary file added backend/db.sqlite
Binary file not shown.
91 changes: 91 additions & 0 deletions backend/main.py
@@ -0,0 +1,91 @@
import os
import sys

import pandas as pd
from fastapi import FastAPI, UploadFile

sys.path.append("../")
from scripts.helpers import *
from scripts.logspectrogram import *
from scripts.models import *
from scripts.predict import *
from scripts.tokenizer import *

# load files
meta_data = pd.read_csv("../data/backend_data/meta_data.csv")
sorted_metadata = meta_data.sort_values(by="duration")
labels = sorted_metadata["label"].to_list()
translation_obj = read_obj("../data/backend_data/translation_dict.pkl")

# load translation
translations = []
for label in labels:
    translations.append(translation_obj[label])

# init tokenizer
tokenizer = Tokenizer(translations)
int_to_char, char_to_int = tokenizer.build_dict()
output_dim = len(char_to_int) + 2

# CNN
n_mels = 128
cnn_model, cnn_shape = cnn_net(n_mels)

# BI-DIRECTIONAL RNN
batch_size = 32
bi_rnn = bi_directional_rnn(1024, batch_size=batch_size, output_dim=output_dim)

# preprocessor
sample_rate = 8000
fft_size = 512
frame_step = 256

preprocessing_model = preprocess_model(
    sample_rate, fft_size, frame_step, n_mels
)

# build model
cnn_bi_rnn_model = build_model(
    output_dim, cnn_model, bi_rnn, preprocessing_model
)

# load saved model
cnn_bi_rnn_model.load_weights("../model/cnn-bi-rnn.h5")

app = FastAPI()


@app.get("/")
async def index():
    return {"status": 200, "message": "Server Works"}


@app.post("/type")
async def get_type(file: UploadFile):
    return {
        "type": file.content_type,
    }


@app.post("/predict")
async def predict_audio(file: UploadFile):
    if file.content_type == "audio/wave":
        # load data from request
        audio_file = file.file

        # preprocess audio
        extracted_audio = extract_audio(audio_file)

        # predict
        predicted, error = predict(
            cnn_bi_rnn_model,
            extracted_audio[0],
            tokenizer,
            int_to_char,
            actual=None,
        )

        # build response
        return {"predicted": predicted}

    else:
        return {"Status": False, "Message": "Unsupported file type"}
75 changes: 75 additions & 0 deletions backend/titus_api.py
@@ -0,0 +1,75 @@
from datetime import datetime
from pickle import load
from flask_sqlalchemy import SQLAlchemy
from flask_marshmallow import Marshmallow

from flask import Flask, jsonify, make_response, request
import os


#Initialize App
app = Flask(__name__)

base_dir = os.path.dirname(__file__)

#Database
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'+os.path.join(base_dir, 'db.sqlite')
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

#Init DB
db = SQLAlchemy(app)

#Init Marshmallow
ma = Marshmallow(app)

# Create the Features model (db table)
class Features(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    path = db.Column(db.String(200))
    text = db.Column(db.String(1000))
    duration = db.Column(db.String(250))

    def __init__(self, path, text, duration):
        self.path = path
        self.text = text
        self.duration = duration


# Create the marshmallow schema for serializing Features records
class FeatureSchema(ma.Schema):
    class Meta:
        fields = ['id', 'path', 'text', 'duration']

#Init Schema
feature_schema = FeatureSchema()
# products_schema = ProductSchema(many=True,)


#Route to handle the prediction
@app.route('/predict', methods=['POST', 'GET'])
def predict():
    # Extract fields from the received json, store them in the DB and echo them back

    path = request.json['path']
    text = request.json['text']
    duration = request.json['duration']

    new_file = Features(path, text, duration)
    db.session.add(new_file)
    db.session.commit()

    file = {"path": path,
            "text": text,
            "duration": duration}

    # pwd = os.getcwd()
    # rnn_model_path = os.path.join(pwd, "../model/RNN_model.pickle")
    # rnn_model = load(open(rnn_model_path, "rb"))
    # prediction = rnn_model.predict(file)

    return jsonify(file)


if __name__ == '__main__':
    app.run(debug=True)
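
A hedged sketch of posting a record to this route: the field values below are invented for illustration, and the default Flask dev-server address localhost:5000 is assumed.

import requests

# Keys match the request.json[...] lookups in the /predict route above;
# the values are placeholders, not real dataset entries.
payload = {
    "path": "data/wav/example.wav",
    "text": "placeholder transcription",
    "duration": "4.2",
}
resp = requests.post("http://localhost:5000/predict", json=payload)
print(resp.json())  # echoes the stored record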
45 changes: 45 additions & 0 deletions backend/titus_nahom.py
@@ -0,0 +1,45 @@
import os
from datetime import datetime
from pickle import load

from flask import Flask, jsonify, make_response, request

app = Flask(__name__)


@app.route("/health", methods=["GET"])
def health():
    return make_response(
        jsonify(
            {
                "success": True,
                "timestamp": datetime.now().isoformat(),
            }
        ),
        200,
    )


@app.route("/predict", methods=["POST", 'GET'])
def predict():
path = request.json['path']
text = request.json['text']
duration = request.json['duration']

#Assemble files to form a new dictionary
file = {"path": path,
"text": text,
"duration": duration}

pwd = os.getcwd()
rnn_model_path = os.path.join(pwd, "../model/RNN_model.pickle")
rnn_model = load(open(rnn_model_path, "rb"))
prediction = rnn_model.predict(file)

return jsonify(file)

return make_response(jsonify({"success": True, "data": prediction}), 200)


if __name__ == "__main__":
app.run(host="localhost", port=5000, debug=True)