2 changes: 2 additions & 0 deletions ocrjs/.gitignore
@@ -0,0 +1,2 @@
**/node_modules
**/dist
17 changes: 17 additions & 0 deletions ocrjs/README.md
@@ -0,0 +1,17 @@
## AgentOCR ocrjs

Works in Node.js/Electron and in the browser.

Powered by PaddleOCR & ONNX Runtime.

```
npm install --save ocrjs
```

Examples are in `test/test.ts`; see the usage sketch below.

### Model inference

1. [x] Orientation classification
2. [x] Text detection
3. [ ] Text recognition
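A minimal usage sketch of the `OCR` class added in `src/index.ts`, assuming the package is installed under the `ocrjs` name from `package.json` and runs under Node.js; `imageBase64` is a placeholder for a real base64-encoded PNG/JPG string:

```
import OCR from "ocrjs"

async function main() {
  // set up the onnxruntime and photon backends
  const ocr = await new OCR().init()

  // placeholder: a base64-encoded PNG/JPG data URL
  const imageBase64 = "data:image/png;base64,..."

  // orientation classification -> { direction, confidence }
  console.log(await ocr.cls(imageBase64))

  // text detection -> { leftTop, rightBottom }
  console.log(await ocr.det(imageBase64))
}

main()
```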
Binary file added ocrjs/img/a.png
Binary file added ocrjs/img/b.png
Binary file added ocrjs/img/boarder.png
22 changes: 22 additions & 0 deletions ocrjs/package.json
@@ -0,0 +1,22 @@
{
"name": "ocrjs",
"author": "sovlookup <gonorth@qq.com>",
"version": "0.0.1",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"build": "tsc"
},
"dependencies": {
"@silvia-odwyer/photon": "^0.3.11",
"@silvia-odwyer/photon-node": "^0.3.1",
"cross-fetch": "^3.1.4",
"onnxruntime-common": "^1.9.0",
"onnxruntime-node": "^1.9.0",
"onnxruntime-web": "^1.9.0"
},
"devDependencies": {
"tsup": "^5.4.0",
"typescript": "^4.5.0-dev.20211010"
}
}
1,595 changes: 1,595 additions & 0 deletions ocrjs/pnpm-lock.yaml

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions ocrjs/src/config.ts
@@ -0,0 +1,52 @@
const models = {
'cls': {
'ch_mul_m_cls':
'https://bj.bcebos.com/v1/ai-studio-online/1fce1485ebe647c89754fd9f2aeee0a8e815de59b1644875af9013f0b93f56cf?responseContentDisposition=attachment%3B%20filename%3Dch_mul_m_cls.onnx',
},
'det': {
'ch_mul_m_det':
'https://bj.bcebos.com/v1/ai-studio-online/a0fc871ef7354119a35af83b53fea5dd16826a3e291b4500af8fc1feb0b6b2a6?responseContentDisposition=attachment%3B%20filename%3Dch_mul_m_det.onnx',
'ch_mul_s_det':
'https://bj.bcebos.com/v1/ai-studio-online/c905bf277fc44368a2b122b361143cad704d1d4ff5a74a13a9d4c749b02e9d48?responseContentDisposition=attachment%3B%20filename%3Dch_mul_s_det.onnx',
'en_mul_m_det':
'https://bj.bcebos.com/v1/ai-studio-online/a85db324f5c54a618712ef670fe6a3a5b4dcf68630744b16a322141c577bf39b?responseContentDisposition=attachment%3B%20filename%3Den_mul_m_det.onnx',
'ch_mul_v2_c_det':
'https://bj.bcebos.com/v1/ai-studio-online/485da521bb90456b86469443a4bc6ad25f0e7db1aa624a54b4bc9d2f1dc9db67?responseContentDisposition=attachment%3B%20filename%3Dch_mul_v2_c_det.onnx',
},
'rec': {
'ch_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/78a4d8d082e347798c0b46962c74b7a96aa545c79d2642999aa7a138dc0f3793?responseContentDisposition=attachment%3B%20filename%3Dch_m_rec.onnx',
'ch_s_rec':
'https://bj.bcebos.com/v1/ai-studio-online/b8a79b079e084148a1c8fbcbe713496d395ae68efd06419fbfdbf1d1d0046e63?responseContentDisposition=attachment%3B%20filename%3Dch_s_rec.onnx',
'cht_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/debddf1e4116481196c03e25bb9f8dd37b525adbc9874bd8bb6857d1f4b8ca5b?responseContentDisposition=attachment%3B%20filename%3Dcht_m_rec.onnx',
'ka_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/a2925709eef94299b1b64e833150c236a81e0b91ded34ab985ff0524030354d9?responseContentDisposition=attachment%3B%20filename%3Dka_m_rec.onnx',
'te_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/97505fd278cf47e49b04bb1ad3131012e66211948da1432980fbea445ca95ac4?responseContentDisposition=attachment%3B%20filename%3Dte_m_rec.onnx',
'ta_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/19c24681007e496ba71ed329b7d11628a0660bf05a7b4970a8eb1828f54e6b10?responseContentDisposition=attachment%3B%20filename%3Dta_m_rec.onnx',
'jp_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/ce668ac53692411fba4762a5c9bd93e930ff7a3557404706ac87bd5730cdbc13?responseContentDisposition=attachment%3B%20filename%3Djp_m_rec.onnx',
'la_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/a33861210f9c4a21b0cbfcbc5b69da56dec4343734ac484c801886d7574cfa13?responseContentDisposition=attachment%3B%20filename%3Dla_m_rec.onnx',
'ar_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/3e04471737084487a12bdaab0585f2de25012e9eb5ce4c4b832c83cd15f64377?responseContentDisposition=attachment%3B%20filename%3Dar_m_rec.onnx',
'kr_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/9dd5aa5dec9e440e81fbead852845d4779655b960ca9433d8d8178cb21066515?responseContentDisposition=attachment%3B%20filename%3Dkr_m_rec.onnx',
'fr_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/0424f2c98e714e9bb085245b98780ccc8dd059cdf17d45e3aeeb947ed9274176?responseContentDisposition=attachment%3B%20filename%3Dfr_m_rec.onnx',
'ger_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/219aebb29d4b44d791f4152d97df1a20f3edd69a8ad440bcb596676b63eb4c3a?responseContentDisposition=attachment%3B%20filename%3Dger_m_rec.onnx',
'cy_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/1ebdfb414e2e4fa2a8a8d7b823b43b3bf2cb5890511440588e20e356414d2de1?responseContentDisposition=attachment%3B%20filename%3Dcy_m_rec.onnx',
'en_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/68b2ee780b2f4017be955fcb226091e4e557935898004e899614709e9874cc16?responseContentDisposition=attachment%3B%20filename%3Den_m_rec.onnx',
'de_m_rec':
'https://bj.bcebos.com/v1/ai-studio-online/d4e698603f6243e98bbf8b7e28585d168e831d6bd852426cb1cbe11a7c487c71?responseContentDisposition=attachment%3B%20filename%3Dde_m_rec.onnx',
'ch_v2_c_rec':
'https://bj.bcebos.com/v1/ai-studio-online/365e482fd2f94052924bfe4f5d44aac34866850e00f74625a716fb928da32163?responseContentDisposition=attachment%3B%20filename%3Dch_v2_c_rec.onnx',
}
}

export { models }
1 change: 1 addition & 0 deletions ocrjs/src/dict/ch_dict.txt

Large diffs are not rendered by default.

65 changes: 65 additions & 0 deletions ocrjs/src/index.ts
@@ -0,0 +1,65 @@
import { clsInfer, detInfer, recInfer } from "./infer"
import init, { onnxModuleType, photonModuleType } from "./init"
import fetch from "cross-fetch"
import { models } from "./config"
import { InferenceSession } from "onnxruntime-common"

export default class OCR {
private onnxruntime!: onnxModuleType
private photon!: photonModuleType
private models: Map<string, InferenceSession> = new Map();
constructor() { }

async init() {
const { onnxruntime, photon } = await init()
this.onnxruntime = onnxruntime
this.photon = photon
return this
}

private async getOrDownLoadModel(modelURL: string) {
if (!this.models.has(modelURL)) {
const res = await fetch(modelURL)
this.models.set(
modelURL,
await this.onnxruntime.InferenceSession.create(await res.arrayBuffer()),
)
}
return this.models.get(modelURL) as InferenceSession
}

async cls(base64: string) {
return await clsInfer(
{
base64,
modelSession: await this.getOrDownLoadModel(models.cls.ch_mul_m_cls),
onnxruntime: this.onnxruntime,
photon: this.photon,
},
)
}

// TODO: tune the resizeMethod parameter
async det(base64: string) {
return await detInfer(
{
base64,
modelSession: await this.getOrDownLoadModel(models.det.ch_mul_v2_c_det),
onnxruntime: this.onnxruntime,
photon: this.photon,
}, 1
)
}

// TODO: tune the resizeMethod parameter
async rec(base64: string) {
return await recInfer(
{
base64,
modelSession: await this.getOrDownLoadModel(models.rec.ch_v2_c_rec),
onnxruntime: this.onnxruntime,
photon: this.photon,
}, 1
)
}
}
97 changes: 97 additions & 0 deletions ocrjs/src/infer/index.ts
@@ -0,0 +1,97 @@
import { Tensor } from "onnxruntime-common";
import { InferInput } from "../init";
import { Point } from "./types";
import { infer, yResize } from "./utils";

// Text orientation classification
async function clsInfer(opt: InferInput) {
// [x,2]
const postInfer = (
dataC: Tensor,
) => (dataC.data[0] >= dataC.data[1]
? { "direction": "up", "confidence": dataC.data[0] }
: { "direction": "down", "confidence": dataC.data[1] });

return await infer(opt, postInfer);
}

// Text detection
async function detInfer(opt: InferInput, resizeMethod: 1 | 2 | 3 | 4 | 5) {
opt.reshapeOpt = {
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225],
bgr: true,
};

const { photon } = opt;
const { resize } = photon;

let wb: number;
let hb: number;

// width and height must be multiples of 32
const preInfer = (img: import("@silvia-odwyer/photon").PhotonImage) => {
const raw_w = img.get_width();
const raw_h = img.get_height();

const new_w = yResize(raw_w);
const new_h = yResize(raw_h);

wb = raw_w / new_w;
hb = raw_h / new_h;

return resize(img, new_w, new_h, resizeMethod);
};

// [x,1,w,h] parse the result
const postInfer = (dataC: Tensor) => {
// min
let mx: number | null = null;
let my: number | null = null;
// max
let ax = 0;
let ay = 0;
const w = dataC.dims[dataC.dims.length - 2];
dataC.data.forEach((v, i) => {
if (v > 0.000001) {
const point = (i + 1);
const tmpx = point % w;
const x = tmpx === 0 ? w : tmpx;
const y = Math.ceil(point / w);

if (x > ax) ax = x;
if (y > ay) ay = y;
if (mx === null || x < mx) mx = x;
if (my === null || y < my) my = y;
}
});
return {
leftTop: <Point> {
x: Math.round(<number><unknown>mx * wb),
y: Math.round(<number><unknown>my * hb),
},
rightBottom: <Point> {
x: Math.round(ax * wb),
y: Math.round(ay * hb),
},
};
};
return await infer(opt, postInfer, preInfer);
}

// Text recognition
async function recInfer(opt: InferInput, resizeMethod: 1 | 2 | 3 | 4 | 5) {
const { photon } = opt;
const { resize } = photon;

// width and height must be 32
const preInfer = (img: import("@silvia-odwyer/photon").PhotonImage) =>
resize(img, 32, 32, resizeMethod);

// [x,8,6624] TODO: parse the result
const postInfer = (dataC: Tensor) => JSON.stringify(dataC);

return await infer(opt, postInfer, preInfer);
}

export { clsInfer, detInfer, recInfer };
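For reference, a standalone sketch of the flat-index-to-coordinate mapping that `detInfer`'s post-processing applies to the detection output, assuming a row-major [h, w] layout; `indexToPoint` is an illustrative helper, not part of this PR:

```
// map a flat row-major index into 1-based (x, y), as detInfer's postInfer does
function indexToPoint(i: number, w: number) {
  const point = i + 1
  const tmpx = point % w
  const x = tmpx === 0 ? w : tmpx
  const y = Math.ceil(point / w)
  return { x, y }
}

// e.g. with w = 4: index 0 -> (1, 1), index 3 -> (4, 1), index 4 -> (1, 2)
console.log(indexToPoint(0, 4), indexToPoint(3, 4), indexToPoint(4, 4))
```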
13 changes: 13 additions & 0 deletions ocrjs/src/infer/types.ts
@@ -0,0 +1,13 @@
import { TypedTensor, Tensor } from 'onnxruntime-common'

export interface Feeds {
[index: string]: TypedTensor<"float32">
}

export interface Point {
x: number,
y: number
}

export type PreInfer = (img: import('@silvia-odwyer/photon').PhotonImage) => typeof img | Promise<typeof img>
export type PostInfer<T> = (tensor: Tensor) => T | Promise<T>
80 changes: 80 additions & 0 deletions ocrjs/src/infer/utils.ts
@@ -0,0 +1,80 @@
import { InferInput, ReshapeOption } from "../init";
import { Feeds, PostInfer, PreInfer } from "./types";

export async function infer<T>(
opt: InferInput,
postInfer: PostInfer<T>,
preInfer?: PreInfer,
) {
const { base64, modelSession, onnxruntime, photon, reshapeOpt = {} } = opt;
const { Tensor } = onnxruntime;
const { PhotonImage } = photon;
const data = base64.replace(/^data:image\/(png|jpg);base64,/, "");
let phtn_img = PhotonImage.new_from_base64(data);
if (preInfer !== undefined) phtn_img = await preInfer(phtn_img);
const width = phtn_img.get_width();
const height = phtn_img.get_height();
const img = allReshapeToRGB(phtn_img.get_raw_pixels(), width, height, reshapeOpt);
const dims = [1, 3, width, height];
// data fed to the model
const feeds = <Feeds> {};
feeds[modelSession.inputNames[0]] = new Tensor("float32", img, dims);
// run model inference
const results = await modelSession.run(feeds);
// read the output
const dataC = results[modelSession.outputNames[0]];

return await postInfer(dataC);
}

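// round to the nearest multiple of 32 (ties round up), with a floor of 32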
export function yResize(n: number) {
n = n < 32 ? 32 : n;
const y = n % 32;
if ((32 - (2 * y)) > 0) {
n = n - y;
} else {
n = n + 32 - y;
}
return n;
}

/**
* Convert raw RGBA pixel data to a normalized RGB float array (H * W * 3)
* @param imageData raw pixel data
* @param width image width
* @param height image height
* @param opt options
* @param opt.mean per-channel mean
* @param opt.std per-channel standard deviation
*/
function allReshapeToRGB(
imageData: Uint8Array,
width: number,
height: number,
opt: ReshapeOption,
) {
// mean and std are in the range 0-1
const { mean = [0, 0, 0], std = [1, 1, 1], bgr = false, normalizeType = 0 } =
opt;
const result = new Float32Array(height * width * 3);
let offset = 0;
// iterate in H, W, C order
for (let i = 0; i < height; ++i) {
const iw = i * width;
for (let j = 0; j < width; ++j) {
const iwj = iw + j;
for (let k = 0; k < 3; ++k) {
const a = bgr ? iwj * 4 + (2 - k) : iwj * 4 + k;
result[offset] = normalizeType === 0
? imageData[a] / 255
: (imageData[a] - 128) / 128;
result[offset] -= mean[k];
result[offset] /= std[k];
offset++;
}
}
}
return result;
}
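As a quick check, the per-channel normalization that `allReshapeToRGB` applies with the default `normalizeType` of 0, written as a standalone sketch; `normalizeChannel` is an illustrative helper, not part of this PR:

```
// per-channel normalization as applied in allReshapeToRGB (normalizeType 0):
//   out = (pixel / 255 - mean[k]) / std[k]
function normalizeChannel(pixel: number, mean: number, std: number) {
  return (pixel / 255 - mean) / std
}

// e.g. with the detection preset mean 0.485 / std 0.229, a full-bright channel:
console.log(normalizeChannel(255, 0.485, 0.229)) // ≈ 2.249
```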