MigoXLab · e06084 · Jan 13, 2025 · Jan 13, 2025 · Jan 13, 2025
diff --git a/README.md b/README.md
@@ -1,6 +1,3 @@
-[google1350ab9ed01b79fc.html](./google1350ab9ed01b79fc.html)
-google-site-verification: google1350ab9ed01b79fc.html
-
 <div align="center" xmlns="http://www.w3.org/1999/html">
 <!-- logo -->
 <p align="center">

diff --git a/dingo/model/prompt/prompt_text_quality_v3.py b/dingo/model/prompt/prompt_text_quality_v3.py
@@ -43,3 +43,44 @@ class PromptTextQualityV3(BasePrompt):
 Please remember to output only a JSON format data, without any additional content.
 # Input content
     """
+
+@Model.prompt_register("TEXT_QUALITY_V3_B3", [])
+class PromptTextQualityV3B3(BasePrompt):
+    content = """
+# Role
+You are an expert in language model.
+# Background
+The dataset has been compiled from a variety of sources, including social media platforms, news outlets, academic journals, and online forums.
+# Goals
+Your primary objective is to assess the suitability of this dataset for training a large language model.Please note that if the text meets any of the following negative descriptions, it will be considered low-quality data.
+# Criteria
+1. Completeness
+    1.1 Error_Formula_Table: If the text contains formulas or tables, then their format or content is incorrect.
+    1.2 Error_List_Number: If the text contains list number, numbers in the list number have inconsistent formats and logical discontinuity.
+    1.3 Error_Section_Order: The order of text is cluttered, such as titles embedded in the main text or structures that are difficult to read.
+2. Effectiveness
+    2.1 Error_Garbled_Characters: The text contains a large amount of garbled and anti crawler characters.
+    2.2 Error_Words_Stuck: The text contains a large number of words that are stuck together without being separated by spaces. Words with hyphens are considered normal and treat newline characters (\n) as spaces.
+    2.3 Error_Lack_Punctuation: Text contains a large number of sentences that have no punctuation mark at the end.
+3. Similarity
+    3.1 Error_Duplicate_Content: The text contains consecutive repeated text and multiple occurrences of characters.
+4. Security
+    4.1 Error_Political_Content: The text contains harmful political content, including hurting socialist regime system, hurting china interests image, hurting china unity stability, promoting terrorism_and_extremism, or promoting ethnic hate and discrimination.
+    4.2 Error_Prohibited_Content: The text contains prohibited content, including violence, pornography, or gambling.
+# Workflow
+1. Carefully read and understand the provided text, evaluate the quality of the text based on the negative criteria.
+2. Assign a type to the text.
+    -If the text does not hit any negative criteria above, type must only be 'Good'; otherwise, type must only be one of the list ['Completeness', 'Effectiveness', 'Similarity', 'Security'].
+3. Assign a name to the text.
+    -If type is 'Good', name must only be 'None'.
+    -If type is "Completeness", name must only be one of the list ["Error_Formula_Table", "Error_List_Number", "Error_Section_Order"]
+    -If type is "Effectiveness", name must only be one of the list ["Error_Garbled_Characters", "Error_Words_Stuck" or "Error_Lack_Punctuation"]
+    -If type is "Similarity", name must only be one of the list ["Error_Duplicate_Content"]
+    -If type is "Security", name must only be one of the list ["Error_Political_Content", "Error_Prohibited_Content"]
+4. Assign a score to the text according the type. If the type is "Good", score is 1, otherwise the score is 0.
+5. Provide a clear reason for the evaluation.
+6. Return the results in JSON format: {"score": 0/1, "type": [], "name": [], "reason": []}.
+# Warning
+Please remember to output only a JSON format data, without any additional content.
+# Input content
+"""
diff --git a/google1350ab9ed01b79fc.html b/google1350ab9ed01b79fc.html
diff --git a/googlee2eb6e8bd49c0266.html b/googlee2eb6e8bd49c0266.html
@@ -0,0 +1 @@
+google-site-verification: googlee2eb6e8bd49c0266.html
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		google-site-verification: googlee2eb6e8bd49c0266.html