diff --git a/pytext/data/tensorizers.py b/pytext/data/tensorizers.py index ff6fff89c..ce3e10ab2 100644 --- a/pytext/data/tensorizers.py +++ b/pytext/data/tensorizers.py @@ -338,7 +338,7 @@ def tensorize(self, batch): return pad_and_tensorize(batch, dtype=torch.float) -class MetaInput(Tensorizer): +class RawString(Tensorizer): """A pass-through tensorizer to include raw fields from datasource in the batch. Used mostly for metric reporting.""" @@ -358,7 +358,7 @@ def numberize(self, row): return row[self.column] -class JsonMetaInput(MetaInput): +class RawJson(RawString): def numberize(self, row): return json.loads(row[self.column]) diff --git a/pytext/models/doc_model.py b/pytext/models/doc_model.py index 38f20d606..5e43b1f58 100644 --- a/pytext/models/doc_model.py +++ b/pytext/models/doc_model.py @@ -8,8 +8,8 @@ from pytext.config.field_config import WordFeatConfig from pytext.data.tensorizers import ( LabelTensorizer, - MetaInput, NumericLabelTensorizer, + RawString, Tensorizer, TokenTensorizer, ) @@ -59,7 +59,7 @@ class ModelInput(Model.Config.ModelInput): tokens: TokenTensorizer.Config = TokenTensorizer.Config() labels: LabelTensorizer.Config = LabelTensorizer.Config(allow_unknown=True) # for metric reporter - raw_text: MetaInput.Config = MetaInput.Config(column="text") + raw_text: RawString.Config = RawString.Config(column="text") inputs: ModelInput = ModelInput() embedding: WordEmbedding.Config = WordEmbedding.Config()