Merge reformat. Changelog
flaviussn committed Jan 20, 2020
1 parent 267fff6 commit f204d0a
Showing 6 changed files with 88 additions and 36 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,16 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.18.10] - 2020-01-20
### Added

- Added Makefile with targets for test dependency installation, running tests, formatting, and type checking.
- Added setup.cfg file with Make configuration
- Added tests for the functionality
- Linted files using flake8
- Formatted files using black
- Ran tests with pytest
- Removed unused variables

## [0.18.9] - 2020-01-20
### Fixed
41 changes: 26 additions & 15 deletions simpletransformers/classification/classification_model.py
@@ -86,7 +86,6 @@


class ClassificationModel:

def __init__(
self,
model_type,
@@ -111,8 +110,7 @@ def __init__(
use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
cuda_device (optional): Specific GPU that should be used. Will use the first available GPU by default.
**kwargs (optional): For providing proxies, force_download, resume_download, cache_dir and other options specific to the 'from_pretrained' implementation where this will be supplied.
""" # noqa: ignore flake8"

""" # noqa: ignore flake8"

MODEL_CLASSES = {
"bert": (BertConfig, BertForSequenceClassification, BertTokenizer),
@@ -143,10 +141,12 @@ def __init__(

config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
if num_labels:
self.config = config_class.from_pretrained(model_name, num_labels=num_labels, **kwargs)
self.config = config_class.from_pretrained(
model_name, num_labels=num_labels, **kwargs
)
self.num_labels = num_labels
else:
self.config = config_class.from_pretrained(model_name, **kwargs)
self.config = config_class.from_pretrained(model_name, **kwargs)
self.num_labels = self.config.num_labels
self.weight = weight

@@ -166,9 +166,16 @@ def __init__(

if self.weight:

self.model = model_class.from_pretrained(model_name, config=self.config, weight=torch.Tensor(self.weight).to(self.device), **kwargs)
self.model = model_class.from_pretrained(
model_name,
config=self.config,
weight=torch.Tensor(self.weight).to(self.device),
**kwargs,
)
else:
self.model = model_class.from_pretrained(model_name, config=self.config, **kwargs)
self.model = model_class.from_pretrained(
model_name, config=self.config, **kwargs
)

self.results = {}

@@ -187,15 +194,19 @@ def __init__(
if args:
self.args.update(args)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.args['model_name'] = model_name
self.args['model_type'] = model_type
self.args["model_name"] = model_name
self.args["model_type"] = model_type

if model_type in ['camembert', 'xlmroberta']:
warnings.warn(f"use_multiprocessing automatically disabled as {model_type} fails when using multiprocessing for feature conversion.")
self.args['use_multiprocessing'] = False
if model_type in ["camembert", "xlmroberta"]:
warnings.warn(
f"use_multiprocessing automatically disabled as {model_type}"
" fails when using multiprocessing for feature conversion."
)
self.args["use_multiprocessing"] = False

self.args["model_name"] = model_name
self.args["model_type"] = model_type
@@ -582,7 +593,7 @@ def train(
) and not os.path.exists(output_dir_current):
os.makedirs(output_dir_current)

if args['save_model_every_epoch']:
if args["save_model_every_epoch"]:

model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir_current)
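For reference, a minimal usage sketch of the ClassificationModel constructor documented above; the model name, weight list, and args values are hypothetical examples, not part of this commit.

```python
from simpletransformers.classification import ClassificationModel

# Hypothetical example: a binary classifier forced onto CPU (use_cuda=False),
# with optional per-class loss weights passed through as a torch.Tensor.
model = ClassificationModel(
    "bert",
    "bert-base-cased",
    num_labels=2,
    weight=[1.0, 2.0],
    use_cuda=False,
    args={"do_lower_case": False},
)
```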
simpletransformers/classification/multi_label_classification_model.py
@@ -31,8 +31,16 @@


class MultiLabelClassificationModel(ClassificationModel):

def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, args=None, use_cuda=True, **kwargs):
def __init__(
self,
model_type,
model_name,
num_labels=None,
pos_weight=None,
args=None,
use_cuda=True,
**kwargs
):

"""
Initializes a MultiLabelClassification model.
@@ -45,7 +53,7 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
args (optional): Default args will be used if this parameter is not provided. If provided, it should be a dict containing the args that should be changed in the default args.
use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
**kwargs (optional): For providing proxies, force_download, resume_download, cache_dir and other options specific to the 'from_pretrained' implementation where this will be supplied.
"""# noqa: ignore flake8"
""" # noqa: ignore flake8"

MODEL_CLASSES = {
"bert": (
@@ -79,7 +87,9 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
if num_labels:

self.config = config_class.from_pretrained(model_name, num_labels=num_labels, **kwargs)
self.config = config_class.from_pretrained(
model_name, num_labels=num_labels, **kwargs
)

self.num_labels = num_labels
else:
@@ -99,9 +109,16 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
self.device = "cpu"

if self.pos_weight:
self.model = model_class.from_pretrained(model_name, config=self.config, pos_weight=torch.Tensor(self.pos_weight).to(self.device), **kwargs)
self.model = model_class.from_pretrained(
model_name,
config=self.config,
pos_weight=torch.Tensor(self.pos_weight).to(self.device),
**kwargs
)
else:
self.model = model_class.from_pretrained(model_name, config=self.config, **kwargs)
self.model = model_class.from_pretrained(
model_name, config=self.config, **kwargs
)

self.results = {}

@@ -120,9 +137,9 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
if args:
self.args.update(args)


self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.args["model_name"] = model_name
self.args["model_type"] = model_type
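Similarly, a minimal usage sketch of the MultiLabelClassificationModel constructor above; the model name and pos_weight values are hypothetical.

```python
from simpletransformers.classification import MultiLabelClassificationModel

# Hypothetical example: six independent labels; pos_weight is passed through
# as a torch.Tensor to the underlying model, as in the constructor above.
model = MultiLabelClassificationModel(
    "roberta",
    "roberta-base",
    num_labels=6,
    pos_weight=[1.0, 2.0, 1.0, 1.0, 3.0, 1.0],
    use_cuda=False,
)
```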
2 changes: 2 additions & 0 deletions simpletransformers/custom_models/models.py
@@ -9,6 +9,7 @@
from transformers.modeling_utils import SequenceSummary, PreTrainedModel
from transformers import RobertaModel
from transformers.configuration_roberta import RobertaConfig
from torch.nn import BCEWithLogitsLoss

from transformers.modeling_albert import (
AlbertConfig,
@@ -19,6 +20,7 @@
from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from transformers.modeling_distilbert import DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP


class BertForMultiLabelSequenceClassification(BertPreTrainedModel):
"""
Bert model adapted for multi-label sequence classification
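The BCEWithLogitsLoss import added above is used by the multi-label classification heads in this file; below is a small self-contained sketch of how it behaves with pos_weight (all values illustrative, not part of the diff).

```python
import torch
from torch.nn import BCEWithLogitsLoss

# Each of the 6 labels is scored independently; a pos_weight entry > 1
# up-weights positive examples for that label.
logits = torch.randn(4, 6)                    # (batch_size, num_labels)
labels = torch.randint(0, 2, (4, 6)).float()  # multi-hot targets
loss_fct = BCEWithLogitsLoss(pos_weight=torch.tensor([1.0, 2.0, 1.0, 1.0, 3.0, 1.0]))
loss = loss_fct(logits, labels)
```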
23 changes: 17 additions & 6 deletions simpletransformers/ner/ner_model.py
@@ -56,7 +56,16 @@


class NERModel:
def __init__(self, model_type, model_name, labels=None, args=None, use_cuda=True, cuda_device=-1, **kwargs):
def __init__(
self,
model_type,
model_name,
labels=None,
args=None,
use_cuda=True,
cuda_device=-1,
**kwargs,
):
"""
Initializes a NERModel
@@ -107,7 +116,9 @@ def __init__(self, model_type, model_name, labels=None, args=None, use_cuda=True

config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]

self.model = model_class.from_pretrained(model_name, num_labels=self.num_labels, **kwargs)
self.model = model_class.from_pretrained(
model_name, num_labels=self.num_labels, **kwargs
)

if use_cuda:
if torch.cuda.is_available():
@@ -135,9 +146,9 @@ def __init__(self, model_type, model_name, labels=None, args=None, use_cuda=True
if args:
self.args.update(args)


self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.args["model_name"] = model_name
self.args["model_type"] = model_type
@@ -466,7 +477,7 @@ def train(self, train_dataset, output_dir, show_running_loss=True, eval_df=None)
) and not os.path.exists(output_dir_current):
os.makedirs(output_dir_current)

if args['save_model_every_epoch']:
if args["save_model_every_epoch"]:

model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir_current)
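A minimal usage sketch of the NERModel constructor above; the label list is a hypothetical example and may be omitted to use the defaults.

```python
from simpletransformers.ner import NERModel

# Hypothetical example: a custom label list on CPU; cuda_device would select
# a specific GPU when use_cuda=True.
model = NERModel(
    "bert",
    "bert-base-cased",
    labels=["O", "B-PER", "I-PER", "B-LOC", "I-LOC"],
    use_cuda=False,
)
```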
simpletransformers/question_answering/question_answering_model.py
@@ -62,8 +62,9 @@


class QuestionAnsweringModel:

def __init__(self, model_type, model_name, args=None, use_cuda=True, cuda_device=-1, **kwargs):
def __init__(
self, model_type, model_name, args=None, use_cuda=True, cuda_device=-1, **kwargs
):

"""
Initializes a QuestionAnsweringModel model.
@@ -125,9 +126,9 @@ def __init__(self, model_type, model_name, args=None, use_cuda=True, cuda_device
if args:
self.args.update(args)


self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.args["model_name"] = model_name
self.args["model_type"] = model_type
@@ -542,7 +543,7 @@ def train(self, train_dataset, output_dir, show_running_loss=True, eval_data=Non
) and not os.path.exists(output_dir_current):
os.makedirs(output_dir_current)

if args['save_model_every_epoch']:
if args["save_model_every_epoch"]:

model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir_current)
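Finally, a minimal usage sketch of the QuestionAnsweringModel constructor above; the model name is a hypothetical example, not part of this commit.

```python
from simpletransformers.question_answering import QuestionAnsweringModel

# Hypothetical example: a SQuAD-distilled checkpoint on CPU.
model = QuestionAnsweringModel(
    "distilbert",
    "distilbert-base-cased-distilled-squad",
    use_cuda=False,
)
```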
