import spacy, sklearn_crfsuite
from .base import BasePipeline
from ..pipeline_components import ClinicalTokenizer
from medacy.model.feature_extractor import FeatureExtractor
from ..pipeline_components import GoldAnnotatorComponent
class TestingPipeline(BasePipeline):
    """
    A pipeline for test running.

    Combines spaCy's small English model with the clinical tokenizer and a
    gold-annotation overlay component, and exposes a CRF learner.
    """

    def __init__(self, entities=None):
        """
        Create a pipeline with the name 'test_pipeline' utilizing
        spaCy's small English model ('en_core_web_sm').

        :param entities: value stored on ``self.entities`` and handed to
            ``GoldAnnotatorComponent``; presumably the list of entity labels
            to annotate (confirm against that component). Defaults to a new
            empty list for each instance.
        """
        # A literal ``[]`` default would be one list object shared by every
        # instance created without arguments, so use a None sentinel instead.
        if entities is None:
            entities = []

        description = """Pipeline for unit tests"""
        super().__init__(
            "test_pipeline",
            spacy_pipeline=spacy.load("en_core_web_sm"),
            description=description,
            creators="Andriy Mulyar (andriymulyar.com)",  # append if multiple creators
            organization="NLP@VCU",
        )

        self.entities = entities

        # Replace spaCy's stock tokenizer with the one built in get_tokenizer().
        self.spacy_pipeline.tokenizer = self.get_tokenizer()

        # Add the overlay for gold annotations.
        self.add_component(GoldAnnotatorComponent, entities)

    def get_learner(self):
        """
        Build the learner used by this pipeline.

        :return: a ``(name, model)`` tuple where name is ``"CRF_l2sgd"`` and
            model is an ``sklearn_crfsuite.CRF`` configured with the 'l2sgd'
            algorithm, ``c2=0.1``, at most 100 iterations and all possible
            transitions enabled.
        """
        return ("CRF_l2sgd", sklearn_crfsuite.CRF(
            algorithm='l2sgd',
            c2=0.1,
            max_iterations=100,
            all_possible_transitions=True
        ))

    def get_tokenizer(self):
        """
        Build the tokenizer for this pipeline.

        :return: the ``tokenizer`` attribute of a ``ClinicalTokenizer``
            constructed from this pipeline's ``spacy_pipeline``.
        """
        tokenizer = ClinicalTokenizer(self.spacy_pipeline)
        return tokenizer.tokenizer