Source code for medacy.pipelines.testing_pipeline

import spacy, sklearn_crfsuite
from .base import BasePipeline
from ..pipeline_components import ClinicalTokenizer
from medacy.model.feature_extractor import FeatureExtractor

from ..pipeline_components import GoldAnnotatorComponent


class TestingPipeline(BasePipeline):
    """
    A pipeline for test running.
    """

    def __init__(self, entities=None):
        """
        Create a pipeline with the name 'test_pipeline' built on spaCy's small
        English model (``en_core_web_sm``).

        :param entities: the entities stored on ``self.entities`` and passed to
            the GoldAnnotatorComponent when it is added to the pipeline. When
            omitted (or None), a new empty list is used.
        """
        # Build the default per call: a literal ``[]`` in the signature is a
        # single list object shared by every pipeline created without arguments.
        if entities is None:
            entities = []

        description = """Pipeline for unit tests"""
        super().__init__(
            "test_pipeline",
            spacy_pipeline=spacy.load("en_core_web_sm"),
            description=description,
            creators="Andriy Mulyar (andriymulyar.com)",  # append if multiple creators
            organization="NLP@VCU",
        )

        self.entities = entities

        # NOTE(review): self.spacy_pipeline is presumably set by
        # BasePipeline.__init__ from the spacy_pipeline argument -- confirm.
        self.spacy_pipeline.tokenizer = self.get_tokenizer()  # set tokenizer

        self.add_component(GoldAnnotatorComponent, entities)  # add overlay for GoldAnnotation

    def get_learner(self):
        """
        Build the learner used by this pipeline.

        :return: a tuple of the learner name "CRF_l2sgd" and a new
            sklearn_crfsuite.CRF instance configured with the 'l2sgd' algorithm,
            c2=0.1, max_iterations=100 and all_possible_transitions=True.
        """
        return ("CRF_l2sgd", sklearn_crfsuite.CRF(
            algorithm='l2sgd',
            c2=0.1,
            max_iterations=100,
            all_possible_transitions=True
        ))

    def get_tokenizer(self):
        """
        Build a ClinicalTokenizer around this pipeline's spaCy pipeline.

        :return: the ``tokenizer`` attribute of the newly created ClinicalTokenizer.
        """
        tokenizer = ClinicalTokenizer(self.spacy_pipeline)
        return tokenizer.tokenizer

    def get_feature_extractor(self):
        """
        Build the feature extractor used by this pipeline.

        :return: a FeatureExtractor created with window_size=3 and the spaCy
            features 'pos_', 'shape_', 'prefix_', 'suffix_' and 'text'.
        """
        extractor = FeatureExtractor(window_size=3, spacy_features=['pos_', 'shape_', 'prefix_', 'suffix_', 'text'])
        return extractor