Source code for medacy.pipelines.drug_event_pipeline

import spacy, sklearn_crfsuite
from .base import BasePipeline
from medacy.model.feature_extractor import FeatureExtractor

from ..pipeline_components import GoldAnnotatorComponent, MetaMapComponent, CharacterTokenizer
from ..pipeline_components.lexicon import LexiconComponent

class DrugEventPipeline(BasePipeline):
    """
    Pipeline for recognition of adverse drug events from the 2018/19 FDA OSE
    drug label challenge.

    Pairs a SpaCy ``en_core_web_sm`` pipeline (with optional MetaMap and
    lexicon components) with a CRF learner and a windowed feature extractor.
    """

    # Private marker used as the default for ``lexicon``. It lets ``__init__``
    # tell "argument omitted" (use a fresh empty lexicon) apart from an
    # explicit ``None`` (skip the lexicon component) without resorting to a
    # mutable ``{}`` default that would be shared by every instance.
    _DEFAULT_LEXICON = object()

    def __init__(self, metamap=None, entities=None, lexicon=_DEFAULT_LEXICON):
        """
        Init a pipeline for processing data related to identifying adverse drug events

        :param metamap: instance of MetaMap; when None no MetaMap component is added
        :param entities: entities to be identified, for this pipeline adverse drug events.
            Defaults to a new empty list for each pipeline instance.
        :param lexicon: Dictionary with labels and their corresponding lexicons to match on.
            When omitted, a new empty dictionary is used; pass None explicitly to
            skip adding the lexicon component.
        """
        description = "Pipeline for recognition of adverse drug events from the 2018/19 FDA OSE drug label challenge"
        super().__init__(
            "drug_event_pipeline",
            spacy_pipeline=spacy.load("en_core_web_sm"),
            description=description,
            creators="Corey Sutphin",
            organization="NLP@VCU",
        )

        # Build per-instance containers instead of sharing default objects
        # between every pipeline that is constructed without these arguments.
        if entities is None:
            entities = []
        if lexicon is self._DEFAULT_LEXICON:
            lexicon = {}

        self.entities = entities

        # NOTE: SpaCy's default tokenizer is currently used; the tokenizer
        # returned by get_tokenizer() is not installed on the pipeline.

        # Add overlay for GoldAnnotation
        self.add_component(GoldAnnotatorComponent, entities)

        if metamap is not None:
            self.add_component(
                MetaMapComponent,
                metamap,
                semantic_type_labels=[
                    'sosy', 'phpr', 'orga', 'npop', 'mobd',
                    'inpo', 'comd', 'biof', 'bdsu', 'acab',
                ],
            )

        if lexicon is not None:
            self.add_component(LexiconComponent, lexicon)

    def get_learner(self):
        """
        Build the learner used by this pipeline.

        :return: tuple of the learner name ("CRF_l2sgd") and an
            ``sklearn_crfsuite.CRF`` instance configured with the l2sgd algorithm
        """
        return (
            "CRF_l2sgd",
            sklearn_crfsuite.CRF(
                algorithm='l2sgd',
                c2=0.1,
                max_iterations=100,
                all_possible_transitions=True,
            ),
        )

    def get_tokenizer(self):
        """
        Build a character tokenizer on top of this pipeline's SpaCy pipeline.

        :return: the ``tokenizer`` attribute of a ``CharacterTokenizer``
            constructed from ``self.spacy_pipeline``
        """
        tokenizer = CharacterTokenizer(self.spacy_pipeline)
        return tokenizer.tokenizer

    def get_feature_extractor(self):
        """
        Build the feature extractor used by this pipeline.

        :return: a ``FeatureExtractor`` with a window size of 3 over the listed SpaCy features
        """
        extractor = FeatureExtractor(
            window_size=3,
            spacy_features=['pos_', 'shape_', 'prefix_', 'suffix_', 'like_num', 'text', 'head'],
        )
        return extractor