33# Define a set of indicators with the kind of filtering/summarization we want
44# 
55# Academic Language, Latinate Words, Low Frequency Words, Adjectives, Adverbs, 
6- #    Sentences, Paragraphs --   
6+ #    Sentences, Paragraphs -- 
77#    just need to have lexicalfeatures in the pipeline to run. 
88# 
99# Transition Words, Ordinal Transition Words -- 
3535    ('Statements of Opinion' , 'Doc' , 'vwp_statements_of_opinion' , None , 'percent' ),
3636    ('Statements of Fact' , 'Doc' , 'vwp_statements_of_fact' , None , 'percent' ),
3737    # Transitions 
38-     # eventually we want to exclude \n\n as transitions using `[('!=',['introductory'])]` 
38+     # eventually we want to exclude \n\n as transitions using `[('!=',  ['introductory'])]` 
3939    # however the introductory category also includes "let us" and "let's" 
4040    # no highlighting is shown on the new lines, so we won't remove it for now. 
4141    ('Transition Words' , 'Doc' , 'transitions' , None , 'counts' ),
4242    # 
43-     ('Positive Transition Words' , 'Doc' , 'transitions' ,[('==' ,['positive' ])], 'total' ),
44-     ('Conditional Transition Words' , 'Doc' , 'transitions' ,[('==' ,['conditional' ])], 'total' ),
45-     ('Consequential Transition Words' , 'Doc' , 'transitions' ,[('==' ,['consequential' ])], 'total' ),
46-     ('Contrastive Transition Words' , 'Doc' , 'transitions' ,[('==' ,['contrastive' ])], 'total' ),
47-     ('Counterpoint Transition Words' , 'Doc' , 'transitions' ,[('==' ,['counterpoint' ])], 'total' ),
48-     ('Comparative Transition Words' , 'Doc' , 'transitions' ,[('==' ,['comparative' ])], 'total' ),
49-     ('Cross Referential Transition Words' , 'Doc' , 'transitions' ,[('==' ,['crossreferential' ])], 'total' ),
50-     ('Illustrative Transition Words' , 'Doc' , 'transitions' ,[('==' ,['illustrative' ])], 'total' ),
51-     ('Negative Transition Words' , 'Doc' , 'transitions' ,[('==' ,['negative' ])], 'total' ),
52-     ('Emphatic Transition Words' , 'Doc' , 'transitions' ,[('==' ,['emphatic' ])], 'total' ),
53-     ('Evenidentiary Transition Words' , 'Doc' , 'transitions' ,[('==' ,['evidentiary' ])], 'total' ),
54-     ('General Transition Words' , 'Doc' , 'transitions' ,[('==' ,['general' ])], 'total' ),
55-     ('Ordinal Transition Words' , 'Doc' , 'transitions' ,[('==' ,['ordinal' ])], 'total' ),
56-     ('Purposive Transition Words' , 'Doc' , 'transitions' ,[('==' ,['purposive' ])], 'total' ),
57-     ('Periphrastic Transition Words' , 'Doc' , 'transitions' ,[('==' ,['periphrastic' ])], 'total' ),
58-     ('Hypothetical Transition Words' , 'Doc' , 'transitions' ,[('==' ,['hypothetical' ])], 'total' ),
59-     ('Summative Transition Words' , 'Doc' , 'transitions' ,[('==' ,['summative' ])], 'total' ),
60-     ('Introductory Transition Words' , 'Doc' , 'transitions' ,[('==' ,['introductory' ])], 'total' ),
43+     ('Positive Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['positive' ])], 'total' ),
44+     ('Conditional Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['conditional' ])], 'total' ),
45+     ('Consequential Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['consequential' ])], 'total' ),
46+     ('Contrastive Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['contrastive' ])], 'total' ),
47+     ('Counterpoint Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['counterpoint' ])], 'total' ),
48+     ('Comparative Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['comparative' ])], 'total' ),
49+     ('Cross Referential Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['crossreferential' ])], 'total' ),
50+     ('Illustrative Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['illustrative' ])], 'total' ),
51+     ('Negative Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['negative' ])], 'total' ),
52+     ('Emphatic Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['emphatic' ])], 'total' ),
53+     ('Evenidentiary Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['evidentiary' ])], 'total' ),
54+     ('General Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['general' ])], 'total' ),
55+     ('Ordinal Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['ordinal' ])], 'total' ),
56+     ('Purposive Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['purposive' ])], 'total' ),
57+     ('Periphrastic Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['periphrastic' ])], 'total' ),
58+     ('Hypothetical Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['hypothetical' ])], 'total' ),
59+     ('Summative Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['summative' ])], 'total' ),
60+     ('Introductory Transition Words' , 'Doc' , 'transitions' ,  [('==' ,  ['introductory' ])], 'total' ),
6161    # pos_ 
62-     ('Adjectives' , 'Token' , 'pos_' , [('==' ,['ADJ' ])], 'total' ),
63-     ('Adverbs' , 'Token' , 'pos_' , [('==' ,['ADV' ])], 'total' ),
64-     ('Nouns' , 'Token' , 'pos_' , [('==' ,['NOUN' ])], 'total' ),
65-     ('Proper Nouns' , 'Token' , 'pos_' , [('==' ,['PROPN' ])], 'total' ),
66-     ('Verbs' , 'Token' , 'pos_' , [('==' ,['VERB' ])], 'total' ),
67-     ('Numbers' , 'Token' , 'pos_' , [('==' ,['NUM' ])], 'total' ),
68-     ('Prepositions' , 'Token' , 'pos_' , [('==' ,['ADP' ])], 'total' ),
69-     ('Coordinating Conjunction' , 'Token' , 'pos_' , [('==' ,['CCONJ' ])], 'total' ),
70-     ('Subordinating Conjunction' , 'Token' , 'pos_' , [('==' ,['SCONJ' ])], 'total' ),
71-     ('Auxiliary Verb' , 'Token' , 'pos_' , [('==' ,['AUX' ])], 'total' ),
72-     ('Pronoun' , 'Token' , 'pos_' , [('==' ,['PRON' ])], 'total' ),
62+     ('Adjectives' , 'Token' , 'pos_' , [('==' ,  ['ADJ' ])], 'total' ),
63+     ('Adverbs' , 'Token' , 'pos_' , [('==' ,  ['ADV' ])], 'total' ),
64+     ('Nouns' , 'Token' , 'pos_' , [('==' ,  ['NOUN' ])], 'total' ),
65+     ('Proper Nouns' , 'Token' , 'pos_' , [('==' ,  ['PROPN' ])], 'total' ),
66+     ('Verbs' , 'Token' , 'pos_' , [('==' ,  ['VERB' ])], 'total' ),
67+     ('Numbers' , 'Token' , 'pos_' , [('==' ,  ['NUM' ])], 'total' ),
68+     ('Prepositions' , 'Token' , 'pos_' , [('==' ,  ['ADP' ])], 'total' ),
69+     ('Coordinating Conjunction' , 'Token' , 'pos_' , [('==' ,  ['CCONJ' ])], 'total' ),
70+     ('Subordinating Conjunction' , 'Token' , 'pos_' , [('==' ,  ['SCONJ' ])], 'total' ),
71+     ('Auxiliary Verb' , 'Token' , 'pos_' , [('==' ,  ['AUX' ])], 'total' ),
72+     ('Pronoun' , 'Token' , 'pos_' , [('==' ,  ['PRON' ])], 'total' ),
7373    # sentence variety 
7474    ('Sentence Types' , 'Doc' , 'sentence_types' , None , 'counts' ),
75-     ('Simple Sentences' , 'Doc' , 'sentence_types' ,[('==' ,['Simple' ])], 'total' ),
76-     ('Simple with Complex Predicates' , 'Doc' , 'sentence_types' ,[('==' ,['SimpleComplexPred' ])], 'total' ),
77-     ('Simple with Compound Predicates' , 'Doc' , 'sentence_types' ,[('==' ,['SimpleCompoundPred' ])], 'total' ),
78-     ('Simple with Compound Complex Predicates' , 'Doc' , 'sentence_types' ,[('==' ,['SimpleCompoundComplexPred' ])], 'total' ),
79-     ('Compound Sentences' , 'Doc' , 'sentence_types' ,[('==' ,['Compound' ])], 'total' ),
80-     ('Complex Sentences' , 'Doc' , 'sentence_types' ,[('==' ,['Complex' ])], 'total' ),
81-     ('Compound Complex Sentences' , 'Doc' , 'sentence_types' ,[('==' ,['CompoundComplex' ])], 'total' ),
75+     ('Simple Sentences' , 'Doc' , 'sentence_types' ,  [('==' ,  ['Simple' ])], 'total' ),
76+     ('Simple with Complex Predicates' , 'Doc' , 'sentence_types' ,  [('==' ,  ['SimpleComplexPred' ])], 'total' ),
77+     ('Simple with Compound Predicates' , 'Doc' , 'sentence_types' ,  [('==' ,  ['SimpleCompoundPred' ])], 'total' ),
78+     ('Simple with Compound Complex Predicates' , 'Doc' , 'sentence_types' ,  [('==' ,  ['SimpleCompoundComplexPred' ])], 'total' ),
79+     ('Compound Sentences' , 'Doc' , 'sentence_types' ,  [('==' ,  ['Compound' ])], 'total' ),
80+     ('Complex Sentences' , 'Doc' , 'sentence_types' ,  [('==' ,  ['Complex' ])], 'total' ),
81+     ('Compound Complex Sentences' , 'Doc' , 'sentence_types' ,  [('==' ,  ['CompoundComplex' ])], 'total' ),
8282    # Sources/Attributes/Citations/Quotes 
8383    ('Information Sources' , 'Token' , 'vwp_source' , None , 'percent' ),
8484    ('Attributions' , 'Token' , 'vwp_attribution' , None , 'percent' ),
8585    ('Citations' , 'Token' , 'vwp_cite' , None , 'percent' ),
8686    ('Quoted Words' , 'Token' , 'vwp_quoted' , None , 'percent' ),
8787    # Dialogue 
88-     ('Direct Speech Verbs' , 'Doc' , 'vwp_direct_speech' , None , 'percent' ), # TODO needs new label 
89-     ('Indirect Speech Quotation' , 'Token' , 'vwp_in_direct_speech' , None , 'percent' ), # TODO needs new label 
88+     ('Direct Speech Verbs' , 'Doc' , 'vwp_direct_speech' , None , 'percent' ),   # TODO needs new label 
89+     ('Indirect Speech Quotation' , 'Token' , 'vwp_in_direct_speech' , None , 'percent' ),   # TODO needs new label 
9090    # vwp_quoted - already used above 
9191    # tone 
92-     ('Positive Tone' , 'Token' , 'vwp_tone' , [('>' ,[.4 ])], 'percent' ),
93-     ('Negative Tone' , 'Token' , 'vwp_tone' , [('<' ,[- .4 ])], 'percent' ),
92+     ('Positive Tone' , 'Token' , 'vwp_tone' , [('>' ,  [.4 ])], 'percent' ),
93+     ('Negative Tone' , 'Token' , 'vwp_tone' , [('<' ,  [- .4 ])], 'percent' ),
9494    # details 
9595    ('Concrete Details' , 'Token' , 'concrete_details' , None , 'percent' ),
9696    ('Main Idea Sentences' , 'Doc' , 'main_ideas' , None , 'total' ),
9797    ('Supporting Idea Sentences' , 'Doc' , 'supporting_ideas' , None , 'total' ),
9898    ('Supporting Detail Sentences' , 'Doc' , 'supporting_details' , None , 'total' ),
9999    # Other items 
100-     ('Polysyllabic Words' , 'Token' , 'nSyll' , [('>' ,[3 ])], 'percent' ),
101-     ('Low Frequency Words' , 'Token' , 'max_freq' , [('<' ,[4 ])], 'percent' ),
100+     ('Polysyllabic Words' , 'Token' , 'nSyll' , [('>' ,  [3 ])], 'percent' ),
101+     ('Low Frequency Words' , 'Token' , 'max_freq' , [('<' ,  [4 ])], 'percent' ),
102102    ('Sentences' , 'Doc' , 'sents' , None , 'total' ),
103103    ('Paragraphs' , 'Doc' , 'delimiter_\n ' , None , 'total' ),
104104    ('Character Trait Words' , 'Token' , 'vwp_character' , None , 'percent' ),
105105    ('In Past Tense' , 'Token' , 'in_past_tense_scope' , None , 'percent' ),
106-     ('Propositional Attitudes' , 'Doc' , 'vwp_propositional_attitudes' , None , 'percent' ),  # TODO 
107-     ('Social Awareness' , 'Doc' , 'vwp_social_awareness' , None , 'percent' )  # TODO 
106+     ('Propositional Attitudes' , 'Doc' , 'vwp_propositional_attitudes' , None , 'percent' ),
107+     ('Social Awareness' , 'Doc' , 'vwp_social_awareness' , None , 'percent' )
108108]
109109
110110# Create indicator dict to easily refer to each tuple above by name 
115115    INDICATOR_W_IDS .append ((id , ) +  indicator )
116116    INDICATORS [id ] =  (id , ) +  indicator 
117117
118+ 
118119class  NLPIndicators (dataobject ):
119120    id : str 
120121    name : str 
@@ -124,5 +125,6 @@ class NLPIndicators(dataobject):
124125    function : str 
125126    # tooltip: str 
126127
128+ 
# Build one NLPIndicators record per id-prefixed tuple in INDICATOR_W_IDS.
# The records are materialized as a list instead of a lazy `map` object:
# a `map` iterator can only be consumed once, so the comprehension below
# would drain it and leave the module-level name `indicators` empty for
# any later reader.
indicators = [NLPIndicators(*ind) for ind in INDICATOR_W_IDS]
# Each record converted with `asdict`, in the same order as INDICATOR_W_IDS.
INDICATOR_JSONS = [asdict(ind) for ind in indicators]
0 commit comments