Wikidata:University of Virginia/Listeria/UVa people/Common ngrams in titles of UVA-coauthored publications without P921 (main subject) statement

This list is periodically updated by a bot. Manual changes to the list will be removed on the next update!

# Lists up to 250 scholarly articles (P31 = Q13442814) that have an author
# linked by any property to Q213439 (presumably the University of Virginia),
# or to an item that is part of (P361) Q213439, and that carry no main-subject
# (P921) statement. Each article is paired with one sampled four-word phrase
# taken from its English title.
#
# Uses Blazegraph named subqueries (WITH ... AS %name, then INCLUDE %name).
# Every "#" comment has to end at a real line break: if the query is collapsed
# onto a few long lines, each comment swallows the clauses that follow it.
SELECT ?item (SAMPLE(?word) AS ?ngram) #?count ?l
# Disabled output column: link to a query that searches for articles whose title contains the phrase.
#(CONCAT("[https://query.wikidata.org/#SELECT%20%0A%20%20DISTINCT%20%3Fitem%20%3Ftitle%0A%20%20%28REPLACE%28STR%28%3Fitem%29%2C%20%22.%2aQ%22%2C%20%22Q%22%29%20AS%20%3Fqid%29%20%0A%20%20%28%22P921%22%20AS%20%3Fproperty%29%0A%20%20%28%22Q202864%22%20AS%20%3FReplaceWithYourTargetID%29%0A%20%20%28%22S887%22%20AS%20%3Fheuristic%29%0A%20%20%28%22Q69652283%22%20AS%20%3Fdeduced%29%0A%0AWHERE%20%7B%0A%20%20hint%3AQuery%20hint%3Aoptimizer%20%22None%22.%0A%20%20%0A%7B%20%20SERVICE%20wikibase%3Amwapi%20%7B%0A%20%20%20%20bd%3AserviceParam%20wikibase%3Aapi%20%22Search%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20wikibase%3Aendpoint%20%22www.wikidata.org%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20mwapi%3Asrsearch%20%22", ENCODE_FOR_URI(?word), "%20and%20haswbstatement%3AP31%3DQ13442814%20-haswbstatement%3AP921%3DQ202864%22.%0A%20%20%20%20%20%20%3Fpage_title%20wikibase%3AapiOutput%20mwapi%3Atitle.%0A%20%20%7D%0A%20%7D%0AUNION%0A%7B%20%20SERVICE%20wikibase%3Amwapi%20%7B%0A%20%20%20%20bd%3AserviceParam%20wikibase%3Aapi%20%22Search%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20wikibase%3Aendpoint%20%22www.wikidata.org%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20mwapi%3Asrsearch%20%22", ENCODE_FOR_URI(?word), "%20-and%20haswbstatement%3AP31%3DQ13442814%20-haswbstatement%3AP921%3DQ202864%22.%0A%20%20%20%20%20%20%3Fpage_title%20wikibase%3AapiOutput%20mwapi%3Atitle.%0A%20%20%7D%0A%20%7D%0A%20%20BIND%28IRI%28CONCAT%28STR%28wd%3A%29%2C%20%3Fpage_title%29%29%20AS%20%3Fitem%29%0A%20%20%0A%20%20%3Fitem%20wdt%3AP31%20wd%3AQ13442814%3B%0A%20%20%20%20%20%20%20%20wdt%3AP1476%20%3Ftitle.%0A%20%20FILTER%20CONTAINS%28LCASE%28%3Ftitle%29%2C%20%22", ENCODE_FOR_URI(?word), "%22%29.%0A%0A%7D%0A 📰]") AS ?topic_string_resolver1)
# Disabled output column: link to a query that counts author name strings on works whose title contains the phrase.
#(CONCAT("[https://query.wikidata.org/#%0ASELECT%0A%20%20%23%20Number%20of%20works%20on%20the%20topic%20with%20the%20author%20name%20string%0A%20%20%3Fcount%0A%0A%20%20%23%20Number%20of%20works%20with%20the%20author%20name%20string%0A%20%20%3Fcount1%0A%0A%20%20%23%20Author%20as%20a%20string%0A%20%20%3Fauthorstring%0A%0A%20%20%23%20Build%20URL%20to%20the%20Author%20disambiguator%20tool%0A%20%20%28CONCAT%28%0A%20%20%20%20%20%20%27https%3A%2F%2Ftools.wmflabs.org%2Fauthor-disambiguator%2Fnames_oauth.php%3Fname%3D%27%2C%0A%20%20%20%20%20%20ENCODE_FOR_URI%28%3Fauthorstring%29%29%20AS%20%3Fresolver_url%29%0AWITH%20%7B%0A%20%20SELECT%20DISTINCT%20%3Fwork%20%3Fauthorstring%20WHERE%20%7B%0A%20%20%20%20%3Fwork%20wdt%3AP2093%20%3Fauthorstring%20.%0A%23%20%20%20%20%3Fwork%20wdt%3AP50%20%5B%20wdt%3AP21%20wd%3AQ6581072%20%5D.%0A%23%20%20%20%20%3Fwork%20wdt%3AP50%20%5Bwdt%3AP166%20%5B%5D%5D.%0A%23%20%20%20%20%3Fwork%20wdt%3AP921%20%5Bwdt%3AP625%20%5B%5D%5D.%0A%20%20%20%20%7B%20%3Fwork%20wdt%3AP1476%20%3Ftitle%20.%20%7D%20%0A%20%20%20%20FILTER%20CONTAINS%28LCASE%28%3Ftitle%29%2C%20%22", ENCODE_FOR_URI(?word), "%22%29.%20%20%0A%20%20%7D%0A%20%20LIMIT%20100%0A%7D%20AS%20%25works%0AWITH%20%7B%0A%20%20SELECT%0A%20%20%20%20%28COUNT%28DISTINCT%20%3Fwork%29%20AS%20%3Fcount%29%0A%20%20%20%20%28COUNT%28DISTINCT%20%3Fwork1%29%20AS%20%3Fcount1%29%0A%20%20%20%20%3Fauthorstring%0A%20%20WHERE%20%7B%0A%20%20%20%20INCLUDE%20%25works%0A%20%20%20%20%3Fwork1%20wdt%3AP2093%20%3Fauthorstring%20.%0A%20%20%7D%0A%20%20GROUP%20BY%20%3Fauthorstring%0A%7D%20AS%20%25result%0AWHERE%20%7B%0A%20%20INCLUDE%20%25result%0A%7D%0AORDER%20BY%20DESC%28%3Fcount%29%20DESC%28%3Fcount1%29%0A%23LIMIT%2050%0A%0A%20%20%0A 🙋]") AS ?topic_string_resolver2)

WITH {
  # Generating a list of regexes to look for the nv-th word in a string.
  # Items that have a P5176 statement and a numeric value (P1181) serve as the
  # source of the integers 1..50 (presumably number items; verify).
  SELECT ?re1 ?re2 ?nv
  {
    ?n wdt:P5176 [] ;
       wdt:P1181 ?nv .
    FILTER(?nv > 0)
    FILTER(?nv < 51)
    # In ?re1, group 1 ends up holding token nv-1 (with its trailing space)
    # and group 2 holds token nv.
    BIND(CONCAT("^([^ ]+ ){", str(?nv - 1), "}([^ ]+) .*") AS ?re1)
    # In ?re2, group 1 ends up holding token nv+1 (with its trailing space)
    # and group 2 holds token nv+2.
    BIND(CONCAT("^([^ ]+ ){", str(?nv + 1), "}([^ ]+) .*") AS ?re2)
  }
} AS %regexscaffold

WITH {
  # Generating a list of titles to be analyzed.
  SELECT ?title ?x ?input
  {
    # Choosing the entities whose titles are to be analyzed:
    # scholarly articles with a title and at least one author item.
    ?x wdt:P31 wd:Q13442814 ;
       wdt:P1476 ?title .
    ?x wdt:P50 ?author .
    # The author points at Q213439 through any property ...
    { ?author ?property wd:Q213439 . }
    UNION
    # ... or at something that is part of (P361) Q213439.
    {
      ?author ?property2 _:b9 .
      _:b9 wdt:P361 wd:Q213439 .
    }
    # Keep only articles without any main subject (P921) and with an English title.
    FILTER NOT EXISTS { ?x wdt:P921 ?topic }
    FILTER(lang(?title) = "en")
    # Basic processing of the titles.
    BIND(CONCAT(
           # add start codon to assist with processing of n-grams at beginning of title
           "::: ::: ::: ::: ",
           # remove some frequent special characters
           REPLACE(str(?title), "[\\.:,;\\[\\]()$]", ""),
           # add stop codon to assist with processing of n-grams at end of title
           " ;;; ;;; ;;; ;;;"
         ) AS ?input)
  }
  # Only the first 500 solutions are analyzed.
  LIMIT 500
} AS %titlelist

WITH {
  # Applying the regexes to the titles to extract ngrams, and counting
  # occurrences of the ngrams across titles.
  SELECT DISTINCT ?word
                  (COUNT(DISTINCT ?title) AS ?count)
                  (SAMPLE(DISTINCT ?x) AS ?item)
                  ?l
  {
    INCLUDE %regexscaffold
    INCLUDE %titlelist
    # Four consecutive tokens of the padded title: nv-1, nv, nv+1, nv+2.
    BIND(LCASE(REPLACE(?input, ?re1, "$1")) AS ?0gram)
    BIND(LCASE(REPLACE(?input, ?re1, "$2")) AS ?1gram)
    BIND(LCASE(REPLACE(?input, ?re2, "$1")) AS ?0gram1)
    BIND(LCASE(REPLACE(?input, ?re2, "$2")) AS ?1gram1)
    BIND((CONCAT(?0gram, " ", ?1gram, " ", ?0gram1, " ", ?1gram1)) AS ?word)
    # Drop windows that still contain start padding. This also drops rows where
    # a regex did not match, since REPLACE then returns the padded input as is.
    FILTER(!(CONTAINS(?word, "::: ")))
    # Drop windows that run into the stop padding at the end of the title.
    FILTER(!(CONTAINS(?word, " ;;;")))
    BIND(STRLEN(?word) AS ?l)
    # FILTER (?l > 4 )
  }
  # Only real grouping keys are listed. ?count and ?item are aggregate results
  # of this SELECT, so they are unbound at grouping time and cannot be keys.
  GROUP BY ?word ?l
  # HAVING(?count > 2)
  # ORDER BY DESC(?count)
  # LIMIT 250
} AS %words

WHERE {
  INCLUDE %words
}
# ?ngram is an aggregate alias of the outer SELECT, so only ?item is a grouping key.
GROUP BY ?item #?count ?l ?topic_string_resolver1 ?topic_string_resolver2
# NOTE(review): ?count and ?l are neither grouping keys nor aggregates at this
# level, so the sort key per item is not well defined. If ranking by phrase
# frequency is intended, consider DESC(MAX(?count)) DESC(MAX(?l)); confirm first.
ORDER BY DESC(?count) DESC(?l)
LIMIT 250

ngram	Sample article with this phrase
	Q27012177
	Q27012170
	Q27012154
	Q27012145
	Q27012147
	Q27012143
	Q27010700
	Q27010663
	Q27010482
	Q27009533
	Q27009451
	Q27008522
	Q27006807
	Q27005584
	Q27004673
	Q27001569
	Q27001059
	Q27000924
	Q27000084
	Q26999723
	Q26997922
	Q26995671
	Q26991915
	Q26865692
	Q26865230
	Q26864916
	Q26862618
	Q26862578
	Q26853126
	Q26849747
	Q26830368
	Q26830333
	Q26829985
	Q26829142
	Q26828775
	Q26828642
	Q26827810
	Q26827619
	Q26827564
	Q26827485
	Q26825328
	Q26825229
	Q26824575
	Q26824043
	Q26821892
	Q26820854
	Q26781699
	Q26779597
	Q26777120
	Q26768541
	Q26751988
	Q26748775
	Q26747749
	Q26741877
	Q26269950
	Q26269944
	Q25257840
	Q24603102
	Q24601072
	Q24596960
	Q24594772
	Q24594232
	Q24560053
	Q24545347
	Q24540045
	Q24536310
	Q24530904
	Q24338206
	Q24323391
	Q24319024
	Q24317765
	Q24317706
	Q24316125
	Q24312307
	Q24303887
	Q24298274
	Q24296060
	Q24292885
	Q24292371
	Q24289368
	Q24273234
	Q24201442
	Q24195075
	Q23000444
	Q22337365
	Q22337356
	Q22337235
	Q22336977
	Q22336967
	Q22299336
	Q22254205
	Q22122521
	Q22122167
	Q22066251
	Q22065892
	Q22000581
	Q21709322
	Q21707743
	Q21707754
	Q21707613
	Q21706731
	Q21706628
	Q21706646
	Q21706643
	Q21706625
	Q21706460
	Q21706485
	Q21706459
	Q21706453
	Q21706455
	Q21706444
	Q21706439
	Q21559736
	Q21558637
	Q21558458
	Q21184027
	Q21183896
	Q21145823
	Q21145769
	Q21145722
	Q21145267
	Q21144663
	Q21144219
	Q21135570
	Q21134959
	Q21129379
	Q21128974
	Q21128967
	Q21092580
	Q21092496
	Q21090665
	Q7767118
	Q7747319
	Q27012203
	Q27012359
	Q27012469
	Q27012495
	Q27012500
	Q27012516
	Q27012561
	Q27012759
	Q27012963
	Q27013707
	Q27013806
	Q27013816
	Q27016115
	Q27016503
	Q27016572
	Q27016785
	Q27017220
	Q27018978
	Q27020037
	Q27025151
	Q27025437
	Q27132103
	Q27136964
	Q27276923
	Q27300348
	Q27300981
	Q27304399
	Q27306784
	Q27308784
	Q27308852
	Q27309551
	Q27313323
	Q27315058
	Q27316277
	Q27316592
	Q27316745
	Q27316883
	Q27316890
	Q27318554
	Q27323007
	Q27323026
	Q27325399
	Q27329469
	Q27333071
	Q27333232
	Q27333508
	Q27333606
	Q27333676
	Q27334577
	Q27334785
	Q27334852
	Q27334868
	Q27335313
	Q27335527
	Q27335998
	Q27336309
	Q27339211
	Q27339550
	Q27339572
	Q27340160
	Q27340392
	Q27342528
	Q27342659
	Q27342699
	Q27345023
	Q27345269
	Q27346347
	Q27347232
	Q27347241
	Q27348469
	Q27349493
	Q27349997
	Q27350056
	Q27350445
	Q27441070
	Q27443139
	Q27444296
	Q27444420
	Q27445657
	Q27446505
	Q27448063
	Q27448681
	Q27449516
	Q27449952
	Q27450079
	Q27450532
	Q27450792
	Q27451722
	Q27452708
	Q27454610
	Q27455918
	Q27619697
	Q27622743
	Q27625279
	Q27628569
	Q27631527
	Q27632643
	Q27635143
	Q27637274
	Q27637702
	Q27640255
	Q27640459
	Q27641464
	Q27641621
	Q27641772
	Q27656975
	Q27657133
	Q27657521
	Q27658478
	Q27660062
	Q27660095
	Q27660843
	Q27662177
	Q27666353
	Q27667453
	Q27672369
	Q27674687

∑ 250 items.

End of automatically generated list.