Wikidata:University of Virginia/Listeria/UVa people/Common ngrams in titles of UVA-coauthored publications without P921 (main subject) statement

This list is periodically updated by a bot. Manual changes to the list will be removed on the next update!

WDQS | PetScan | TABernacle | Find images | Recent changes | Query:
# Purpose: extract 4-word n-grams from the English titles of works that have an
# author linked to wd:Q213439 (directly, or through something that is
# wdt:P361 of it) and that carry no wdt:P921 statement, then list one sample
# work per n-gram.
#
# Layout note: SPARQL "#" comments run to the end of the physical line, so every
# clause below must stay on its own line. Collapsing the query onto a few lines
# silently comments out the clauses that follow each "#".
#
# Result variables:
#   ?item       sample work (SAMPLE(?x) from %words), one row per work
#   ?ngram      one n-gram for which that work was the sample
#   ?max_count  highest title count among the n-grams grouped under ?item
#   ?max_len    longest string length among the n-grams grouped under ?item
# ?max_count / ?max_len are projected so that ORDER BY sorts on well-defined
# aggregates; ?count and ?l themselves are not group keys of the outer query.
SELECT
  ?item
  (SAMPLE(?word) AS ?ngram)
  (MAX(?count) AS ?max_count)
  (MAX(?l) AS ?max_len)
# Disabled columns, kept for reference (each builds a wiki link to a follow-up query for ?word):
#?count ?l
#(CONCAT("[https://query.wikidata.org/#SELECT%20%0A%20%20DISTINCT%20%3Fitem%20%3Ftitle%0A%20%20%28REPLACE%28STR%28%3Fitem%29%2C%20%22.%2aQ%22%2C%20%22Q%22%29%20AS%20%3Fqid%29%20%0A%20%20%28%22P921%22%20AS%20%3Fproperty%29%0A%20%20%28%22Q202864%22%20AS%20%3FReplaceWithYourTargetID%29%0A%20%20%28%22S887%22%20AS%20%3Fheuristic%29%0A%20%20%28%22Q69652283%22%20AS%20%3Fdeduced%29%0A%0AWHERE%20%7B%0A%20%20hint%3AQuery%20hint%3Aoptimizer%20%22None%22.%0A%20%20%0A%7B%20%20SERVICE%20wikibase%3Amwapi%20%7B%0A%20%20%20%20bd%3AserviceParam%20wikibase%3Aapi%20%22Search%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20wikibase%3Aendpoint%20%22www.wikidata.org%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20mwapi%3Asrsearch%20%22", ENCODE_FOR_URI(?word), "%20and%20haswbstatement%3AP31%3DQ13442814%20-haswbstatement%3AP921%3DQ202864%22.%0A%20%20%20%20%20%20%3Fpage_title%20wikibase%3AapiOutput%20mwapi%3Atitle.%0A%20%20%7D%0A%20%7D%0AUNION%0A%7B%20%20SERVICE%20wikibase%3Amwapi%20%7B%0A%20%20%20%20bd%3AserviceParam%20wikibase%3Aapi%20%22Search%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20wikibase%3Aendpoint%20%22www.wikidata.org%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20mwapi%3Asrsearch%20%22", ENCODE_FOR_URI(?word), "%20-and%20haswbstatement%3AP31%3DQ13442814%20-haswbstatement%3AP921%3DQ202864%22.%0A%20%20%20%20%20%20%3Fpage_title%20wikibase%3AapiOutput%20mwapi%3Atitle.%0A%20%20%7D%0A%20%7D%0A%20%20BIND%28IRI%28CONCAT%28STR%28wd%3A%29%2C%20%3Fpage_title%29%29%20AS%20%3Fitem%29%0A%20%20%0A%20%20%3Fitem%20wdt%3AP31%20wd%3AQ13442814%3B%0A%20%20%20%20%20%20%20%20wdt%3AP1476%20%3Ftitle.%0A%20%20FILTER%20CONTAINS%28LCASE%28%3Ftitle%29%2C%20%22", ENCODE_FOR_URI(?word), "%22%29.%0A%0A%7D%0A 📰]") AS ?topic_string_resolver1)
#(CONCAT("[https://query.wikidata.org/#%0ASELECT%0A%20%20%23%20Number%20of%20works%20on%20the%20topic%20with%20the%20author%20name%20string%0A%20%20%3Fcount%0A%0A%20%20%23%20Number%20of%20works%20with%20the%20author%20name%20string%0A%20%20%3Fcount1%0A%0A%20%20%23%20Author%20as%20a%20string%0A%20%20%3Fauthorstring%0A%0A%20%20%23%20Build%20URL%20to%20the%20Author%20disambiguator%20tool%0A%20%20%28CONCAT%28%0A%20%20%20%20%20%20%27https%3A%2F%2Ftools.wmflabs.org%2Fauthor-disambiguator%2Fnames_oauth.php%3Fname%3D%27%2C%0A%20%20%20%20%20%20ENCODE_FOR_URI%28%3Fauthorstring%29%29%20AS%20%3Fresolver_url%29%0AWITH%20%7B%0A%20%20SELECT%20DISTINCT%20%3Fwork%20%3Fauthorstring%20WHERE%20%7B%0A%20%20%20%20%3Fwork%20wdt%3AP2093%20%3Fauthorstring%20.%0A%23%20%20%20%20%3Fwork%20wdt%3AP50%20%5B%20wdt%3AP21%20wd%3AQ6581072%20%5D.%0A%23%20%20%20%20%3Fwork%20wdt%3AP50%20%5Bwdt%3AP166%20%5B%5D%5D.%0A%23%20%20%20%20%3Fwork%20wdt%3AP921%20%5Bwdt%3AP625%20%5B%5D%5D.%0A%20%20%20%20%7B%20%3Fwork%20wdt%3AP1476%20%3Ftitle%20.%20%7D%20%0A%20%20%20%20FILTER%20CONTAINS%28LCASE%28%3Ftitle%29%2C%20%22", ENCODE_FOR_URI(?word), "%22%29.%20%20%0A%20%20%7D%0A%20%20LIMIT%20100%0A%7D%20AS%20%25works%0AWITH%20%7B%0A%20%20SELECT%0A%20%20%20%20%28COUNT%28DISTINCT%20%3Fwork%29%20AS%20%3Fcount%29%0A%20%20%20%20%28COUNT%28DISTINCT%20%3Fwork1%29%20AS%20%3Fcount1%29%0A%20%20%20%20%3Fauthorstring%0A%20%20WHERE%20%7B%0A%20%20%20%20INCLUDE%20%25works%0A%20%20%20%20%3Fwork1%20wdt%3AP2093%20%3Fauthorstring%20.%0A%20%20%7D%0A%20%20GROUP%20BY%20%3Fauthorstring%0A%7D%20AS%20%25result%0AWHERE%20%7B%0A%20%20INCLUDE%20%25result%0A%7D%0AORDER%20BY%20DESC%28%3Fcount%29%20DESC%28%3Fcount1%29%0A%23LIMIT%2050%0A%0A%20%20%0A 🙋]") AS ?topic_string_resolver2)
WITH {
  # Generating a list of regexes to look for the nv-th word in a string.
  # ?nv ranges over the wdt:P1181 values (1..50) of items that also have a
  # wdt:P5176 statement; these items serve only as a source of integers.
  SELECT ?re1 ?re2 ?nv {
    ?n wdt:P5176 [] ;
       wdt:P1181 ?nv .
    FILTER(?nv > 0)
    FILTER(?nv < 51)
    # ?re1: group 1 = word (nv-1) plus its trailing space, group 2 = word nv
    BIND(CONCAT("^([^ ]+ ){", str(?nv - 1), "}([^ ]+) .*") as ?re1)
    # ?re2: group 1 = word (nv+1) plus its trailing space, group 2 = word (nv+2)
    BIND(CONCAT("^([^ ]+ ){", str(?nv + 1), "}([^ ]+) .*") as ?re2)
  }
} as %regexscaffold
WITH {
  # Generating a list of titles to be analyzed
  SELECT ?title ?x ?input {
    # Choosing the entities whose titles are to be analyzed
    ?x wdt:P31 wd:Q13442814 ;
       wdt:P1476 ?title .
    ?x wdt:P50 ?author .
    # Author linked to wd:Q213439 by any property, directly or via a part (wdt:P361) of it
    { ?author ?property wd:Q213439 . }
    UNION
    { ?author ?property2 _:b9 .
      _:b9 wdt:P361 wd:Q213439 . }
    FILTER NOT EXISTS { ?x wdt:P921 ?topic }
    FILTER(lang(?title) = "en")
    # Basic processing of the titles
    BIND(CONCAT(
           "::: ::: ::: ::: ",                            # add start codon to assist with processing of n-grams at beginning of title
           REPLACE(str(?title), "[\\.:,;\\[\\]()$]", ""), # remove some frequent special characters
           " ;;; ;;; ;;; ;;;")                            # add stop codon to assist with processing of n-grams at end of title
         as ?input)
  }
  # No ORDER BY here: the 500 titles are whichever the engine returns first.
  LIMIT 500
} as %titlelist
WITH {
  # Applying the regexes to the titles to extract ngrams, and counting
  # occurrences of the ngrams across titles
  SELECT DISTINCT ?word (COUNT(DISTINCT ?title) as ?count) (SAMPLE(DISTINCT ?x) AS ?item) ?l {
    INCLUDE %regexscaffold
    INCLUDE %titlelist
    BIND(LCASE(REPLACE(?input, ?re1, "$1")) as ?0gram)
    BIND(LCASE(REPLACE(?input, ?re1, "$2")) as ?1gram)
    BIND(LCASE(REPLACE(?input, ?re2, "$1")) as ?0gram1)
    BIND(LCASE(REPLACE(?input, ?re2, "$2")) as ?1gram1)
    # Group-1 captures keep their trailing space, so ?word contains a double
    # space after its first and third words.
    BIND((CONCAT(?0gram, " ", ?1gram, " ", ?0gram1, " ", ?1gram1)) as ?word)
    FILTER(!(CONTAINS(?word, "::: "))) # this filters out any n-gram where n != 4, from the start of the title
    FILTER(!(CONTAINS(?word, " ;;;"))) # this filters out any n-gram where n != 4, from the end of the title
    BIND(STRLEN(?word) AS ?l)
    # FILTER (?l > 4 )
  }
  # Group only on real keys: ?count and ?item are aggregate aliases of this
  # SELECT, not variables bound in the pattern, and ?l is derived from ?word.
  GROUP BY ?word ?l
  # HAVING(?count > 2)
  # ORDER BY DESC(?count)
  # LIMIT 250
} as %words
WHERE {
  INCLUDE %words
}
# One row per sample work. ?ngram is the alias of SAMPLE(?word) and therefore
# not a group key.
GROUP BY ?item
# Previously grouped-by (disabled) columns: ?count ?l ?topic_string_resolver1 ?topic_string_resolver2
ORDER BY DESC(?max_count) DESC(?max_len)
LIMIT 250
ngram Sample article with this phrase
Q27012177
Q27012170
Q27012154
Q27012145
Q27012147
Q27012143
Q27010700
Q27010663
Q27010482
Q27009533
Q27009451
Q27008522
Q27006807
Q27005584
Q27004673
Q27001569
Q27001059
Q27000924
Q27000084
Q26999723
Q26997922
Q26995671
Q26991915
Q26865692
Q26865230
Q26864916
Q26862618
Q26862578
Q26853126
Q26849747
Q26830368
Q26830333
Q26829985
Q26829142
Q26828775
Q26828642
Q26827810
Q26827619
Q26827564
Q26827485
Q26825328
Q26825229
Q26824575
Q26824043
Q26821892
Q26820854
Q26781699
Q26779597
Q26777120
Q26768541
Q26751988
Q26748775
Q26747749
Q26741877
Q26269950
Q26269944
Q25257840
Q24603102
Q24601072
Q24596960
Q24594772
Q24594232
Q24560053
Q24545347
Q24540045
Q24536310
Q24530904
Q24338206
Q24323391
Q24319024
Q24317765
Q24317706
Q24316125
Q24312307
Q24303887
Q24298274
Q24296060
Q24292885
Q24292371
Q24289368
Q24273234
Q24201442
Q24195075
Q23000444
Q22337365
Q22337356
Q22337235
Q22336977
Q22336967
Q22299336
Q22254205
Q22122521
Q22122167
Q22066251
Q22065892
Q22000581
Q21709322
Q21707743
Q21707754
Q21707613
Q21706731
Q21706628
Q21706646
Q21706643
Q21706625
Q21706460
Q21706485
Q21706459
Q21706453
Q21706455
Q21706444
Q21706439
Q21559736
Q21558637
Q21558458
Q21184027
Q21183896
Q21145823
Q21145769
Q21145722
Q21145267
Q21144663
Q21144219
Q21135570
Q21134959
Q21129379
Q21128974
Q21128967
Q21092580
Q21092496
Q21090665
Q7767118
Q7747319
Q27012203
Q27012359
Q27012469
Q27012495
Q27012500
Q27012516
Q27012561
Q27012759
Q27012963
Q27013707
Q27013806
Q27013816
Q27016115
Q27016503
Q27016572
Q27016785
Q27017220
Q27018978
Q27020037
Q27025151
Q27025437
Q27132103
Q27136964
Q27276923
Q27300348
Q27300981
Q27304399
Q27306784
Q27308784
Q27308852
Q27309551
Q27313323
Q27315058
Q27316277
Q27316592
Q27316745
Q27316883
Q27316890
Q27318554
Q27323007
Q27323026
Q27325399
Q27329469
Q27333071
Q27333232
Q27333508
Q27333606
Q27333676
Q27334577
Q27334785
Q27334852
Q27334868
Q27335313
Q27335527
Q27335998
Q27336309
Q27339211
Q27339550
Q27339572
Q27340160
Q27340392
Q27342528
Q27342659
Q27342699
Q27345023
Q27345269
Q27346347
Q27347232
Q27347241
Q27348469
Q27349493
Q27349997
Q27350056
Q27350445
Q27441070
Q27443139
Q27444296
Q27444420
Q27445657
Q27446505
Q27448063
Q27448681
Q27449516
Q27449952
Q27450079
Q27450532
Q27450792
Q27451722
Q27452708
Q27454610
Q27455918
Q27619697
Q27622743
Q27625279
Q27628569
Q27631527
Q27632643
Q27635143
Q27637274
Q27637702
Q27640255
Q27640459
Q27641464
Q27641621
Q27641772
Q27656975
Q27657133
Q27657521
Q27658478
Q27660062
Q27660095
Q27660843
Q27662177
Q27666353
Q27667453
Q27672369
Q27674687

∑ 250 items.

End of automatically generated list.