Wikidata:WikiProject 20th Century Press Archives/Use cases

 

Home

 

Data Structure

 

Data Sources

 

Use Cases

 

Tools & Tasks

 

Statistics

 

All PM20 folders currently linked to Wikidata, by type

edit

Queried from the Wikidata SPARQL endpoint:

Queries for person folders

edit

Map of economists in PM20 by place of birth

edit

per Wikidata query:

#defaultView:Map{"hide":"?geo"}
# Economists with a PM20 folder, plotted at the coordinates of their place of birth.
SELECT ?image ?item ?itemLabel ?pobLabel ?viewer ?geo
WHERE {
  # occupation: economist
  ?item wdt:P106 wd:Q188094 .
  # PM20 folder ID
  ?item wdt:P4293 ?pm20Id .
  # place of birth and its coordinates (both mandatory, otherwise no map marker)
  ?item wdt:P19 ?pob .
  ?pob wdt:P625 ?geo .

  # keep only folders that have works accessible online
  ?item p:P4293 ?pm20Statement .
  ?pm20Statement pq:P5592 ?workCount .
  FILTER(?workCount > 0)

  # portrait, where available
  OPTIONAL { ?item wdt:P18 ?image . }

  # link into the PM20 document viewer
  BIND(URI(CONCAT('https://pm20.zbw.eu/dfgview/', ?pm20Id)) AS ?viewer)

  # labels for ?item and ?pob
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en, de, fr, es, nl, pl, ru". }
}
Try it!

Gender bias in the PM20 persons archives

edit

Gender assignment from Wikidata (empty gender labels mean: unknown)

# Number of PM20 person folders (with works accessible online) per gender.
# Rows with an empty gender stem from items without a P21 statement.
SELECT (COUNT(?item) AS ?count) ?gender ?genderLabel
WHERE {
  # humans ...
  ?item wdt:P31 wd:Q5 .
  # ... that have a PM20 folder ID
  ?item wdt:P4293 ?pm20Id .

  # keep only folders that have works accessible online
  ?item p:P4293 ?pm20Statement .
  ?pm20Statement pq:P5592 ?workCount .
  FILTER(?workCount > 0)

  # gender is optional so that items without it are still counted
  OPTIONAL { ?item wdt:P21 ?gender . }

  # label for ?gender
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en, de". }
}
GROUP BY ?gender ?genderLabel
Try it!

Participants of expeditions

edit

per Wikidata query:

# Participants of expeditions who have a PM20 folder with works accessible
# online, ordered by the number of accessible works (largest first).
select distinct ?item ?itemLabel ?pm20 ?viewer ?workCount
where {
  # all research expeditions
  ##?expedition wdt:P31 wd:Q366301 .
  # alternatively: all expeditions of any type
  # (instance of wd:Q2401485 or of any of its subclasses)
  ?expedition p:P31/ps:P31/wdt:P279* wd:Q2401485 .
  # persons/items that participated in the expedition
  ?item wdt:P1344 ?expedition ;
        # with PM20 id
        wdt:P4293 ?pm20Id .
  # restrict to items with online accessible articles
  ?item p:P4293/pq:P5592 ?workCount .
  filter(?workCount > 0)
  # PM20 Link (persistent folder URI)
  bind(uri(concat('http://purl.org/pressemappe20/folder/', ?pm20Id)) as ?pm20)
  # viewer link - same URL scheme as the other PM20 queries on this page;
  # replaces the hand-assembled link to the legacy "beta" METS files
  bind(uri(concat('https://pm20.zbw.eu/dfgview/', ?pm20Id)) as ?viewer)
  # add labels
  service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en, de, fr, es, nl, pl, ru". }
}
order by desc (?workCount)
Try it!

Heads of state

edit

per Wikidata query. The base set is derived from a federated query to the PM20 endpoint, because the "field of activity" property has not yet been completely migrated to Wikidata.

PREFIX schema: <http://schema.org/>
PREFIX zbwext: <http://zbw.eu/namespaces/zbw-extensions/>
#
# Wikidata items for persons whose PM20 folder has the field of activity
# "Head of state" and which have works accessible online, ordered by label.
select distinct ?item ?itemLabel ?pm20 ?viewer ?workCount
where {
  # get the basic set of persons with "field of activity"
  # "Head of state" (German: "Staatsoberhaupt") from the PM20 endpoint
  service <http://zbw.eu/beta/sparql/pm20/query> {
    ?pm20 zbwext:activity/schema:about "Head of state"@en .
    # reduce the folder URI to the bare PM20 folder ID used in Wikidata
    bind(strafter(str(?pm20), 'http://purl.org/pressemappe20/folder/') as ?pm20Id)
  }
  # join with Wikidata via the PM20 folder ID
  ?item wdt:P4293 ?pm20Id .
  #
  # restrict to items with online accessible articles
  ?item p:P4293/pq:P5592 ?workCount .
  filter(?workCount > 0)
  # viewer link - same URL scheme as the other PM20 queries on this page;
  # replaces the hand-assembled link to the legacy "beta" METS files
  bind(uri(concat('https://pm20.zbw.eu/dfgview/', ?pm20Id)) as ?viewer)
  # add labels
  service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE], en, de, fr, es, nl, pl, ru" . }
}
order by ?itemLabel
Try it!

The federated query currently takes 15-20 seconds. If, for some purposes, this is too much, it can be accelerated to a fraction of this by putting the IDs resulting from the "service" statement into a "values" clause.

Queries for PM20 subject categories

edit

All subject categories

edit

The following query uses these:

  • Properties: instance of (P31)     , PM20 subject code (P8484)     , series ordinal (P1545)     
    # All PM20 subject categories with their subject code and a link to the
    # category page, in the order given by the "series ordinal" qualifier.
    SELECT DISTINCT ?item ?subjectCode ?itemLabel ?pm20Category
    WHERE {
      # instances of wd:Q92707903 (presumably the PM20 subject category class)
      ?item wdt:P31 wd:Q92707903 .

      # subject code statement together with its sort key qualifier
      ?item p:P8484 ?codeStatement .
      ?codeStatement ps:P8484 ?subjectCode .
      ?codeStatement pq:P1545 ?sortCode .

      # only codes that are also exposed as truthy (wdt:) values
      ?item wdt:P8484 ?subjectCode .

      # URI of the category page on pm20.zbw.eu
      BIND(URI(CONCAT('https://pm20.zbw.eu/category/subject/s/', ?subjectCode)) AS ?pm20Category)

      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    ORDER BY ?sortCode
    

Queries for PM20 "country" categories

edit

All geo codes

edit

The following query uses these:

  • Properties: PM20 geo code (P8483)     , series ordinal (P1545)     
    # All items with a PM20 geo code and a link to the corresponding category
    # page, in the order given by the "series ordinal" qualifier.
    SELECT DISTINCT ?geoCode ?item ?itemLabel ?pm20Category
    WHERE {
      # geo code statement together with its sort key qualifier
      ?item p:P8483 ?codeStatement .
      ?codeStatement ps:P8483 ?geoCode .
      ?codeStatement pq:P1545 ?sortCode .

      # only codes that are also exposed as truthy (wdt:) values
      ?item wdt:P8483 ?geoCode .

      # URI of the category page on pm20.zbw.eu
      BIND(URI(CONCAT('https://pm20.zbw.eu/category/geo/s/', ?geoCode)) AS ?pm20Category)

      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    ORDER BY ?sortCode
    

Queries for PM20 country/subject folders

edit

Subject folders map

edit

The following query uses these:

Features: map (Q24515275)     

#defaultView:Map{"hide": [ "?geoLocation", "?layer", "?count" ], "markercluster": true}
# Map of countries with the number of PM20 country/subject folders per country.
# Each marker links to the country's PM20 category page; markers are grouped
# into three layers by folder count.
select distinct ?count ?countryLabel ?country (concat(str(?count), ' folders') as ?folderCount) ?pm20Category ?geoLocation ?layer
where {
  # get countries, folder counts and one coordinate value
  { 
    # count(distinct ...) is required: a country with more than one
    # coordinate value would otherwise multiply its folder rows (one per
    # coordinate) before aggregation and inflate the count
    select ?country (count(distinct ?item) as ?count) (sample(?coords) as ?geoLocation)
    where {
      ?item wdt:P31 wd:Q91257459 ;
            wdt:P1269 ?country .
      
      # set default coordinates for items like "world"
      bind("Point(-25 0)"^^geo:wktLiteral as ?defaultCoords)
      optional {
        ?country wdt:P625 ?geoCoords .
      }
      # use the item's own coordinates when present, the default otherwise
      bind(coalesce(?geoCoords, ?defaultCoords) as ?coords)
    }
    group by ?country
  }
  
  # construct URI for country page access
  # (only geo codes whose mapping relation type qualifier is wd:Q39893449)
  ?country p:P8483 ?statement .
  ?statement ps:P8483 ?geoCode ;
             pq:P4390 wd:Q39893449 .
  bind(uri(concat('https://pm20.zbw.eu/category/geo/s/', ?geoCode)) as ?pm20Category)
  
  # create map layer
  bind(if(?count < 3, "1 - 2 folders",
          if(?count < 51, "3 - 50 folders", "more than 50 folders")) as ?layer)
                
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
order by ?layer

Link to result from slightly shortened query: https://w.wiki/v9Q (see phab:T220703)

Subject folders for Japan

edit

The following query uses these:

Alle Länder/Sach-Mappen (in German)

edit

The following query uses these:

Folder and document count per top category

edit

Please keep in mind that only part of the countries of this archive are accessible online - therefore folder counts and document counts are incomplete.

The following query uses these:

  • Properties: instance of (P31)     , PM20 subject code (P8484)     , main subject (P921)     , PM20 folder ID (P4293)     , number of works accessible online (P5592)     
    # Folder count and document count per top-level subject category
    # (first character of the PM20 subject code), largest categories first.
    SELECT ?top ?topCode ?topLabel ?folderCount ?docCount
    WHERE {
      {
        # aggregate per top-level code
        SELECT ?topCode
               (COUNT(*) AS ?folderCount)
               (SUM(?folderDocCount) AS ?docCount)
        WHERE {
          # folder items (instances of wd:Q91257459)
          ?item wdt:P31 wd:Q91257459 .
          # subject code is taken from the qualifier on the "main subject" statement
          ?item p:P921 ?subjectStatement .
          ?subjectStatement pq:P8484 ?subjectCode .
          # top-level category = first character of the subject code
          BIND(SUBSTR(?subjectCode, 1, 1) AS ?topCode)
          # number of works accessible online, where recorded as a literal
          OPTIONAL {
            ?item p:P4293 ?folderStatement .
            ?folderStatement pq:P5592 ?folderDocCount .
            FILTER(isLiteral(?folderDocCount))
          }
        }
        GROUP BY ?topCode
      }
      # resolve the one-character code to its category item
      ?top wdt:P31 wd:Q92707903 .
      ?top wdt:P8484 ?topCode .
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE], en". }
    }
    ORDER BY DESC(?folderCount)
    

Folder and document count per geographical category

edit

The following query uses these:

Folder count per country distribution

edit

The following query uses these:

  • Properties: instance of (P31)     , facet of (P1269)     
    # Distribution of countries over folder-count buckets: how many countries
    # have 1-2, 3-10, 11-50, 51-100 or more than 100 folders.
    SELECT DISTINCT ?bucket (COUNT(?country) AS ?countryCount)
    WHERE {
      {
        # number of folder items per country
        SELECT ?country (COUNT(?item) AS ?folderCount)
        WHERE {
          ?item wdt:P31 wd:Q91257459 .
          ?item wdt:P1269 ?country .
        }
        GROUP BY ?country
      }
      # bucket labels carry leading zeros so that they sort as strings
      BIND(
        IF(?folderCount >= 101, "more than 100",
          IF(?folderCount >= 51, "51 - 100",
            IF(?folderCount >= 11, "11 - 50",
              IF(?folderCount >= 3, "03 - 10", "01 - 2"))))
        AS ?bucket)
    }
    GROUP BY ?bucket
    ORDER BY ?bucket
    

"Real-world" items linking to PM20 subject folders

edit

The following query uses these:

  • Properties: PM20 folder ID (P4293)     , instance of (P31)     
    # Items that carry a PM20 subject folder ID ('sh/...') but are not
    # themselves instances of wd:Q91257459 (presumably the class of PM20
    # country/subject folder items) - i.e. "real-world" items linked to a folder.
    select distinct ?item ?itemLabel ?pm20Id
    where {
      ?item wdt:P4293 ?pm20Id .
      # subject folders only
      filter(strstarts(?pm20Id, 'sh/'))
      # exclude the folder items themselves
      filter( not exists {
        ?item wdt:P31 wd:Q91257459 .
      })
      # the placeholder must be written with brackets, "[AUTO_LANGUAGE]";
      # without them it is treated as an (unknown) language code
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,de". }
    }
    

Queries for companies/organizations

edit

Map of PM20 companies by headquarters location

edit

per Wikidata query (uses country, if headquarters location is unknown; includes other organizations)

#defaultView:Map{"hide":"?geo"}
# Map of companies/organizations with a PM20 company folder ('co/...') that has
# works accessible online. The marker is placed at the headquarters location,
# or at the country when no headquarters location is recorded.
select ?image ?item ?itemLabel ?locationLabel ?viewer ?geo
where {
  # Blazegraph hint: switch off the query optimizer, so the patterns below are
  # evaluated in the order written - do not reorder them without re-testing
  hint:Query hint:optimizer "None" .
  #
  # company items (directly linked):
  # start from the folder ID statements and keep only company folders
  ?statement ps:P4293 ?pm20Id .
  filter(strstarts(?pm20Id, 'co/'))
  ?item p:P4293 ?statement .
  # skip statements that carry a "mapping relation type" qualifier (P4390)
  filter (not exists {
      ?statement pq:P4390 [] .
    })
  # with country (mandatory)
  ?item wdt:P17 ?country .
  # headquarters location (optional)
  optional {
    ?item wdt:P159 ?headquarters .
  }
  # prefer the headquarters location, fall back to the country
  bind(coalesce(?headquarters, ?country) as ?location)
  #
  # restrict to items with online accessible articles
  ?item p:P4293/pq:P5592 ?workCount .
  filter(?workCount > 0)
  # geo coordinates for location (items whose location has none are dropped)
  ?location wdt:P625 ?geo .
  # optional image
  optional { ?item wdt:P18 ?image . }
  # viewer link
  bind(uri(concat('https://pm20.zbw.eu/dfgview/', ?pm20Id)) as ?viewer)
  # add labels
  service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en, de, fr, es, nl, pl, ru". }
}
Try it!

Company folders by NACE-Code

edit

List is incomplete - not all relevant company items have NACE code!

Query in ZBW Labs interface - expand "cnt" column for actual list of companies

Folder count per country

edit

The following query uses these:

  • Properties: country (P17)     , PM20 folder ID (P4293)     , mapping relation type (P4390)     
    # Number of items directly linked to a PM20 company folder, per country,
    # countries with the most folders first.
    SELECT (COUNT(?item) AS ?count) ?country ?countryLabel
    WHERE {
      # folder ID statements of the companies archive ('co/...')
      ?folderStatement ps:P4293 ?pm20Id .
      FILTER(STRSTARTS(?pm20Id, 'co/'))
      ?item p:P4293 ?folderStatement .

      # directly linked only: no "mapping relation type" qualifier on the statement
      FILTER NOT EXISTS { ?folderStatement pq:P4390 ?anyRelationType . }

      # country of the item
      ?item wdt:P17 ?country .

      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,de,fr". }
    }
    GROUP BY ?country ?countryLabel
    ORDER BY DESC(?count)
    

Statistics: total count of items linked to PM20 company folders

edit

The following query uses these:

  • Properties: PM20 folder ID (P4293)     , mapping relation type (P4390)     , number of works accessible online (P5592)     
    # Statistics on Wikidata items linked to PM20 company folders: total number
    # of items, how many existed before the linking phase, how many were newly
    # created, and the share of newly created ones.
    # NOTE: the projection expressions below reuse ?total, so their order matters.
    select (count(distinct ?wd) as ?total)
    # NOTE(review): sum(?new) adds up solution rows, not distinct items - an item
    # with several matching statements is counted more than once; confirm intended
    (?total - sum(?new) as ?preExistingItems)
    (sum(?new) as ?itemsCreated)
    # percentage with one decimal place; NOTE(review): the denominator is the
    # number of distinct folder IDs (?id), not ?total - confirm this is intended
    (concat(str(round(sum(?new) / count(distinct ?id)*1000)/10), ' %') as ?percentageCreated)
    where {
      # items with link to pm20 company folder
      ?statement ps:P4293 ?id .
      filter(strstarts(?id, 'co/'))
      ?wd p:P4293 ?statement .
    
      # don't take into account related matches (mapping relation type)
      # NOTE(review): verify that wd:Q39894604 is the "related match" item
      optional {
          ?statement pq:P4390 ?relType .
      }
      # keep statements without the qualifier, or with any other relation type
      filter(!bound(?relType) || ?relType!=wd:Q39894604)
    
      # count only folders with documents
      # (requires the P5592 qualifier to be present; its value is not checked here)
      ?statement pq:P5592 ?docCount .
      
      # check if the item was created after start of the PM20 companies' linking phase
      # at 2021-04-26, first new item Q106647030
      # numeric part of the item's QID, taken from the entity URI
      bind(xsd:integer(strafter(str(?wd), concat(str(wd:), 'Q'))) as ?qidNum)
      # 1 for items created during the linking phase, 0 for pre-existing ones
      bind(if(?qidNum >= 106647030, 1, 0) as ?new)
    }
    

Wikidata property values sourced from PM20 company folders

edit

Result ordered by descending total count (property labels in English)

The following query uses these:

  • Properties: PM20 folder ID (P4293)     
    # Properties whose best-rank statements cite a PM20 company folder as
    # reference, with the number of such statements on pre-existing items,
    # on newly created items, and in total (most used properties first).
    SELECT ?property ?pid ?propertyLabel ?cntPreExistingItems ?cntNewItems ?cntTotal
    WHERE {
      {
        # NOTE: ?cntNewItems reuses ?cntPreExistingItems, so keep this projection order
        SELECT (SUM(?old) AS ?cntPreExistingItems)
               ((COUNT(?item) - ?cntPreExistingItems) AS ?cntNewItems)
               (COUNT(?item) AS ?cntTotal)
               ?relation
        WHERE {
          # best-rank statements with a reference that carries a PM20 folder ID
          ?item ?relation ?sourcedStatement .
          ?sourcedStatement rdf:type wikibase:BestRank .
          ?sourcedStatement prov:wasDerivedFrom ?reference .
          ?reference pr:P4293 ?ref_value .

          # restrict to companies archive
          FILTER(STRSTARTS(?ref_value, 'co/'))

          # check if the item was created before start of the PM20 companies' linking phase
          # at 2021-04-26, first new item Q106647030
          BIND(xsd:integer(STRAFTER(STR(?item), CONCAT(STR(wd:), 'Q'))) AS ?qidNum)
          BIND(IF(?qidNum < 106647030, 1, 0) AS ?old)
        }
        GROUP BY ?relation
        # ignore properties used only once
        HAVING (?cntTotal > 1)
      }

      # map the statement predicate (p:Pxxx) back to its property entity
      ?property wikibase:claim ?relation .

      # bare property ID (e.g. "P159") for display
      BIND(STRAFTER(STR(?property), 'http://www.wikidata.org/entity/') AS ?pid)
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
    }
    ORDER BY DESC(?cntTotal)
    

List of all PM20 companies (folders and microfilm sections)

edit

The following query uses these:

  • Properties: PM20 folder ID (P4293)     , PM20 film section ID (P11822)     , inception (P571)     , dissolved, abolished or demolished date (P576)     , headquarters location (P159)     
    # All items linked to the PM20 companies archive - either through a folder
    # ID or through a microfilm section ID - with their years of existence,
    # headquarters location(s) and a link to the PM20 folder or film section.
    select ?wd ?wdLabel ?inFunction (group_concat(?hqLabel; separator='; ') as ?headquarter) ?pm20 ?hint
    where {
      # the inner query resolves the labels, so that the headquarters labels
      # can be concatenated per item by the outer query
      select ?wd ?wdLabel ?inFunction ?hqLabel ?pm20 ?hint
      where {
        {
          # items with a company folder
          ?wd wdt:P4293 ?pm20Id .
          filter(isLiteral(?pm20Id))
          # 'co/' (with slash), consistent with the other company queries
          filter(strstarts(?pm20Id, 'co/'))
        } union {
          # items with a section on a company microfilm
          ?wd wdt:P11822 ?sectionId .
          filter(isLiteral(?sectionId))
          filter(contains(?sectionId, '/co/'))
        }
        # inception
        optional {
          ?wd wdt:P571 ?start .
        }
        # dissolved, abolished or demolished date
        optional {
          ?wd wdt:P576 ?end .
        }
        # headquarters location
        optional {
          ?wd wdt:P159 ?hq .
        }
        # "<start year> - <end year>"; either part is left out when unknown
        bind(concat(coalesce(str(year(?start)), ''), coalesce(concat(' - ', str(year(?end))), '')) as ?inFunction)
        #
        # link and hint depend on which branch of the union matched
        bind(if(bound(?pm20Id), uri(concat('https://pm20.zbw.eu/folder/', ?pm20Id)), uri(concat('https://pm20.zbw.eu/film/', ?sectionId))) as ?pm20)
        bind(if(bound(?pm20Id), 'PM20 folder', 'PM20 microfilm (on premises only)') as ?hint)
        #
        service wikibase:label { bd:serviceParam wikibase:language "en,de,fr,es,it,nl,pt,pl,cs,sv" . }
      }
    }
    group by ?wd ?wdLabel ?inFunction ?pm20 ?hint
    order by ?wdLabel
    # query times out without limit
    # as of 2023-07, there are 12755 results
    limit 25000
    

Queries on commodities and wares

edit
edit

The following query uses these:

Features: ImageGrid (Q24515278)     

#defaultView:ImageGrid{"hide":["?img"]}
# Image grid of wares that have a PM20 ware ID and an image, with the number
# of countries for which folders on the ware exist and a link to the PM20
# ware category page.
select ?ware ?wareLabel (max(?image) as ?img) (concat('in ', str(count(distinct ?country)), ' Ländern') as ?countries) ?pm20link
where {
  {
  # wares with PM20 ware ID and image (one row per image)
  select ?ware ?image ?wareId
  where {
    ?ware wdt:P10890 ?wareId ;
          wdt:P18 ?image .
   }
  }
  # items with the ware as main subject, and the country they are a facet of
  ?folder wdt:P921 ?ware ;
          wdt:P1269 ?country .
   
   # link to the ware's category page
   bind(uri(concat('https://pm20.zbw.eu/category/ware/i/', ?wareId)) as ?pm20link)

   SERVICE wikibase:label { bd:serviceParam wikibase:language "de" . }
}
# one row per ware: max() picks a single image, and count(distinct ...) keeps
# the country count from being multiplied by the number of images of the ware
group by ?ware ?wareLabel ?pm20link
order by ?wareLabel

Fulltext search for any type of PM20 folder

edit

Based on example query by Andrawaag and Infragstruktur ("Rewritten to prevent timeout - uses the wikibase cirrussearch extension to ensure we only get relevant matches")

# Items with a PM20 folder ID whose label, alias or description matches the
# search string(s); the search itself is delegated to the MediaWiki search API.
SELECT ?item ?itemLabel ?pm20
WITH {
  # search parameter - edit the string to search for something else
  SELECT * WHERE {
    BIND("brücken hamburg" AS ?searchfor)
  }
} AS %p
WITH {
  # candidate items returned by the search
  SELECT ?item
  WHERE {
    INCLUDE %p
    # restrict the search to items that have a P4293 statement
    BIND(CONCAT("haswbstatement:P4293 ", ?searchfor) AS ?searchstr)
    SERVICE wikibase:mwapi {
      bd:serviceParam wikibase:endpoint "www.wikidata.org" ;
                      wikibase:api "Generator" ;
                      mwapi:generator "search" ;
                      mwapi:gsrsearch ?searchstr ;
                      mwapi:gsrlimit "max" ;
                      mwapi:gsrnamespace "0" ;
                      mwapi:gsrprop "" .
      ?item wikibase:apiOutputItem mwapi:title .
    }
  }
} AS %i
WHERE {
  INCLUDE %i
  INCLUDE %p
  # English label only (items without one are dropped)
  ?item rdfs:label ?itemLabel .
  FILTER(LANG(?itemLabel) = "en")
  # link to the PM20 folder
  ?item wdt:P4293 ?pm20Id .
  BIND(URI(CONCAT('https://pm20.zbw.eu/folder/', ?pm20Id)) AS ?pm20)
}
Try it!

Description: mw:MWAPI, mw:Help:CirrusSearch, mw:Help:Extension:WikibaseCirrusSearch