Querying: Difference between revisions

12,002 bytes added ,  3 months ago
m
 
(39 intermediate revisions by the same user not shown)
Line 1: Line 1:
Run SPARQL queries via the [https://ottgaz.org/query/ Ottgaz Query Service].
[https://en.wikipedia.org/wiki/SPARQL SPARQL] is a powerful query language, but it is also quite complex. New users may find it easiest to use some of the sample queries below and adapt them to their needs.  
 
'''Remember:''' SPARQL is currently case sensitive, as elastic search is disabled in wikibase.cloud.


== Namespace ==
== Namespace ==
Run SPARQL queries via the [https://ottgaz.org/query/ Ottgaz Query Service].


Declare these namespaces at the start of every query:
Declare these namespaces at the start of every query:
Line 18: Line 17:


=== Label ===
=== Label ===
All items that are vilayets, as well as their English language label. [https://tinyurl.com/2pren5e2 run]
All regions that had vilayet status, as well as their English language label. [https://tinyurl.com/2k4uq6u2 run]


  SELECT ?vilayet ?vilayetLabel
  SELECT ?vilayet ?vilayetLabel
  WHERE  
  WHERE  
  {
  {
   ?vilayet ogt:P6 og:Q5 .  
   ?vilayet ogt:P15 og:Q5.
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
=== Duration, start and end dates ===
Duration between start and end date of every region with a start date, as well as region type. [https://tinyurl.com/2k8h75bz run]
SELECT ?regionLabel ?statusLabel (YEAR(?starttime) as ?start) (YEAR(?endtime) as ?end) ?duration
WHERE
{
  ?region ogt:P6 og:Q1.
  ?region ogp:P15 ?statement.
  ?statement ogps:P15 ?status.
  ?statement ogpq:P7 ?starttime.
  OPTIONAL{?statement ogpq:P8 ?endtime.}
  OPTIONAL{?statement ogpq:P22 ?endtime.}
  BIND(year(?endtime)-year(?starttime) as ?duration )
   
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
ORDER BY desc(?duration)
=== Number of kazas created each year ===
In each year, how many kazas were inaugurated? [https://tinyurl.com/2z9gjrch run]
SELECT ?year (COUNT (DISTINCT ?kaza) AS ?count)
WHERE
{
  ?kaza ogt:P15 og:Q3.
  ?kaza ogp:P15 ?statement.
  ?statement ogps:P15 og:Q3.
  ?statement ogpq:P7 ?starttime.
  bind(year(?starttime) as ?year)
}
GROUP BY ?year
ORDER BY desc(?count)
===Count status types in a given year===
For a given year, how many regions of each status are found in the dataset? [https://tinyurl.com/2mqqhl8u run]
SELECT (COUNT (DISTINCT ?region) AS ?count) ?statusLabel
WHERE
{
  ?region ogp:P15 ?statement.
  ?statement ogps:P15 ?status.
  ?statement ogpq:P7 ?starttime.
  OPTIONAL{?statement ogpq:P8 ?endtime.}
  OPTIONAL{?statement ogpq:P22 ?endtime.}
  #enter same year twice
  FILTER(YEAR(?starttime) <= 1600 && YEAR(?endtime) > 1600)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
GROUP BY ?statusLabel
ORDER BY desc(?count)
===Number of kazas between 1800 and 1900===
*This query is not working yet.* The query below returns the number of vilayets between 1750 and 1760. Thanks to [https://stackoverflow.com/users/7879193/stanislav-kralin Stanislav Kralin] for [https://stackoverflow.com/questions/75137894/counting-items-over-time-using-sparql help]. [ run]
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?number (count(*) as ?count)  {
  SERVICE <https://query.wikidata.org/sparql> {
?year wdt:P31 wd:Q21199 ; wdt:P1181 ?number
FILTER (?number >= 1750  && ?number <= 1760)
}
?vilayet ogt:P15 og:Q5.
?vilayet ogp:P15 ?statement.
?statement ogps:P15 og:Q5.
?statement ogpq:P7 ?starttime.
OPTIONAL{?statement ogpq:P8 ?endtime.}
OPTIONAL{?statement ogpq:P22 ?endtime.}
BIND (YEAR(?starttime) AS ?year1)
BIND (YEAR(COALESCE(?endtime, NOW())) AS ?year2)
FILTER (?number >= ?year1 && ?number <= ?year2)
}
group by ?number
order by ?number
===Sancak status ending between 1830 and 1840===
This query will return the region more than once, if it undergoes status change more than once during this time period. [https://tinyurl.com/2ew6y2vw run]
SELECT ?sancak ?sancakLabel (YEAR(?starttime) as ?start) (YEAR(?endtime) as ?end)
WHERE
{
  ?sancak ogt:P6 og:Q1.
  ?sancak ogp:P15 ?statement.
  ?statement ogps:P15 og:Q4.
  ?statement ogpq:P7 ?starttime.
  OPTIONAL{?statement ogpq:P8 ?endtime.}
  OPTIONAL{?statement ogpq:P22 ?endtime.}
  FILTER(YEAR(?endtime) >= 1830 && YEAR(?endtime) <= 1840) 
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
ORDER BY (?endtime)
=== Map of sanjak seats ===
This shows only those sanjaks with seats designated and geolocated. [https://tinyurl.com/2hqpwoas run]
#defaultView:Map
SELECT ?sancakLabel ?seatLabel ?geo
WHERE
{
  ?sancak ogt:P15 og:Q4.
  ?sancak ogt:P14 ?seat.
  OPTIONAL {?seat ogt:P10 ?geo.}
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
=== Status count ===
Which regions have the most status changes entered in Ottgaz? [https://tinyurl.com/2a2akktv run]
SELECT (COUNT (DISTINCT ?statement) AS ?statementCount) (COUNT (DISTINCT ?status) AS ?statusCount) ?regionLabel
WHERE
{
  ?region ogp:P15 ?statement.
  ?statement ogps:P15 ?status.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
GROUP BY ?regionLabel
ORDER BY desc(?statementCount)
LIMIT 100
=== Sancak seats by vilayet and year ===
This shows a tree map with the Ottoman language names of sancak seats within each vilayet in the year 1700. Seat names direct to items. Change the year in line 23. [https://tinyurl.com/2bu4bomk run]
#defaultView:TreeMap
SELECT  ?vilayetLabel ?sancakSeat_ota ?sancakSeat
WHERE
{
  ?sancak ogp:P15 ?statement.
  ?statement ogps:P15 og:Q4.
  ?statement ogpq:P7 ?starttime.
  ?statement ogpq:P9 ?vilayet.
  ?sancak ogt:P14 ?sancakSeat.
  OPTIONAL{?statement ogpq:P8 ?endtime.}
  OPTIONAL{?statement ogpq:P22 ?endtime.}
  ?sancakSeat rdfs:label ?sancakSeat_ota filter (lang(?sancakSeat_ota) = "ota")
  #enter same year twice
  FILTER(YEAR(?starttime) <= 1700 && YEAR(?endtime) > 1700)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
=== Map of nahiye seats, colored by vilayet, in 1900 ===
This is a variation on the sancak seat map on the main page. [http://tinyurl.com/2ae3rfxb run]
SELECT ?vilayet3Label ?nahiyeSeat_ota ?nahiyeSeat ?nahiyeCoord ?nahiyeSeatLabel
WHERE
{   
  # nahiye
  ?nahiye ogp:P15 ?statement4.
  ?statement4 ogps:P15 og:Q2.
  ?statement4 ogpq:P7 ?starttime4.
  ?statement4 ogpq:P34 ?kaza2.
  ?nahiye ogt:P14 ?nahiyeSeat.
  ?nahiyeSeat ogt:P10 ?nahiyeCoord.
  OPTIONAL{?statement4 ogpq:P8 ?endtime4.}
  OPTIONAL{?statement4 ogpq:P22 ?endtime4.}
           
  # find sanckas for kazas
  ?kaza2 ogp:P15 ?statement5.
  ?statement5 ogps:P15 og:Q3.
  ?statement5 ogpq:P7 ?starttime5.
  ?statement5 ogpq:P34 ?sancak3.
  OPTIONAL{?statement5 ogpq:P8 ?endtime5.}
  OPTIONAL{?statement5 ogpq:P22 ?endtime5.}
             
  # find vilayets for kazas
  ?sancak3 ogp:P15 ?statement6.
  ?statement6 ogps:P15 og:Q4.
  ?statement6 ogpq:P7 ?starttime6.
  ?statement6 ogpq:P34 ?vilayet3.
  OPTIONAL{?statement6 ogpq:P8 ?endtime6.}
  OPTIONAL{?statement6 ogpq:P22 ?endtime6.}
 
  # get ottoman label
  ?nahiyeSeat rdfs:label ?nahiyeSeat_ota filter (lang(?nahiyeSeat_ota) = "ota")
           
  #enter same year twice
  FILTER(YEAR(?starttime4) <= 1900 && YEAR(?endtime4) > 1900)
  FILTER(YEAR(?starttime5) <= 1900 && YEAR(?endtime5) > 1900)
  FILTER(YEAR(?starttime6) <= 1900 && YEAR(?endtime6) > 1900)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
  }
  }


== Federated query with Wikidata ==
== Federated query with Wikidata ==


For every sanjak in Ottgaz, this query fetches the vilayet or eyalet to which Wikidata says it belongs. Thanks to Lucas Werkmeister for help. [https://tinyurl.com/2ehssccv run]
=== Wikidata version of vilayet from Ottgaz sancaks ===
 
For every sanjak in Ottgaz, this query fetches the vilayet or eyalet to which Wikidata says it belongs. Thanks to Lucas Werkmeister for help. [https://tinyurl.com/2eokop65 run]


  PREFIX og: <https://ottgaz.org/entity/>
  PREFIX og: <https://ottgaz.org/entity/>
Line 38: Line 220:
  SELECT ?sancak ?sancakLabel ?vilayet ?vilayetLabel
  SELECT ?sancak ?sancakLabel ?vilayet ?vilayetLabel
  WHERE {
  WHERE {
   ?sancak ogt:P6 og:Q4 . # item must be a sanjak
   ?sancak ogt:P15 og:Q4 . # item must be a sanjak
   ?sancak ogt:P4 ?c . # finds reference URL to wikidata
   ?sancak ogt:P4 ?c . # finds reference URL to wikidata
   BIND(IRI(REPLACE(REPLACE(REPLACE(STR(?c), "Property:", ""), "/wiki/", "/entity/"), "https://", "http://")) AS ?wd_c) # produces proper URL for query form
   BIND(IRI(REPLACE(REPLACE(REPLACE(STR(?c), "Property:", ""), "/wiki/", "/entity/"), "https://", "http://")) AS ?wd_c) # produces proper URL for query form
Line 53: Line 235:
   }
   }
  }
  }
=== Fetch missing geocoordinates from Wikidata ===
This query for some reason won't supply a shortlink. Model for fetching information from Wikidata. Thanks for help from Wikibase telegram group users Tuukka Hastrup and Nikki. Note the crucial and mysterious `hint:Query hint:optimizer "None" .` [https://ottgaz.org/query/#%23Seat%20with%20Wikidata%20Qids%20but%20without%20geolocation%0APREFIX%20og%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fentity%2F%3E%0APREFIX%20ogs%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fentity%2Fstatement%2F%3E%0APREFIX%20ogv%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fvalue%2F%3E%0APREFIX%20ogt%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fprop%2Fdirect%2F%3E%0APREFIX%20ogp%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fprop%2F%3E%0APREFIX%20ogps%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fprop%2Fstatement%2F%3E%0APREFIX%20ogpq%3A%20%3Chttps%3A%2F%2Fottgaz.org%2Fprop%2Fqualifier%2F%3E%0APREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3E%0APREFIX%20wdt%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fprop%2Fdirect%2F%3E%0ASELECT%20%20%3Fseat%20%3FseatLabel%20%3FwdURL%20%3Fcoords%0AWHERE%20%0A%7B%0A%20VALUES%20%3Fcenter%20%7Bog%3AQ287%20og%3AQ497%20og%3AQ6421%7D%0A%20%3Fseat%20ogt%3AP6%20%3Fcenter.%0A%20%3Fseat%20ogt%3AP3%20%3FwdQid.%0A%20MINUS%20%7B%3Fseat%20ogt%3AP10%20%3Fgeo.%7D%0A%20BIND%20%28URI%28CONCAT%28%22http%3A%2F%2Fwww.wikidata.org%2Fentity%2F%22%2C%0A%20ENCODE_FOR_URI%28%3FwdQid%29%29%29%20as%20%3FwdURL%29.%0A%20hint%3AQuery%20hint%3Aoptimizer%20%22None%22%20.%0A%20SERVICE%20%3Chttps%3A%2F%2Fquery.wikidata.org%2Fsparql%3E%20%7B%0A%20%20%20SELECT%20%3FwdURL%20%3Fcoords%20%7B%0A%20%20%20%3FwdURL%20wdt%3AP625%20%3Fcoords%20.%0A%20%20%20%7D%0A%20%7D%0A%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22%5BAUTO_LANGUAGE%5D%2Cen%22%20%7D%0A%7D run]
#Seat with Wikidata Qids but without geolocation
PREFIX og: <https://ottgaz.org/entity/>
PREFIX ogs: <https://ottgaz.org/entity/statement/>
PREFIX ogv: <https://ottgaz.org/value/>
PREFIX ogt: <https://ottgaz.org/prop/direct/>
PREFIX ogp: <https://ottgaz.org/prop/>
PREFIX ogps: <https://ottgaz.org/prop/statement/>
PREFIX ogpq: <https://ottgaz.org/prop/qualifier/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT  ?seat ?seatLabel ?wdURL ?coords
WHERE
{
  VALUES ?center {og:Q287 og:Q497 og:Q6421}
  ?seat ogt:P6 ?center.
  ?seat ogt:P3 ?wdQid.
  MINUS {?seat ogt:P10 ?geo.}
  BIND (URI(CONCAT("http://www.wikidata.org/entity/",
  ENCODE_FOR_URI(?wdQid))) as ?wdURL).
  hint:Query hint:optimizer "None" .
  SERVICE <https://query.wikidata.org/sparql> {
    SELECT ?wdURL ?coords {
    ?wdURL wdt:P625 ?coords .
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
== Help ==
After trying the samples, if you need more help formulating queries, try:
* working through the [https://www.wikidata.org/wiki/Wikidata:SPARQL_tutorial Wikidata SPARQL tutorial]
* searching [https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples Wikidata sample queries] and adapting them to Ottgaz
* asking for help on the [[Talk:Querying|discussion page]]
* asking for help on [https://stackoverflow.com/questions/tagged/sparql Stack Overflow] or the [https://t.me/+WBsf9-C9KPuMZCDT wikibase Telegram group]
== Cleanup queries ==
These queries are useful to find information gaps to be filled when tidying the dataset.
=== Missing labels ===
Returns labels in English, Ottoman, and Arabic. Sort results to find blanks. [https://tinyurl.com/2at3o9p6 run]
SELECT ?seat ?seat_en ?seat_ota ?seat_ar
WHERE
{
  ?seat ogt:P6 og:Q6421.
 
  OPTIONAL {?seat rdfs:label ?seat_en filter (lang(?seat_en) = "en")}.
  OPTIONAL {?seat rdfs:label ?seat_ota filter (lang(?seat_ota) = "ota")}. 
  OPTIONAL {?seat rdfs:label ?seat_ar filter (lang(?seat_ar) = "ar")}. 
 
}
=== Seats without geolocation ===
Wikidata ID can be used to supply geolocation. [https://tinyurl.com/273jk9c4 run]
SELECT ?sancak ?sancakLabel ?seat ?seatLabel ?wikidata
WHERE
{
  ?sancak ogt:P15 og:Q4.
  ?sancak ogt:P14 ?seat.
  MINUS {?seat ogt:P10 ?geo.}
  OPTIONAL {?seat ogt:P3 ?wikidata.} 
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
=== Seats without regions ===
During the initial setup, certain regions listed seats, but there was no reciprocal listing of region for the seat items. This query includes date qualifiers, where present. [https://tinyurl.com/228d48rc run]
SELECT ?region ?regionLabel ?seat ?seatLabel ?starttime ?endtime ?latestdate
WHERE
{
  ?region ogt:P14 ?seat.
  MINUS {?seat ogt:P11 ?region.}
  OPTIONAL {
    ?region ogp:P14 ?statement.
    ?statement ogps:P14 ?seat.
    ?statement ogpq:P7 ?starttime.
      OPTIONAL { ?statement ogpq:P8 ?endtime.}
      OPTIONAL { ?statement ogpq:P22 ?latestdate.}
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
==== Regions without seats ====
Inverse of the above. [https://tinyurl.com/2ab4zer6 run]
=== Regions without status statements ===
Useful for finding errors in bulk uploads [https://tinyurl.com/2y26wh6q run]
SELECT ?region ?regionLabel
WHERE
{
  ?region ogt:P6 og:Q1.
  MINUS {?region ogp:P15 ?statement.}
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
=== NSF ID geolocation ===
[https://tinyurl.com/2bhder53 run]