# Source code for tests.testSPARQL

'''
Created on 2020-08-14

@author: wf
'''
import unittest
import copy
from tests.basetest import Basetest
from lodstorage.sparql import SPARQL
from lodstorage.sample import Sample
from lodstorage.lod import LOD
from lodstorage.query import Query
import time
import datetime
import warnings

class TestSPARQL(Basetest):
    '''
    Test SPARQL access e.g. Apache Jena via Wrapper

    The Jena tests talk to the endpoint returned by getJena,
    the Wikidata tests talk to https://query.wikidata.org/sparql
    '''

    def getJena(self, mode='query', debug=False, typedLiterals=False, profile=False):
        '''
        get the jena endpoint for the given mode

        Args:
            mode(string): query or update
            debug(boolean): True if debug information should be output
            typedLiterals(boolean): True if INSERT DATA SPARQL commands should use typed literals
            profile(boolean): True if profile/timing information should be shown

        Returns:
            SPARQL: the wrapper for the http://localhost:3030/example endpoint
        '''
        endpoint = "http://localhost:3030/example"
        jena = SPARQL(endpoint, mode=mode, debug=debug, typedLiterals=typedLiterals, profile=profile)
        return jena

    def testJenaQuery(self):
        '''
        test Apache Jena Fuseki SPARQL endpoint with example SELECT query
        '''
        jena = self.getJena()
        queryString = "SELECT * WHERE { ?s ?p ?o. }"
        results = jena.query(queryString)
        self.assertGreater(len(results), 20)

    def testJenaInsert(self):
        '''
        test a Jena INSERT DATA

        a valid command must not report an exception,
        an invalid command must report a QueryBadFormed exception
        '''
        jena = self.getJena(mode="update")
        validCommand = """
        PREFIX cr: <http://cr.bitplan.com/>
        INSERT DATA {
          cr:version cr:author "Wolfgang Fahl".
        }
        """
        result, ex = jena.insert(validCommand)
        self.assertIsNone(ex, f"Exception: {ex}")
        if self.debug:
            print(result)

        # silence warnings for the deliberately invalid command only -
        # catch_warnings restores the global filter state afterwards
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _result, ex = jena.insert('INVALID COMMAND')
        # fail with a clear assertion instead of an AttributeError on None
        self.assertIsNotNone(ex)
        msg = ex.args[0]
        if self.debug:
            print(msg)
        self.assertIn("QueryBadFormed", msg)

    def checkErrors(self, errors, expected=0):
        '''
        check the given list of errors - print any errors if there are some
        (in debug mode only) and after that assert that the length of the
        list of errors is the expected one

        Args:
            errors(list): the list of errors to check
            expected(int): the expected number of errors - default: 0
        '''
        if self.debug and errors:
            print("ERRORS:")
            for error in errors:
                print(error)
        self.assertEqual(expected, len(errors))

    def testDob(self):
        '''
        test the DOB (date of birth) function that converts from ISO-Date to
        datetime.date
        '''
        dt = Sample.dob("1926-04-21")
        self.assertEqual(1926, dt.year)
        self.assertEqual(4, dt.month)
        self.assertEqual(21, dt.day)

    def testListOfDictInsert(self):
        '''
        test inserting a list of Dicts and retrieving the values again
        using a person based example
        instead of
        https://en.wikipedia.org/wiki/FOAF_(ontology)

        we use an object oriented derivate of FOAF with a focus on datatypes
        '''
        listofDicts = Sample.getRoyals()
        typedLiteralModes = [True, False]
        entityType = 'foafo:Person'
        primaryKey = 'name'
        prefixes = 'PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>'
        # both SPARQL commands are the same for every mode
        deleteString = """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
        DELETE WHERE {
          ?person a 'foafo:Person'.
          ?person ?p ?o.
        }
        """
        queryString = """
        PREFIX foafo: <http://foafo.bitplan.com/foafo/0.1/>
        SELECT ?name ?born ?numberInLine ?wikidataurl ?age ?ofAge ?lastmodified WHERE {
            ?person a 'foafo:Person'.
            ?person foafo:Person_name ?name.
            ?person foafo:Person_born ?born.
            ?person foafo:Person_numberInLine ?numberInLine.
            ?person foafo:Person_wikidataurl ?wikidataurl.
            ?person foafo:Person_age ?age.
            ?person foafo:Person_ofAge ?ofAge.
            ?person foafo:Person_lastmodified ?lastmodified.
        }"""
        for typedLiteralMode in typedLiteralModes:
            # start from a clean state and insert the sample records
            jena = self.getJena(mode='update', typedLiterals=typedLiteralMode, debug=self.debug)
            jena.query(deleteString)
            errors = jena.insertListOfDicts(listofDicts, entityType, primaryKey, prefixes)
            self.checkErrors(errors)

            # read the records back
            jena = self.getJena(mode="query", debug=self.debug)
            personResults = jena.query(queryString)
            self.assertEqual(len(listofDicts), len(personResults))
            personList = jena.asListOfDicts(personResults)
            if self.debug:
                for index, person in enumerate(personList):
                    print("%d: %s" % (index, person))
            # check the correct round-trip behavior
            self.assertEqual(listofDicts, personList)

    def testControlEscape(self):
        '''
        check the control-escaped version of an UTF-8 string
        '''
        # NOTE(review): both literals were garbled in the extracted source
        # (opening quotes and possibly leading characters were lost) and the
        # expected value assumes controlEscape turns tab/CR/LF into their
        # backslash escapes - confirm both against the upstream file
        controls = "\tΩ\r\n"
        expected = "\\tΩ\\r\\n"
        esc = SPARQL.controlEscape(controls)
        self.assertEqual(expected, esc)

    def testSPARQLErrorMessage(self):
        '''
        test error handling
        see https://stackoverflow.com/questions/63486767/how-can-i-get-the-fuseki-api-via-sparqlwrapper-to-properly-report-a-detailed-err
        '''
        # the backslashes are escaped explicitly - a bare "\S" is an invalid
        # escape sequence that only works by accident and triggers a warning
        listOfDicts = [{
            'title': '“Bioinformatics of Genome Regulation and Structure\\Systems Biology” – BGRS\\SB-2018',
            'url': 'https://thenode.biologists.com/event/11th-international-multiconference-bioinformatics-genome-regulation-structuresystems-biology-bgrssb-2018/'}]
        entityType = "cr:Event"
        primaryKey = 'title'
        prefixes = "PREFIX cr: <http://cr.bitplan.com/Event/0.1/>"
        jena = self.getJena(mode='update', typedLiterals=False, debug=self.debug)
        errors = jena.insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes)
        self.checkErrors(errors, 1)
        error = errors[0]
        if self.debug:
            print(f"error is {error}")
        self.assertIn("probably the sparql query is bad formed", error)

    def testEscapeStringContent(self):
        '''
        test handling of double quoted strings
        '''
        helpListOfDicts = [{'topic': 'edit', 'description': '''Use
the "edit"
button to start editing - you can use
- tab \t
- carriage return \r
- newline \n

as escape characters
'''
        }]
        entityType = 'help:Topic'
        primaryKey = 'topic'
        prefixes = 'PREFIX help: <http://help.bitplan.com/help/0.0.1/>'
        jena = self.getJena(mode='update', debug=self.debug)
        errors = jena.insertListOfDicts(helpListOfDicts, entityType, primaryKey, prefixes, profile=self.profile)
        self.checkErrors(errors)
        query = """
        PREFIX help: <http://help.bitplan.com/help/0.0.1/>
        SELECT ?topic ?description
        WHERE {
          ?help help:Topic_topic ?topic.
          ?help help:Topic_description ?description.
        }
        """
        jena = self.getJena(mode='query')
        listOfDicts = jena.queryAsListOfDicts(query)
        # check round trip equality
        self.assertEqual(helpListOfDicts, listOfDicts)

    def testIssue7(self):
        '''
        test conversion of dates with timezone info
        '''
        values = ["2020-01-01T00:00:00Z", "42000-01-01T00:00:00Z"]
        # the second value has a five digit year and is expected to yield None
        expected = [datetime.datetime(2020, 1, 1, 0, 0), None]
        for value, expectedDt in zip(values, expected):
            dt = SPARQL.strToDatetime(value, debug=self.debug)
            self.assertEqual(expectedDt, dt)

    def testListOfDictSpeed(self):
        '''
        test the speed of adding data
        '''
        limit = 5000
        entityType = "ex:TestRecord"
        primaryKey = 'pkey'
        prefixes = 'PREFIX ex: <http://example.com/>'
        # run once with the default batchSize (None) and once with 1000
        for batchSize in [None, 1000]:
            listOfDicts = Sample.getSample(limit)
            jena = self.getJena(mode='update', profile=self.profile)
            startTime = time.time()
            errors = jena.insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes, batchSize=batchSize)
            self.checkErrors(errors)
            elapsed = time.time() - startTime
            if self.profile:
                print("adding %d records took %5.3f s => %5.f records/s" % (limit, elapsed, limit / elapsed))

    def testWikdata(self):
        '''
        check wikidata
        '''
        endpoint = "https://query.wikidata.org/sparql"
        wd = SPARQL(endpoint)
        queryString = """# get a list of whisky distilleries
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?item ?coord
WHERE
{
  # instance of whisky distillery
  ?item wdt:P31 wd:Q10373548.
  # get the coordinate
  ?item wdt:P625 ?coord.
}
"""
        results = wd.query(queryString)
        self.assertGreaterEqual(len(results), 238)

    def testIssue20And76(self):
        '''
        see https://github.com/WolfgangFahl/pyLoDStorage/issues/20
        add fixNone option to SPARQL results (same functionality as in SQL)

        https://github.com/WolfgangFahl/pyLoDStorage/issues/76
        SPARQL GET method support
        '''
        endpoint = "https://query.wikidata.org/sparql"
        queryString = """
# Conference Series wikidata query
# see https://confident.dbis.rwth-aachen.de/dblpconf/wikidata
# WF 2021-01-30
SELECT ?confSeries ?short_name ?official_website
WHERE
{
  # scientific conference series (Q47258130)
  ?confSeries wdt:P31 wd:Q47258130.
  OPTIONAL {
    ?confSeries wdt:P1813 ?short_name .
  }
  # official website (P856)
  OPTIONAL {
    ?confSeries wdt:P856 ?official_website
  }
}
LIMIT 200
"""
        for method in ["POST", "GET"]:
            wd = SPARQL(endpoint, method=method)
            lod = wd.queryAsListOfDicts(queryString, fixNone=True)
            fields = LOD.getFields(lod)
            if self.debug:
                print(fields)
            # with fixNone every row must carry every field,
            # even if the OPTIONAL clauses did not match
            for row in lod:
                for field in fields:
                    self.assertIn(field, row)

    def testStackoverflow55961615Query(self):
        '''
        see
        https://stackoverflow.com/questions/55961615/how-to-integrate-wikidata-query-in-python
        https://stackoverflow.com/a/69771615/1497139
        '''
        endpoint = "https://query.wikidata.org/sparql"
        queryString = """SELECT ?s ?sLabel ?item ?itemLabel ?sourceCode ?webSite ?stackexchangeTag  {
    SERVICE wikibase:mwapi {
        bd:serviceParam wikibase:api "EntitySearch".
        bd:serviceParam wikibase:endpoint "www.wikidata.org".
        bd:serviceParam mwapi:search "natural language processing".
        bd:serviceParam mwapi:language "en".
        ?item wikibase:apiOutputItem mwapi:item.
        ?num wikibase:apiOrdinal true.
    }
    ?s wdt:P279|wdt:P31 ?item .
    OPTIONAL {
      ?s wdt:P1324 ?sourceCode.
    }
    OPTIONAL {
      ?s wdt:P856 ?webSite.
    }
    OPTIONAL {
      ?s wdt:P1482 ?stackexchangeTag.
    }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
ORDER BY ?itemLabel ?sLabel"""
        qlod = None
        try:
            wd = SPARQL(endpoint)
            qlod = wd.queryAsListOfDicts(queryString, fixNone=True)
        except Exception as ex:
            # best effort: a failing endpoint access does not fail the test
            print(f"{endpoint} access failed with {ex}- could not run test")
        if qlod is not None:
            query = Query(name="EntitySearch", query=queryString, lang='sparql')
            for tablefmt in ["github", "mediawiki", "latex"]:
                qdoc = query.documentQueryResult(qlod, tablefmt=tablefmt)
                if self.debug:
                    print(qdoc)

    def testStackoverflow71444069(self):
        '''
        https://stackoverflow.com/questions/71444069/create-csv-from-result-of-a-for-google-colab/71548650#71548650
        '''
        # CSV is not imported at module level
        from lodstorage.csv import CSV
        sparqlQuery = """SELECT ?org ?orgLabel
WHERE
{
  ?org wdt:P31 wd:Q4830453. #instance of organizations
  ?org wdt:P17 wd:Q96. #Mexico country
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en"}
}"""
        sparql = SPARQL("https://query.wikidata.org/sparql")
        qlod = sparql.queryAsListOfDicts(sparqlQuery)
        csvText = CSV.toCSV(qlod)
        if self.debug:
            print(csvText)
if __name__ == "__main__":
    # to run a single test set e.g. sys.argv = ['', 'Test.testName'] first
    unittest.main()