import asyncio
import json
import sys
from types import SimpleNamespace as Namespace
from typing import Any

import httpx
import nest_asyncio
import pandas as pd
import requests  # type: ignore
import xmltodict
from syncer import sync

nest_asyncio.apply()  # Fixes the issue with iPython compatibility

# Columns of the species table; "scientific_name" is taken from each entry's
# "@name" attribute, the remaining ones from the same-named "@<col>" attributes.
SPECIES_COLS = [
    "scientific_name",
    "display_name",
    "common_name",
    "taxon_id",
    "assembly",
    "accession",
    "release",
]

ENSEMBL_REST = "http://rest.ensembl.org"
ENSEMBL_REST_EXT = Namespace(
    SPECIES_INFO="/info/species?",  # all species info
    ARCHIVE_IDS="/archive/id",  # retrieves the latest version of ids
    XREFS_ID="/xrefs/id/",
    LOOKUP_IDS="/lookup/id",
    LOOKUP_SYMBOLS="/lookup/symbol/",
    SEQ_IDS="/sequence/id",
)


async def _async_get(client, url, content_type="application/json"):
    """GET ``url`` with ``client`` and decode the response.

    Returns the parsed JSON when the response declares a JSON content type,
    otherwise the raw response text.
    """
    resp = await client.get(url, headers={"content-type": content_type})
    # A response without a content-type header is treated as plain text.
    if "application/json" in resp.headers.get("content-type", ""):
        return resp.json()
    return resp.text


@sync
async def get_request_async(
    server_request: str, terms, content_type="application/json"
):
    """Fetch ``server_request + term`` concurrently for every term.

    Args:
        server_request: URL prefix that each term is appended to.
        terms: Iterable of URL suffixes (e.g. identifiers).
        content_type: Value sent in the ``content-type`` request header.

    Returns:
        A list with one decoded response per term, in the order of ``terms``.
    """
    async with httpx.AsyncClient() as client:
        tasks = [
            asyncio.ensure_future(
                _async_get(
                    client, f"{server_request}{term}", content_type=content_type
                )
            )
            for term in terms
        ]
        return await asyncio.gather(*tasks)


def get_request(server, request, content_type="application/json", **kwds) -> Any:
    """Fetch an endpoint from the server.

    Allow overriding of default content-type.

    Args:
        server: Base URL of the server.
        request: Endpoint path appended to ``server``.
        content_type: Value of the ``Accept`` header.
        **kwds: Forwarded to ``requests.get``.

    Returns:
        Parsed JSON if ``content_type`` is ``"application/json"``, else the
        response text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(server + request, headers={"Accept": content_type}, **kwds)
    # No-op on success, raises on any 4xx/5xx status.
    r.raise_for_status()
    if content_type == "application/json":
        return r.json()
    return r.text


def post_request(
    server, request, data, content_type="application/json", **kwds
) -> Any:
    """POST ``data`` to an endpoint of the server.

    Args:
        server: Base URL of the server.
        request: Endpoint path appended to ``server``.
        data: Request body.
        content_type: Value of the ``Content-Type`` header.
        **kwds: Forwarded to ``requests.post``.

    Returns:
        Parsed JSON if ``content_type`` is ``"application/json"``, else the
        response text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.post(
        server + request, headers={"Content-Type": content_type}, data=data, **kwds
    )
    # No-op on success, raises on any 4xx/5xx status.
    r.raise_for_status()
    if content_type == "application/json":
        return r.json()
    return r.text


class EnsemblREST:
    """Queries via the Ensembl REST APIs."""

    def __init__(self) -> None:
        self._server = ENSEMBL_REST

    @property
    def server(self):
        """ENSEMBL_REST."""
        return self._server

    def _config_data(self, ids, label):
        """Serialize ``ids`` as the JSON body ``{label: [ids...]}``.

        A single string is wrapped into a one-element list; any other
        iterable is converted to a list before serialization.
        """
        if isinstance(ids, str):
            ids = [ids]
        return json.dumps({label: list(ids)})

    def species_info(self, return_raw=False):
        """ENSEMBL_REST_EXT.SPECIES_INFO.

        Args:
            return_raw: If True, return the XML text as received.

        Returns:
            The raw XML string, or the parsed ``opt/data/species`` content.
        """
        ext = ENSEMBL_REST_EXT.SPECIES_INFO
        res = get_request(self.server, ext, "text/xml")
        if return_raw:
            return res
        return xmltodict.parse(res)["opt"]["data"]["species"]

    def xref(self, ids, **kwargs):
        """Retrieve external references of Ensembl ids.

        See https://rest.ensembl.org/documentation/info/xref_id

        Args:
            ids: A single identifier (str) or an iterable of identifiers.
            **kwargs: Forwarded to ``get_request``; only used when ``ids`` is
                a single string.
        """
        if isinstance(ids, str):
            ext = f"{ENSEMBL_REST_EXT.XREFS_ID}{ids}?"
            return get_request(self.server, ext, **kwargs)
        # Several ids: one GET per id, issued concurrently.
        return get_request_async(self.server + ENSEMBL_REST_EXT.XREFS_ID, ids)

    def archive_ids(self, ids):
        """Retrieve the latest version for a set of identifiers."""
        ext = ENSEMBL_REST_EXT.ARCHIVE_IDS
        return post_request(self.server, ext, data=self._config_data(ids, "id"))

    def lookup_ids(self, ids, **kwargs):
        """Find the species and database for several identifiers.

        See https://rest.ensembl.org/documentation/info/lookup_post
        """
        ext = ENSEMBL_REST_EXT.LOOKUP_IDS
        # The POST lookup/id endpoint takes its identifiers under "ids".
        return post_request(
            self.server, ext, data=self._config_data(ids, "ids"), **kwargs
        )

    def lookup_symbols(self, symbols, species="homo_sapiens", **kwargs):
        """Find the species and database for symbols in a linked external database."""
        ext = f"{ENSEMBL_REST_EXT.LOOKUP_SYMBOLS}{species}"
        return post_request(
            self.server, ext, data=self._config_data(symbols, "symbols"), **kwargs
        )

    def seq_ids(self, ids, **kwargs):
        """Request multiple types of sequence by a stable identifier list."""
        ext = ENSEMBL_REST_EXT.SEQ_IDS
        return post_request(
            self.server, ext, data=self._config_data(ids, "ids"), **kwargs
        )


def update_species_table() -> None:
    """Fetch the species table from Ensembl REST and write it to a CSV file.

    The table is indexed by lower-cased ``display_name`` and carries
    ``scientific_name``, the remaining ``SPECIES_COLS`` and a derived
    ``short_name`` column. It is written to ``SPECIES_FILENAME``; nothing is
    returned.
    """
    entries = EnsemblREST().species_info()

    # Every column except the scientific name, which becomes the initial index.
    cols = [col for col in SPECIES_COLS if col != "scientific_name"]
    rows = {
        entry.get("@name"): [entry.get(f"@{col}") for col in cols]
        for entry in entries
    }

    # format into a dataframe
    sp_df = pd.DataFrame.from_dict(rows, orient="index", columns=cols)
    sp_df.index.name = "scientific_name"
    sp_df["display_name"] = sp_df["display_name"].str.lower()

    # Adding a short_name column: first letter + last "_"-separated part of the
    # scientific name, e.g. "homo_sapiens" -> "hsapiens".
    sp_df["short_name"] = [
        f"{name[0].lower()}{name.split('_')[-1]}" for name in sp_df.index
    ]

    # Set display_name as the index for std_id
    sp_df = sp_df.reset_index().set_index("display_name")
    # NOTE(review): SPECIES_FILENAME is not defined in the visible part of this
    # module; it is expected to be provided at module level.
    sp_df.to_csv(SPECIES_FILENAME, header=True, index=True)
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Input In [2], in <cell line: 17>()
      4 SPECIES_COLS = [
      5     "scientific_name",
      6     "display_name",
   (...)
     11     "release",
     12 ]
     15 import xmltodict
---> 17 from ._httpx import get_request, get_request_async, post_request
     18 from ._urls import ENSEMBL_REST, ENSEMBL_REST_EXT
     21 class EnsemblREST:

ImportError: attempted relative import with no known parent package