Skip to content

Commit

Permalink
Bugfix: bigquery requires number of rows otherwise it only returns 10
Browse files Browse the repository at this point in the history
  • Loading branch information
marblestation committed Jan 7, 2019
1 parent 4f8a9fc commit 306576a
Showing 1 changed file with 20 additions and 3 deletions.
23 changes: 20 additions & 3 deletions ADSCitationCapture/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ def request_existing_citations(app, bibcode):
existing_citation_bibcodes = [b['bibcode'] for b in existing_citation_bibcodes]
return existing_citation_bibcodes


def get_canonical_bibcodes(app, bibcodes):
def _get_canonical_bibcodes(app, bibcodes):
"""
Convert input bibcodes into their canonical form if they exist
"""
params = urllib.urlencode({
'fl': 'bibcode',
'q': '*:*',
'wt': 'json',
'fq':'{!bitset}'
'fq':'{!bitset}',
'rows': len(bibcodes)
})
headers = {}
headers["Authorization"] = "Bearer:{}".format(app.conf['ADS_API_TOKEN'])
Expand All @@ -57,6 +57,23 @@ def get_canonical_bibcodes(app, bibcodes):
r = requests.post(url, headers=headers, data=data)
return [d['bibcode'] for d in r.json().get('response', {}).get('docs', [])]

def get_canonical_bibcodes(app, bibcodes):
    """
    Convert input bibcodes into their canonical form if they exist.

    The bibcodes are sent to bigquery in consecutive batches of at most
    `limit` entries, and the results of all batches are concatenated in
    the order the batches were requested.

    :param app: application object, passed through unchanged to
        `_get_canonical_bibcodes`.
    :param bibcodes: sequence of bibcodes (must support len() and slicing).
    :return: list with the bibcodes returned for every batch; an empty list
        when `bibcodes` is empty (no request is made in that case).
    """
    limit = 2000  # maximum number of bibcodes sent per bigquery request
    canonical_bibcodes = []
    # Walk the input one window at a time. The slice end has to advance
    # together with `start`: a fixed end of `limit` selects nothing after the
    # first batch, silently dropping every bibcode past the first `limit`.
    # Using range() also avoids a trailing request with an empty batch when
    # the input length is an exact multiple of `limit` (or zero).
    for start in range(0, len(bibcodes), limit):
        canonical_bibcodes += _get_canonical_bibcodes(app, bibcodes[start:start + limit])
    return canonical_bibcodes

def get_canonical_bibcode(app, bibcode):
"""
Convert input bibcodes into their canonical form if they exist
Expand Down

0 comments on commit 306576a

Please sign in to comment.