# Source code for pskb_website.remote

"""
Main entry point for interacting with remote service APIs
"""

import base64
import collections
import json
import urllib

from flask_oauthlib.client import OAuth
from flask import session

from . import app
from . import cache

# OAuth client registry bound to the Flask application
oauth = OAuth(app)

# Remote application used for every github API request in this module.
# Relative URLs passed to github.get/put/post/... are resolved against
# base_url. request_token_url is None because no request-token URL is
# configured for this remote; only the access-token and authorize URLs are.
github = oauth.remote_app(
    'github',
    consumer_key=app.config['GITHUB_CLIENT_ID'],
    consumer_secret=app.config['GITHUB_SECRET'],
    request_token_params={'scope': ['public_repo', 'user:email']},
    base_url='https://api.github.com/',
    request_token_url=None,
    access_token_method='POST',
    access_token_url='https://github.com/login/oauth/access_token',
    authorize_url='https://github.com/login/oauth/authorize'
)

# Lightweight record describing a single file on github. Functions in this
# module fill in only the fields they know and leave the rest as None.
file_details = collections.namedtuple('file_details', 'path, branch, sha, last_updated, url, text')


def default_repo_path():
    """
    Get path to main repo

    :returns: '<REPO_OWNER>/<REPO_NAME>' built from the application config
    """

    owner = app.config['REPO_OWNER']
    name = app.config['REPO_NAME']

    return '%s/%s' % (owner, name)
def default_repo_url():
    """
    Get URL to default repo

    :returns: 'https://github.com/<REPO_OWNER>/<REPO_NAME>' built from the
              application config
    """

    owner = app.config['REPO_OWNER']
    name = app.config['REPO_NAME']

    return 'https://github.com/%s/%s' % (owner, name)
def log_error(message, url, resp, **kwargs):
    """
    Log an error from a request and include URL, response status, response
    data and additional error information

    :param message: Message to log
    :param url: URL of request that failed
    :param resp: Response object holding failure information; its ``status``
                 is logged along with ``data`` when that attribute exists
    :param kwargs: Additional data to put in error message
    :returns: None
    """

    # items() rather than iteritems(): identical output here, and it does not
    # tie this helper to Python 2 dictionaries.
    additional_info = ['%s: "%s"' % (key, value)
                       for key, value in kwargs.items()]

    app.logger.error('%s at "%s", status: %d, data: %s, %s',
                     message, url, resp.status, getattr(resp, 'data', None),
                     ','.join(additional_info))
def files_from_github(repo, filename, limit=None):
    """
    Iterate through files with a specific name from github

    :param repo: Path to repo to read files from
    :param filename: Name of filename to search for recursively
    :param limit: Optional limit of the number of files to return
    :returns: Iterator through file_details tuples

    The iterator is simply empty when the repo sha or the file listing cannot
    be read.
    """

    sha = repo_sha_from_github(repo)
    if sha is None:
        # Plain return ends a generator; an explicit "raise StopIteration"
        # inside a generator is turned into RuntimeError by PEP 479.
        return

    cache_key = (repo, sha, filename)

    headers = {}
    etag = cache.read_file_listing_etag(cache_key)
    if etag is not None:
        headers = {'If-None-Match': etag}

    resp = _fetch_files_from_github_api(repo, sha, headers=headers)
    if resp is None:
        return

    # Try to read articles from cache
    if resp.status == 304:
        cached_files = _gen_files_from_cache(cache_key, limit=limit)

        # The cache reader is a generator, so a miss only surfaces once it is
        # advanced. Pull the first item inside the try block; wrapping only
        # the call that creates the generator can never catch the KeyError.
        try:
            first = next(cached_files)
        except StopIteration:
            # Cache hit with an empty listing
            return
        except KeyError:
            # Nothing in cache which is odd since we had a etag but that's ok
            # we can do a real read
            pass
        else:
            yield first
            for file_ in cached_files:
                yield file_
            return

    api_files = _gen_files_from_github_api(repo, sha, filename, limit=limit,
                                           cache_key=cache_key)

    # Same laziness applies here: the ValueError for a failed request is
    # raised on the first step of the generator, not when it is created.
    try:
        first = next(api_files)
    except (StopIteration, ValueError):
        # No matching files, or the listing request failed
        return

    yield first
    for file_ in api_files:
        yield file_
def _fetch_files_from_github_api(repo, sha, headers=None):
    """
    Grab listing of files from github API

    :param repo: Path to repo (owner/repo_name)
    :param sha: Sha of repo to read with
    :param headers: Optional dict of headers to use in request
    :returns: Response object from request or None if response failed

    Both 200 and 304 count as success so callers can send a conditional
    request and inspect ``resp.status`` themselves.
    """

    url = 'repos/%s/git/trees/%s?recursive=1' % (repo, sha)
    app.logger.debug('GET: %s', url)

    resp = github.get(url, headers=headers)

    if resp.status not in (200, 304):
        log_error('Failed reading files', url, resp)
        return None

    # NOTE(review): a 304 presumably carries no body, so the parsed payload
    # may be missing the key or not be a mapping at all; treat both as
    # "not truncated". Confirm against the client's response parsing.
    try:
        truncated = resp.data['truncated']
    except (KeyError, TypeError):
        truncated = False

    # FIXME: Handle this scenario
    if truncated:
        log_error('Too many files for API call', url, resp)

    return resp


def _gen_files_from_cache(cache_key, limit=None):
    """
    Get generator through files from cache

    :param cache_key: Key to retrieve files from cache
    :param limit: Optional limit of the number of files to return
    :returns: Iterator through file_details tuples
    :raises: KeyError if cache is a miss

    This is a generator, so the KeyError is raised when the first item is
    requested, not when the function is called.
    """

    files = cache.read_file_listing(cache_key)
    if files is None:
        # Wrap the key in a 1-tuple: the key may itself be a tuple, and
        # interpolating a multi-item tuple into a single %s raises TypeError
        # instead of the KeyError callers are told to expect.
        raise KeyError('No files found with %s' % (cache_key,))

    count = 0
    for file_ in json.loads(files):
        # Cached entries are (full_path, sha) pairs
        yield file_details(file_[0], None, file_[1], None, None, None)
        count += 1

        if limit is not None and count == limit:
            # Plain return ends the generator; "raise StopIteration" inside a
            # generator becomes RuntimeError under PEP 479.
            return


def _gen_files_from_github_api(repo, sha, filename, limit=None, cache_key=None):
    """
    Iterate through files with a specific name from github and cache files if
    cache_key is given

    :param repo: Path to repo to read files from
    :param sha: Sha of repo to read with
    :param filename: Name of filename to search for recursively
    :param limit: Optional limit of the number of files to return
    :param cache_key: Optional key to cache file listing with
    :returns: Iterator through file_details tuples
    :raises: ValueError if the listing request fails

    This is a generator, so the ValueError is raised when the first item is
    requested, not when the function is called.
    """

    resp = _fetch_files_from_github_api(repo, sha)
    if resp is None:
        raise ValueError('Failed reponse')

    count = 0
    files = []

    for obj in resp.data['tree']:
        if obj['path'].endswith(filename):
            full_path = '%s/%s' % (repo, obj['path'])
            yield file_details(full_path, None, obj['sha'], None, None, None)
            count += 1

            if cache_key is not None:
                # Easier to serialize a standard tuple than namedtuple
                files.append((full_path, obj['sha']))

            if limit is not None and count == limit:
                break

    # NOTE(review): the listing is only saved once the consumer has drained
    # this generator, and when limit cut the loop short the saved listing
    # holds just the first `limit` files. Confirm that is intended.
    if files and cache_key:
        cache.save_file_listing(cache_key, json.dumps(files))
def repo_sha_from_github(repo, branch=u'master'):
    """
    Get sha from head of given repo

    :param repo: Path to repo (owner/repo_name)
    :param branch: Name of branch to get sha for
    :returns: Sha of branch or None if the request did not return a 200
    """

    url = 'repos/%s/git/refs/heads/%s' % (repo, branch)
    app.logger.debug('GET: %s', url)

    resp = github.get(url)
    if resp.status == 200:
        return resp.data['object']['sha']

    log_error('Failed reading sha', url, resp, branch=branch)
    return None
def primary_github_email_of_logged_in():
    """
    Get primary email address of logged in user

    :returns: Email address flagged as primary, or None if the request did
              not return a 200 or no address is flagged primary
    """

    app.logger.debug('GET: user/emails')

    resp = github.get('user/emails')
    if resp.status == 200:
        for entry in resp.data:
            if entry['primary']:
                return entry['email']

    return None
def read_file_from_github(path, branch=u'master', rendered_text=True,
                          allow_404=False):
    """
    Get rendered file text from github API

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :param branch: Name of branch to read file from
    :param rendered_text: Return rendered or raw text
    :param allow_404: False to log warning for 404 or True to allow it i.e.
                      when you're just seeing if a file already exists
    :returns: file_details namedtuple. With rendered_text=False the result
              is None on error; with rendered_text=True a tuple is always
              returned and its text is None when rendering failed.

    Note when requesting rendered text there will be no SHA or last_updated
    data available. This is a restriction from the github API
    (https://developer.github.com/v3/media/#repository-contents)

    Requesting file 'details' like SHA and rendered text are 2 API calls.
    Therefore, if you want all of that information you should call this
    function twice, once with rendered_text=True and one with
    rendered_text=False and combine the information yourself.
    """

    if not rendered_text:
        return file_details_from_github(path, branch, allow_404=allow_404)

    text = rendered_markdown_from_github(path, branch, allow_404=allow_404)

    # This is a little tricky b/c this URL could change on github and we
    # would be wrong. However, those URLs have been the same for years so
    # seems like a safe enough bet at this point.
    owner, repo, file_path = split_full_file_path(path)

    # Cannot pass unicode data to pathname2url or it can raise KeyError. Must
    # only pass URL-safe bytes. So, something like u'\u2026' will raise a
    # KeyError but if we encode it to bytes, '%E2%80%A6', things work
    # correctly.
    # http://stackoverflow.com/questions/15115588/urllib-quote-throws-keyerror
    quoted_path = urllib.pathname2url(file_path.encode('utf-8'))
    url = u'https://github.com/%s/%s/blob/%s/%s' % (owner, repo, branch,
                                                    quoted_path)

    return file_details(path, branch, None, None, url, text)
def rendered_markdown_from_github(path, branch=u'master', allow_404=False):
    """
    Get rendered markdown file text from github API

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename.md>)
    :param branch: Name of branch to read file from
    :param allow_404: False to log warning for 404 or True to allow it i.e.
                      when you're just seeing if a file already exists
    :returns: HTML file text or None if the request did not return a 200
    """

    url = contents_url_from_path(path)
    headers = {'accept': 'application/vnd.github.html'}
    app.logger.debug('GET: %s, headers: %s, ref: %s', url, headers, branch)

    resp = github.get(url, headers=headers, data={'ref': branch})
    if resp.status == 200:
        return unicode(resp.data, encoding='utf-8')

    # Stay quiet only for a 404 the caller explicitly said to expect
    tolerated_miss = allow_404 and resp.status == 404
    if not tolerated_miss:
        log_error('Failed reading rendered markdown', url, resp,
                  branch=branch)

    return None
def file_details_from_github(path, branch=u'master', allow_404=False):
    """
    Get file details from github

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :param branch: Name of branch to read file from
    :param allow_404: False to log warning for 404 or True to allow it i.e.
                      when you're just seeing if a file already exists
    :returns: file_details namedtuple or None for error
    """

    url = contents_url_from_path(path)
    app.logger.debug('GET: %s ref: %s', url, branch)

    resp = github.get(url, data={'ref': branch})

    if resp.status != 200:
        # Only a 404 the caller explicitly allowed goes unlogged
        if resp.status != 404 or not allow_404:
            app.logger.warning('Failed reading file details at "%s", status: %d, branch: %s, data: %s',
                               url, resp.status, branch, resp.data)
        return None

    # Temporary debug. It seems that sometimes github returns a 200
    # response and a list of items, which should only happen if we ask for
    # the contents of a directory. This function should never be called
    # with a directory.
    try:
        sha = resp.data['sha']
    except TypeError as err:
        app.logger.error('Incorrect SHA response for URL: %s, resp: %s, err: %s',
                         url, resp.data, err)
        return None

    link = resp.data['_links']['html']

    # Content arrives base64 encoded; decode it to bytes, then to text
    encoded = resp.data['content'].encode('utf-8')
    text = unicode(base64.b64decode(encoded), encoding='utf-8')

    last_updated = resp._resp.headers.get('Last-Modified')

    return file_details(path, branch, sha, last_updated, link, text)
def commit_file_to_github(path, message, content, name, email, sha=None,
                          branch=u'master', auto_encode=True):
    """
    Save given file content to github

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :param message: Commit message to save file with
    :param content: Content of file
    :param name: Name of author who wrote file
    :param email: Email address of author
    :param sha: Optional SHA of file if it already exists on github
    :param branch: Name of branch to commit file to (branch must already
                   exist)
    :param auto_encode: Boolean to automatically encode data as utf-8 and
                        base64 it; pass False when content is already
                        base64 encoded
    :returns: SHA of commit or None for failure
    :raises: ValueError if only one of name and email is given

    Note that name and email can be None if you want to make a commit with the
    REPO_OWNER. However, name and email should both exist or both be None,
    which is a requirement of the underlying Github API.
    """

    url = contents_url_from_path(path)

    if auto_encode:
        content = base64.b64encode(content.encode('utf-8'))

    commit_info = {'message': message, 'content': content, 'branch': branch}

    if name is not None and email is not None:
        commit_info['author'] = {'name': name, 'email': email}
        commit_info['committer'] = {'name': name, 'email': email}
    elif (name is None) != (email is None):
        # Exactly one of the two was supplied
        raise ValueError('Must specify both name and email or neither')

    if sha:
        commit_info['sha'] = sha

    # The flask-oauthlib API expects the access token to be in a tuple or a
    # list. Not exactly sure why since the underlying oauthlib library has a
    # separate kwargs for access_token. See flask_oauthlib.client.make_client
    # for more information.
    token = (app.config['REPO_OWNER_ACCESS_TOKEN'], )

    # The token is deliberately left out of the log line: it is a credential
    # for the repo owner and must not end up in log files.
    app.logger.debug('PUT: %s, data: %s', url, commit_info)

    resp = github.put(url, data=commit_info, format='json', token=token)

    if resp.status not in (201, 200):
        log_error('Failed saving file', url, resp, commit_msg=message,
                  content=content, name=name, email=email, sha=sha,
                  branch=branch)
        return None

    return resp.data['commit']['sha']
def commit_image_to_github(path, message, file_, name, email, sha=None,
                           branch=u'master'):
    """
    Save given image file content to github

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :param message: Commit message to save file with
    :param file_: Open file object; it is read in full
    :param name: Name of author who wrote file
    :param email: Email address of author
    :param sha: Optional SHA of file if it already exists on github
    :param branch: Name of branch to commit file to (branch must already
                   exist)
    :returns: SHA of commit or None for failure
    """

    raw_bytes = file_.read()
    encoded = base64.encodestring(raw_bytes)

    # Already base64 encoded above so the commit helper must not encode again
    return commit_file_to_github(path, message, encoded, name, email,
                                 sha=sha, branch=branch, auto_encode=False)
def read_user_from_github(username=None):
    """
    Read user information from github

    :param username: Optional username to search for, if no username given the
                     currently logged in user will be returned (if any)
    :returns: Dict of information from github API call, empty dict if the
              request did not return a 200
    """

    url = 'user' if username is None else ('users/%s' % (username))
    app.logger.debug('GET: %s', url)

    resp = github.get(url)
    if resp.status == 200:
        return resp.data

    log_error('Failed reading user', url, resp)
    return {}
def read_repo_collaborators_from_github(owner=None, repo=None):
    """
    Generator for collaborator login/usernames for a given repo

    :param owner: Owner of repository defaults to REPO_OWNER config value
    :param repo: Name of repository defaults to REPO_NAME config value
    :returns: Generator through login names; empty if the request did not
              return a 200
    """

    owner = owner or app.config['REPO_OWNER']
    repo = repo or app.config['REPO_NAME']

    url = '/repos/%s/%s/collaborators' % (owner, repo)

    # This endpoint requires a user that has push access
    token = (app.config['REPO_OWNER_ACCESS_TOKEN'], )

    # The token is deliberately left out of the log line: it is a credential
    # for the repo owner and must not end up in log files.
    app.logger.debug('GET: %s', url)

    resp = github.get(url, token=token)

    if resp.status != 200:
        log_error('Failed reading collaborators', url, resp, repo=repo,
                  owner=owner)
        # Plain return ends a generator; "raise StopIteration" inside a
        # generator becomes RuntimeError under PEP 479.
        return

    for obj in resp.data:
        yield obj['login']
@github.tokengetter
def get_github_oauth_token():
    """
    Read github token from session

    :returns: The 'github_token' stored in the session, or a 1-tuple holding
              the REPO_OWNER_ACCESS_TOKEN config value when the session has
              none
    """

    session_token = session.get('github_token')
    if session_token is not None:
        return session_token

    # The flask-oauthlib API expects the access token to be in a tuple or a
    # list. Not exactly sure why since the underlying oauthlib library has a
    # separate kwargs for access_token. See
    # flask_oauthlib.client.make_client for more information.
    return (app.config['REPO_OWNER_ACCESS_TOKEN'], )
def split_full_file_path(path):
    """
    Split full file path into owner, repo, and file_path

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :returns: (owner, repo, file_path) where file_path is everything after
              the second slash, or '' when there is nothing after the repo
    :raises: IndexError if path has no slash at all
    """

    # Two splits are enough: whatever follows the repo is kept verbatim
    parts = path.split('/', 2)

    owner = parts[0]
    repo = parts[1]
    file_path = parts[2] if len(parts) > 2 else ''

    return (owner, repo, file_path)
def contents_url_from_path(path):
    """
    Get github API url for contents of file from full path

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :returns: URL suitable for a content call with github API
    """

    # Cannot pass unicode data to pathname2url or it can raise KeyError. Must
    # only pass URL-safe bytes. So, something like u'\u2026' will raise a
    # KeyError but if we encode it to bytes, '%E2%80%A6', things work
    # correctly.
    # http://stackoverflow.com/questions/15115588/urllib-quote-throws-keyerror
    owner, repo, file_path = [part.encode('utf-8')
                              for part in split_full_file_path(path)]

    api_path = 'repos/%s/%s/contents/%s' % (owner, repo, file_path)
    return urllib.pathname2url(api_path)
def read_branch(repo_path, name):
    """
    Read branch and get HEAD sha

    :param repo_path: Path to repo of branch
    :param name: Name of branch to read
    :returns: SHA of HEAD or None if branch is not found or the request
              failed
    """

    url = 'repos/%s/git/refs/heads/%s' % (repo_path, name)
    app.logger.debug('GET: %s', url)

    resp = github.get(url)

    if resp.status == 200:
        return resp.data['object']['sha']

    # A 404 just means the branch doesn't exist; anything else is an error
    if resp.status != 404:
        log_error('Failed reading branch', url, resp)

    return None
def create_branch(repo_path, name, sha):
    """
    Create a new branch

    :param repo_path: Path to repo that branch should be created from
    :param name: Name of branch to create
    :param sha: SHA to branch from
    :returns: True if branch was created or already exists, False if it
              could not be created
    """

    url = 'repos/%s/git/refs' % (repo_path)
    data = {'ref': 'refs/heads/%s' % (name), 'sha': sha}

    # Must use token of owner for this request b/c only owners and
    # collaborators can create branches
    token = (app.config['REPO_OWNER_ACCESS_TOKEN'], )

    # The token is deliberately left out of the log line: it is a credential
    # for the repo owner and must not end up in log files.
    app.logger.debug('POST: %s, data: %s', url, data)

    resp = github.post(url, data=data, format='json', token=token)

    if resp.status == 422:
        # Maybe it already exists
        curr_sha = read_branch(repo_path, name)
        if curr_sha is not None:
            return True

        log_error('Failed reading existing branch', url, resp, sha=sha)
        return False
    elif resp.status != 201:
        log_error('Failed creating branch', url, resp, sha=sha)
        return False

    return True
def update_branch(repo_path, name, sha):
    """
    Update branch to new commit SHA

    :param repo_path: Path to repo that holds the branch
    :param name: Name of branch to update
    :param sha: SHA the branch should point at
    :returns: True if branch was updated or False if branch could not be
              updated
    """

    url = 'repos/%s/git/refs/heads/%s' % (repo_path, name)
    data = {'sha': sha}

    # Must use token of owner for this request b/c only owners and
    # collaborators can update branches
    token = (app.config['REPO_OWNER_ACCESS_TOKEN'], )

    # The token is deliberately left out of the log line: it is a credential
    # for the repo owner and must not end up in log files.
    app.logger.debug('PATCH: %s, data: %s', url, data)

    resp = github.patch(url, data=data, format='json', token=token)
    if resp.status != 200:
        log_error('Failed updating branch', url, resp, sha=sha)
        return False

    return True
def check_rate_limit():
    """
    Get rate limit data

    :returns: None in case of an error or raw rate limit request data
    """

    url = '/rate_limit'
    app.logger.debug('GET: %s', url)

    resp = github.get(url)
    if resp.status == 200:
        return resp.data

    log_error('Failed checking rate limit', url, resp)
    return None
def remove_file_from_github(path, message, name, email, branch):
    """
    Remove file from github repo

    :param path: Path to file (<owner>/<repo>/<dir>/.../<filename>)
    :param message: Commit message to remove file with
    :param name: Name of author who wrote file
    :param email: Email address of author
    :param branch: Name of branch to delete file from
    :returns: True if file was removed or False otherwise

    Note the file is only removed from the repository, not the history of the
    file.
    """

    # Read most recent sha which is required to remove file
    details = file_details_from_github(path, branch)
    if details is None:
        return False

    url = contents_url_from_path(path)
    commit_info = {'sha': details.sha, 'branch': branch, 'message': message,
                   'author': {'name': name, 'email': email},
                   'committer': {'name': name, 'email': email}}

    # The flask-oauthlib API expects the access token to be in a tuple or a
    # list. Not exactly sure why since the underlying oauthlib library has a
    # separate kwargs for access_token. See flask_oauthlib.client.make_client
    # for more information.
    token = (app.config['REPO_OWNER_ACCESS_TOKEN'], )

    # The token is deliberately left out of the log line: it is a credential
    # for the repo owner and must not end up in log files.
    app.logger.debug('DELETE: %s, data: %s', url, commit_info)

    resp = github.delete(url, data=commit_info, format='json', token=token)

    if resp.status != 200:
        log_error('Failed removing file', url, resp, file=path)
        return False

    return True
def merge_branch(repo_path, base, head, message):
    """
    Attempt merge between two branches

    :param repo_path: Path to repo <owner>/<repo_name>
    :param base: Name of the base branch that the head will be merged into
    :param head: The name of the head to merge into base
    :param message: Commit message to use for merge
    :returns: True if merge was successful False otherwise
    """

    url = '/repos/%s/merges' % (repo_path)
    data = {'base': base, 'head': head, 'commit_message': message}
    token = (app.config['REPO_OWNER_ACCESS_TOKEN'], )

    # The token is deliberately left out of the log line: it is a credential
    # for the repo owner and must not end up in log files.
    app.logger.debug('POST: %s, data: %s', url, data)

    resp = github.post(url, data=data, format='json', token=token)

    # 204 means no content i.e. no merge needed
    if resp.status in (201, 204):
        return True

    log_error('Failed merging', url, resp, repo=repo_path, base=base,
              head=head)
    return False
def file_contributors(path, branch=u'master'):
    """
    Get dictionary of User objects representing authors and committers to a
    file

    :param path: Short-path to file (<dir>/.../<filename>) i.e. without repo
                 and owner
    :param branch: Name of branch to read contributors for
    :returns: Dictionary of the following form::

        {'authors': set([(name, login), (name, login), ...]),
         'committers': set([(name, login), (name, login), ...])}

    Note that name can be None if user doesn't have their full name setup on
    github account. Both sets are empty if the request did not return a 200.
    """

    contribs = {'authors': set(), 'committers': set()}

    url = u'/repos/%s/commits' % (default_repo_path())
    app.logger.debug('GET: %s path: %s, branch: %s', url, path, branch)

    # The commits listing selects its starting branch through the 'sha'
    # query parameter (it accepts a SHA or a branch name). A parameter named
    # 'branch' is not part of that API, so the requested branch never took
    # effect.
    resp = github.get(url, data={'path': path, 'sha': branch})

    if resp.status != 200:
        log_error('Failed reading commits from github', url, resp)
        return contribs

    def _extract_data_from_commit(commit, key):
        """Build the (name, login) pair for the 'author' or 'committer' key"""

        login = commit[key]['login']

        try:
            author_name = commit['commit'][key]['name']
        except KeyError:
            author_name = None
        else:
            # Normalize empty names to None
            if not author_name:
                author_name = None

        # API can return same name and login depending on how the account and
        # commit information is setup so don't bother storing duplicates. This
        # way caller knows we didn't get a real author name.
        if login == author_name:
            author_name = None

        return (author_name, login)

    for commit in resp.data:
        # Check author/committer first b/c we've seen issues in github API
        # where these can actually be None, like this commit:
        # https://github.com/pluralsight/guides/commit/44cd2072df8994fea2cee9de6ffb6c174b57bf03
        if commit['author']:
            contribs['authors'].add(
                _extract_data_from_commit(commit, 'author'))

        if commit['committer']:
            contribs['committers'].add(
                _extract_data_from_commit(commit, 'committer'))

    return contribs
def contributor_stats(repo_path=None):
    """
    Get response of /repos/<repo_path>/stats/contributors from github.com

    :param repo_path: Default repo or repo path in owner/repo_name form
    :returns: Raw response of contributor stats from
              https://developer.github.com/v3/repos/statistics/#get-contributors-list-with-additions-deletions-and-commit-counts

    Note the github caches contributor results so an empty list can also be
    returned if the data is not available yet or there is an error
    """

    if repo_path is None:
        repo_path = default_repo_path()

    url = u'/repos/%s/stats/contributors' % (repo_path)
    app.logger.debug('GET: %s', url)

    resp = github.get(url)

    if resp.status == 200:
        return resp.data

    if resp.status == 202:
        app.logger.info('Data not in cache from github.com')
    else:
        log_error('Failed reading stats from github', url, resp)

    return []