原理是使用 GitHub/Bitbucket 提供的 API 获取所有的项目列表，然后使用系统的 git 命令克隆项目源码。
Github
使用本脚本需要配置 GitHub Access Token，并在本地配置好 SSH key（脚本使用 SSH 地址克隆）。
# Shallow clone all repos from GitHub.
# Usage:
# python3 github.py -t [GITHUB_TOKEN] -o [OUTPUT_DIR]
from requests.auth import HTTPBasicAuth
import requests
import os
# (name, ssh_url) pairs collected by load_repos; the set is filled in place.
repos = set()
def load_repos(token, page=1):
    """Fetch the authenticated user's repositories and record them in `repos`.

    Starting at `page`, requests successive pages of the GitHub
    ``/user/repos`` endpoint and adds a ``(name, ssh_url)`` tuple to the
    module-level `repos` set for every repository returned. Paging stops at
    the first page that does not contain exactly `per_page` entries.

    Args:
        token: GitHub access token sent in the ``Authorization`` header.
        page: Page number to start from (defaults to 1).

    Raises:
        requests.HTTPError: if GitHub answers with an error status, for
            example when the token is rejected.
        requests.Timeout: if a request does not complete within 30 seconds.
    """
    per_page = 100
    headers = {'Authorization': f'token {token}'}
    # Loop instead of recursing so the call depth does not grow with the
    # number of pages.
    while True:
        url = f'https://api.github.com/user/repos?page={page}&per_page={per_page}'
        response = requests.get(url, headers=headers, timeout=30)
        # An error payload is a JSON object rather than a list; fail here
        # with a clear HTTP error instead of a confusing TypeError below.
        response.raise_for_status()
        rs = response.json()
        for r in rs:
            repos.add((r['name'], r['ssh_url']))
        # A page that is not full is the last one.
        if len(rs) != per_page:
            break
        page += 1
def load_all_repos(token):
    """Load all repos and save them in `repos`, starting from the first page."""
    load_repos(token, page=1)
def download_repo(output, name, href):
    """Shallow-clone one repository into ``<output>/<name>``.

    A failed clone is reported on stdout and does not raise, so the caller
    can carry on with the remaining repositories.

    Args:
        output: Directory under which the clone is created.
        name: Repository name, used as the clone's directory name.
        href: Clone URL handed to ``git clone``.
    """
    # Imported locally because the file's import block does not provide it.
    import subprocess

    print(f'Cloning {name} {href}')
    # Pass an argument list (no shell) so quotes, spaces or shell
    # metacharacters in the name/URL cannot alter the command; `--` keeps a
    # value that starts with `-` from being read as a git option.
    command = ['git', 'clone', '--depth=1', '--', href, f'{output}/{name}']
    try:
        result = subprocess.run(command, check=False)
    except OSError as err:
        # e.g. git is not installed; report it and let the caller continue.
        print(f'Failed to run git for {name}: {err}')
        return
    if result.returncode != 0:
        print(f'Failed to clone {name} (git exited with {result.returncode})')
if __name__ == "__main__":
    import argparse

    # Command-line interface: both options are mandatory.
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--token", help="Github developer token", type=str, required=True)
    parser.add_argument("-o", "--output", help="Output directory", type=str, required=True)
    options = parser.parse_args()
    token = options.token
    output = options.output

    # Gather the repository list first, then clone each entry.
    load_all_repos(token)
    for name, href in repos:
        download_repo(output, name, href)
Bitbucket
使用本脚本需要配置 Bitbucket App Password才能访问。
# Shallow clone all repos from bitbucket.
# Usage:
# python3 bitbucket.py -u [USERNAME] -p [BITBUCKET_APP_PASSWORD] -o [OUTPUT_DIR]
# Requirements:
# 1. Setup app password in Bitbucket
# 2. Setup local ssh keys
# 3. Only git repositories are supported
from requests.auth import HTTPBasicAuth
import requests
import os
# Bitbucket roles to query; each one is sent as the `role` query parameter.
roles = ['owner', 'member', 'contributor', 'admin']
# Page size for Bitbucket listing requests (matches the `pagelen` query parameter).
pagelen = 100
# (name, ssh clone URL) pairs found so far; a set, so a repository that shows
# up under several roles is stored only once.
repos = set()
def load_repos(url, user, pwd):
    """Load every page of a Bitbucket repository listing into `repos`.

    Fetches `url`, adds a ``(name, ssh_href)`` tuple to the module-level
    `repos` set for each entry in the response's ``values`` list, then
    follows the response's ``next`` link until no such link is present.
    Repositories that expose no clone link named ``ssh`` are reported and
    skipped.

    Args:
        url: URL of the first listing page to request.
        user: Bitbucket username for HTTP basic authentication.
        pwd: Bitbucket app password for HTTP basic authentication.

    Raises:
        requests.HTTPError: if Bitbucket answers with an error status, for
            example when the credentials are rejected.
        requests.Timeout: if a request does not complete within 30 seconds.
    """
    auth = HTTPBasicAuth(user, pwd)
    # Loop instead of recursing so the call depth does not grow with the
    # number of pages.
    while url:
        response = requests.get(url, auth=auth, timeout=30)
        # An error payload has no 'values' key; fail here with a clear HTTP
        # error instead of a KeyError below.
        response.raise_for_status()
        rs = response.json()
        for r in rs['values']:
            name = r['name']
            # Default to None so a repository without an ssh link is skipped
            # rather than aborting the whole run with StopIteration.
            href = next(
                (link['href'] for link in r['links']['clone'] if link['name'] == 'ssh'),
                None,
            )
            if href is None:
                print(f'Skipping repo {name}: no ssh clone link')
                continue
            print(f'Found repo {name} {href}')
            repos.add((name, href))
        url = rs.get('next')
def load_all_repos(user, pwd):
    """Load all repos for every role in `roles` and save them in `repos`.

    Args:
        user: Bitbucket username, forwarded to `load_repos`.
        pwd: Bitbucket app password, forwarded to `load_repos`.
    """
    for role in roles:
        # Use the module-level `pagelen` instead of repeating the literal, so
        # the page size is defined in one place.
        url = f'https://api.bitbucket.org/2.0/repositories?pagelen={pagelen}&role={role}'
        load_repos(url, user, pwd)
def download_repo(output, name, href):
    """Shallow-clone one repository into ``<output>/<name>``.

    A failed clone is reported on stdout and does not raise, so the caller
    can carry on with the remaining repositories.

    Args:
        output: Directory under which the clone is created.
        name: Repository name, used as the clone's directory name.
        href: Clone URL handed to ``git clone``.
    """
    # Imported locally because the file's import block does not provide it.
    import subprocess

    print(f'Cloning {name} {href}')
    # Pass an argument list (no shell) so quotes, spaces or shell
    # metacharacters in the name/URL cannot alter the command; `--` keeps a
    # value that starts with `-` from being read as a git option.
    command = ['git', 'clone', '--depth=1', '--', href, f'{output}/{name}']
    try:
        result = subprocess.run(command, check=False)
    except OSError as err:
        # e.g. git is not installed; report it and let the caller continue.
        print(f'Failed to run git for {name}: {err}')
        return
    if result.returncode != 0:
        print(f'Failed to clone {name} (git exited with {result.returncode})')
if __name__ == "__main__":
    import argparse

    # Command-line interface: all three options are mandatory.
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--user", help="Bitbucket username", type=str, required=True)
    parser.add_argument("-p", "--password", help="Bitbucket app password", type=str, required=True)
    parser.add_argument("-o", "--output", help="Output directory", type=str, required=True)
    options = parser.parse_args()
    user = options.user
    pwd = options.password
    output = options.output

    # Gather the repository list first, then clone each entry.
    load_all_repos(user, pwd)
    for name, href in repos:
        download_repo(output, name, href)