{%- set upload_artifact_s3_action = "seemethere/upload-artifact-s3@v3" -%}

{# squid_proxy is a private ELB that is only available to GHA custom runners #}
{%- set squid_proxy = "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -%}
{# squid_no_proxy is a list of common fixed domains and IPs that we don't need to proxy. See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/http_proxy_config.html#windows-proxy #}
{%- set squid_no_proxy = "localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" -%}

{%- set timeout_minutes = 240 -%}

{%- macro concurrency(build_environment) -%}
concurrency:
  group: !{{ build_environment }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
{%- endmacro -%}

{%- macro add_retry_to_env() -%}
retry () {
  "$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
{%- endmacro -%}

{%- macro display_ec2_information() -%}
- name: Display EC2 information
  shell: bash
  run: |
    set -euo pipefail
    function get_ec2_metadata() {
      # Pulled from instance metadata endpoint for EC2
      # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
      category=$1
      curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
    }
    echo "ami-id: $(get_ec2_metadata ami-id)"
    echo "instance-id: $(get_ec2_metadata instance-id)"
    echo "instance-type: $(get_ec2_metadata instance-type)"
{%- endmacro -%}

{%- macro parse_ref(pytorch_directory="") -%}
- name: Parse ref
{%- if pytorch_directory %}
  working-directory: !{{ pytorch_directory }}
{%- endif %}
  id: parse-ref
  run: .github/scripts/parse_ref.py
{%- endmacro -%}

{%- macro upload_test_statistics(build_environment, when="always()", pytorch_directory="") -%}
- name: Display and upload test statistics (Click Me)
{%- if pytorch_directory %}
  working-directory: !{{ pytorch_directory }}
{%- endif %}
  if: !{{ when }}
  # temporary hack: set CIRCLE_* vars, until we update
  # tools/stats/print_test_stats.py to natively support GitHub Actions
  env:
    AWS_DEFAULT_REGION: us-east-1
    BRANCH: ${{ steps.parse-ref.outputs.branch }}
    JOB_BASE_NAME: !{{ build_environment }}-test
    PR_NUMBER: ${{ github.event.pull_request.number }}
    SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
    TAG: ${{ steps.parse-ref.outputs.tag }}
    WORKFLOW_ID: '${{ github.run_id }}'
  shell: bash
  run: |
    python3 -m pip install -r requirements.txt
    python3 -m pip install boto3==1.19.12
    python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
{%- endmacro -%}

{%- macro chown_dir(dir) -%}
- name: Chown artifacts
  if: always()
  run: |
    # Ensure the working directory gets chowned back to the current user
    docker run --rm -v "!{{ dir }}:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
{%- endmacro -%}
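
{# Illustrative only (not emitted into any workflow): a job template that imports
   this file -- the `common.` namespace below is an assumption about how callers
   import it -- could apply the macros above like so:

   !{{ common.concurrency("linux-xenial-py3.6-gcc5.4") }}

   which, for the hypothetical build environment "linux-xenial-py3.6-gcc5.4", renders:

   concurrency:
     group: linux-xenial-py3.6-gcc5.4-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
     cancel-in-progress: true

   i.e. one in-flight run per PR (or per commit on push), with older runs cancelled.
#}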
{%- macro setup_ec2_windows() -%}
!{{ display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
  uses: seemethere/add-github-ssh-key@v1
  with:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
{%- endmacro -%}

{%- macro setup_ec2_linux() -%}
!{{ display_ec2_information() }}
- name: Log in to ECR
  env:
    AWS_RETRY_MODE: standard
    AWS_MAX_ATTEMPTS: 5
  run: |
    AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
    !{{ add_retry_to_env() }}
    retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
        --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
  run: |
    !{{ add_retry_to_env() }}
    retry docker pull "${ALPINE_IMAGE}"
    # Ensure the working directory gets chowned back to the current user
    docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
  run: |
    rm -rf "${GITHUB_WORKSPACE}"
    mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
  uses: seemethere/add-github-ssh-key@v1
  with:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
  run: |
    env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
{%- endmacro -%}

{%- macro setup_rocm_linux() -%}
- name: Clean workspace
  run: |
    rm -rf "${GITHUB_WORKSPACE}"
    mkdir "${GITHUB_WORKSPACE}"
- name: Set DOCKER_HOST
  run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
- name: Runner health check system info
  if: always()
  run: |
    cat /etc/os-release || true
    cat /etc/apt/sources.list.d/rocm.list || true
    cat /opt/rocm/.info/version || true
    whoami
- name: Runner health check rocm-smi
  if: always()
  run: |
    rocm-smi
- name: Runner health check rocminfo
  if: always()
  run: |
    rocminfo
- name: Runner health check GPU count
  if: always()
  run: |
    ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
    if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then
      echo "Failed to detect GPUs on the runner"
      exit 1
    fi
- name: Runner health check disconnect on failure
  if: ${{ failure() }}
  run: |
    killall runsvc.sh
- name: Preserve github env variables for use in docker
  run: |
    env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
{%- endmacro -%}

{%- macro teardown_ec2_linux(pytorch_directory="") -%}
- name: Hold runner for 2 hours or until ssh sessions have drained
{%- if pytorch_directory %}
  working-directory: !{{ pytorch_directory }}
{%- endif %}
  # Always hold for active ssh sessions
  if: always()
  run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
  if: always()
  run: |
    # Ensure the working directory gets chowned back to the current user
    docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
  if: always()
  run: |
    # ignore expansion of "docker ps -q" since it could be empty
    # shellcheck disable=SC2046
    docker stop $(docker ps -q) || true
    # Prune all of the docker images
    docker system prune -af
{%- endmacro -%}
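
{# Sketch of how the setup/teardown macros pair up in a generated Linux job
   (assumed shape; real job templates add their own build/test steps in between,
   and the `common.` prefix is again an assumption about the import namespace):

   steps:
     !{{ common.setup_ec2_linux() }}
     !{{ common.checkout() }}
     # ... build and test steps ...
     !{{ common.teardown_ec2_linux() }}

   Setup logs in to ECR and chowns/cleans the workspace; teardown holds the runner
   for active ssh sessions, chowns the workspace back, and prunes docker state.
#}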
{%- macro teardown_rocm_linux() -%}
- name: Kill containers, clean up images
  if: always()
  run: |
    # ignore expansion of "docker ps -q" since it could be empty
    # shellcheck disable=SC2046
    docker stop $(docker ps -q) || true
    # Prune all of the docker images
    docker system prune -af
{%- endmacro -%}

{%- macro checkout(submodules="recursive", deep_clone=True, directory="", repository="pytorch/pytorch", branch="") -%}
- name: Checkout !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }}
  uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
  with:
{%- if deep_clone %}
    # deep clone, to allow use of git merge-base
    fetch-depth: 0
{%- endif %}
    submodules: !{{ submodules }}
{%- if repository != "pytorch/pytorch" %}
    repository: !{{ repository }}
{%- endif %}
{%- if branch %}
    ref: !{{ branch }}
{%- endif %}
{%- if directory %}
    path: !{{ directory }}
{%- endif %}
- name: Clean !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }} checkout
  run: |
    # Remove any artifacts from the previous checkouts
    git clean -fxd
{%- if directory %}
  working-directory: !{{ directory }}
{%- endif %}
{%- endmacro -%}

{%- macro upload_downloaded_files(name, artifact_name="", use_s3=True, when="always()") -%}
- name: Zip JSONs for upload
  if: !{{ when }}
  env:
{%- if name == 'linux' or name == 'windows' or name == 'macos' %}
    FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
{%- else %}
    FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
{%- endif %}
{%- if name == 'windows' %}
  shell: powershell
  run: |
    # -ir => recursive include all files in pattern
    7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
{%- else %}
  run: |
    # Remove any previous test jsons if they exist
    rm -f test-jsons-*.zip
    zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
{%- endif %}
{%- if use_s3 %}
- uses: !{{ upload_artifact_s3_action }}
  name: Store Test Downloaded JSONs on S3
{%- else %}
- uses: actions/upload-artifact@v2
  name: Store Test Downloaded JSONs on Github
{%- endif %}
  if: !{{ when }}
  with:
{%- if artifact_name != "" %}
    name: !{{ artifact_name }}
{%- endif %}
    retention-days: 14
    if-no-files-found: warn
    path: test-jsons-*.zip
{%- endmacro -%}

{%- macro upload_test_reports(name, artifact_name="", use_s3=True) -%}
- name: Zip test reports for upload
  if: always()
  env:
{%- if name == 'linux' or name == 'windows' or name == 'macos' %}
    FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
{%- else %}
    FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
{%- endif %}
{%- if name == 'windows' %}
  shell: powershell
  run: |
    # -ir => recursive include all files in pattern
    7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
{%- else %}
  run: |
    # Remove any previous test reports if they exist
    rm -f test-reports-*.zip
    zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
{%- endif %}
{%- if use_s3 %}
- uses: !{{ upload_artifact_s3_action }}
  name: Store Test Reports on S3
{%- else %}
- uses: actions/upload-artifact@v2
  name: Store Test Reports on Github
{%- endif %}
  if: always()
  with:
{%- if artifact_name != "" %}
    name: !{{ artifact_name }}
{%- endif %}
    retention-days: 14
    if-no-files-found: error
    path: test-reports-*.zip
{%- endmacro -%}
{%- macro render_test_results() -%}
- name: Install render_test_results dependencies
  if: always()
  shell: bash
  run: |
    python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
  if: always()
  shell: bash
  # Encoding is weird on windows, just try to default to utf-8 if possible
  env:
    PYTHONIOENCODING: "utf-8"
  run: |
    python3 tools/render_junit.py test/
{%- endmacro -%}

{%- macro calculate_docker_image(always_rebuild) -%}
- name: Calculate docker image tag
  id: calculate-tag
  run: |
    DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
    echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
    echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
    echo "::set-output name=docker_tag::${DOCKER_TAG}"
    echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
  id: check
  env:
    BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
  run: |
    set -x
{%- if not always_rebuild %}
    # Check if image already exists, if it does then skip building it
    if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
      exit 0
    fi
    if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
      # if we're on the base branch then use the parent commit
      MERGE_BASE=$(git rev-parse HEAD~)
    else
      # otherwise we're on a PR, so use the most recent base commit
      MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
    fi
    # Covers the case where a previous tag doesn't exist for the tree
    # this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
    if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
      echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
      exit 1
    fi
    PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
    # If no image exists but the hash is the same as the previous hash then we should error out here
    if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
      echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
      echo "       contact the PyTorch team to restore the original images"
      exit 1
    fi
{%- endif %}
    echo ::set-output name=rebuild::yes
- name: Build and push docker image
  if: ${{ steps.check.outputs.rebuild }}
  env:
    DOCKER_SKIP_S3_UPLOAD: 1
  working-directory: .circleci/docker
  run: |
    export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
    ./build_docker.sh
{%- endmacro -%}

{%- macro setup_miniconda(python_version, activate_environment=True) -%}
- name: Setup miniconda
  uses: conda-incubator/setup-miniconda@v2
  with:
    auto-update-conda: true
    python-version: !{{ python_version }}
{%- if activate_environment %}
    activate-environment: build
{%- endif %}
{%- endmacro -%}

{%- macro set_xcode_version(xcode_version) -%}
{%- if xcode_version != '' %}
  # Set Xcode version to !{{ xcode_version }}
  DEVELOPER_DIR: /Applications/Xcode_!{{ xcode_version }}.app/Contents/Developer
{%- endif %}
{%- endmacro -%}
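
{# Example rendering of set_xcode_version (the version "12.4" is a placeholder):

   !{{ common.set_xcode_version("12.4") }}

   expands to:

   # Set Xcode version to 12.4
   DEVELOPER_DIR: /Applications/Xcode_12.4.app/Contents/Developer
#}

{%- macro wait_and_kill_ssh_windows(pytorch_directory="") -%}
- name: Wait until all sessions have drained
  shell: powershell
{%- if pytorch_directory %}
  working-directory: !{{ pytorch_directory }}
{%- endif %}
  if: always()
  timeout-minutes: 120
  run: |
    .github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
  shell: powershell
{%- if pytorch_directory %}
  working-directory: !{{ pytorch_directory }}
{%- endif %}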
  if: always()
  run: |
    .github\scripts\kill_active_ssh_sessions.ps1
{%- endmacro -%}
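
{# Note on docker image tagging (summarizing calculate_docker_image above):
   DOCKER_TAG is the git tree hash of .circleci/docker, so images are rebuilt only
   when that directory's contents change. To preview the tags locally
   (illustrative; the `origin/master` remote/branch name is an assumption):

   git rev-parse HEAD:.circleci/docker
   git rev-parse "$(git merge-base HEAD origin/master):.circleci/docker"
#}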